# Script for Class 1 # Note that SS data is at www.stat.pitt.edu/stoffer/tsa2 # set working directory for convenience to place on your system setwd("~/courses/stat910/") ### J&J Quarterly Earnings per Share ### # read SS data on J&J quarterly earnings from SS web site jj.raw <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/jj.dat") plot(jj.raw) jj.ts <- ts(jj.raw, start=c(1960,1), frequency = 4) # time labels, but come at cost plot(jj.ts) # log scale on y axis plot(jj.ts, log="y") # Want to fit a regression on time, but regression complicates ts objects # Simply work with regular R series jj <- jj.raw time <- 1960+(0:83)/4 quarter <- as.factor(rep(1:4,21)) # quarter labels plot(time,log(jj), main="Log of J&J Quarterly Earnings Per Share") # Fit a regression on time, residuals have patterns regr <- lm(log(jj) ~ time); summary(regr) abline(regr, col="red") # add to plot plot(time, residuals(regr), col=1:4) # color is nice for this # Revise regression by adding quarterly indicators regr.2 <- lm(log(jj) ~ time + quarter); summary(regr.2) plot(time, residuals(regr.2), col=1:4) # color is nice for this regr.3 <- lm(log(jj) ~ time + quarter + quarter*time); summary(regr.3) plot(time, residuals(regr.3), col=1:4) # color is nice for this # Q: What's happening? Certainly not random pattern left in residuals. # Q: What's the presence of a pattern here say about the regressions? ### Global temperature deviations ### temp <- scan("http://stat.wharton.upenn.edu/~stine/stat910/data/nasa_global.dat") time <- 1880 + (0:(length(temp)-1))/12 plot(time,temp, type="l", main="NASA Global Temperature Deviation", xlab="Year", ylab="1/100 Degree Deviation") # plot deviations of deviations smth <- loess(temp ~ time) lines(time,smth$fitted, col="red") smth.4 <- loess(temp ~ time, span=0.4) lines(time,smth.4$fitted, col="blue") plot(time, smth.4$residuals) ### Speech Signal ### # Much longer, with quite different and regular repeating pattern speech <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/speech.dat") length(speech) plot(speech, type="l") # look at the autocorrelations acf(speech) # look at the periodogram pg <- spec.pgram(speech, demean=TRUE) s <- spec.pgram(speech, taper=0.5, demean=TRUE, plot=FALSE, spans=c(25,13)) lines(s$freq, s$spec, col="red") # periodogram of periodogram! dev <- log(pg$spec)-log(s$spec) plot(dev,type="l") spec.pgram(dev, taper=0.5, demean=TRUE, plot=TRUE, spans=c(13,5)) ### Southern Oscillation and Fish ### # Arranged as two series, with 453 months in each soi <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/soi.dat") fish <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/recruit.dat") par(mfrow=c(2,1)) plot(soi, main="Southern Oscillation", type="l") plot(fish, main="Fish Recruitment", type="l") par(mfrow=c(1,1)) # no contemporaneous correlation plot(soi,fish) # dependence at other lags? plot(soi[-453], fish[-1]) plot(soi[-(453-(0:3))], fish[-(1:4)]) plot(soi[-(453-(0:7))], fish[-(1:8)]) # this function computes the correlations at varying lags: which is the leading indicator? ccf(soi,fish) ### Brain Imaging ### mri <- read.table("http://www.stat.pitt.edu/stoffer/tsa2/data/fmri.dat") time <- 2*(1:128) # these are very much in phase plot(time,mri[,2],type="l",col="red") lines(time,mri[,3],type="l",col="blue") lines(time,mri[,4],type="l",col="green") lines(time,mri[,5],type="l",col="black") lines(time,mri[,6],type="l",col="orange") # these less so plot(time,mri[,7],type="l",col="red") lines(time,mri[,8],type="l",col="blue") lines(time,mri[,9],type="l",col="green") ### Earthquake or Explosion ### # Arranged as 2048 of earthquake, then explosion (sampled at 40/second) boom <- matrix(scan("http://www.stat.pitt.edu/stoffer/tsa2/data/eq5exp6.dat"), ncol=2) length(boom[,1]) par(mfrow=c(2,1)) plot(boom[,1], main="Earthquake", type="l") plot(boom[,2], main="Explosion", type="l") par(mfrow=c(1,1)) ### Random Walk ### # can be very hard to distinguish this sort of noise from results of a model # since you only have one realization, one sample path n <- 100; x <- 1:n; rw <- cumsum(rnorm(n)); plot(rw) regr <- lm(rw ~ x) summary(regr) # summary adds significance coefficients(regr) coefficients(summary(regr)) # do a little simulation of the t-statistic of the slope nreps <- 100; tstats <- rep(0,nreps) for(rep in 1:nreps) { rw <- cumsum(rnorm(n)) regr <- lm(rw ~ x) tstats[rep] <- coefficients(summary(regr))[2,3] } hist(tstats) # count how many |t| are larger than two; how many should be? sum(abs(tstats)>2) ### Two different types of processes... both generated by the filter command: ### Linear Process ### xt <- filter(rnorm(100), filter=c(.2,.4,.4,.4,.2), method="convolution"); plot(xt) ### Autoregression ### xt <- filter(rnorm(100), filter=c(.2,.5,.2), method="recursive"); plot(xt) # How would you distinguish among these?