### Assignment #3 R code ############################################################## # 3.9, S&S ############################################################## cmort <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/cmort.dat") time <- 1:508 # weekly smoothed averages during 1970s # --- sequence plot plot(time,cmort, type="o") # --- correlations; partial correlations par(mfrow=c(2,1)) acf(cmort, lag.max=60); # long to see annual @ 52 weeks pacf(cmort, lag.max=60); par(mfrow=c(1,1)) # --- fit suggested AR(2) using ols ar2 <- ar.ols(cmort, aic=FALSE, order.max=2, demean=FALSE, intercept=TRUE) names(ar2) ar2; ar2$asy.se.coef; ar2$var.pred # 1 2 # 0.4286 0.4418 # 0.0398 0.0398 # --- prediction intervals, using AR(2) pred <- predict(ar2, cmort, n.ahead=20, se.fit=TRUE) # [1] 87.59986 86.76349 87.33714 87.21350 # [1] 5.68485 6.18497 7.13423 7.59335 # --- draw the predictions with interval at end of data n<-508 show <- 450:n plot(time[show],cmort[show], xlim=c(show[1],520)) lines(time[show],cmort[show]-ar2$resid[show], col="red") nf <- length(pred$pred); lines(n+1:nf, pred$pred, lty=2, col="red") lines(n+1:nf, pred$pred+1.96*pred$se, col="red") lines(n+1:nf, pred$pred-1.96*pred$se, col="red") # --- residual correlations show problems ar2 <- arima(cmort, order=c(2,0,0), include.mean=TRUE, method="ML"); print(ar2) # ar1 ar2 intercept # 0.4301 0.4424 88.8416 # s.e. 0.0397 0.0398 1.9415 tsdiag(ar2, gof.lag=70) ############################################################## # 3.31, S&S ############################################################## # --- use read.table since the data are more than one column Temp <- read.table("http://www.stat.pitt.edu/stoffer/tsa2/data/globtemp2.dat") # --- means are deviations from "normal" colnames(Temp) <- c("Year", "Mean", "5-Year Mean") time <- 1880:2004 xt <- Temp[,2] # --- begin with sequence plot - always plot(time, xt, type="o") # --- evidently not stationary, so difference it dtime <- time[-1] dxt <- diff(xt) plot(dtime, dxt, type="o", xlab="Change in Temperature", ylab="Year") # --- correlations; partial correlations par(mfrow=c(2,1)) acf(dxt); pacf(dxt); par(mfrow=c(1,1)) # --- fit several low-order arma models m300 <- arima(dxt, order=c(3,0,0), include.mean=TRUE, method="ML"); print(m300) tsdiag(m300) m003<- arima(dxt, order=c(0,0,3), include.mean=TRUE, method="ML"); print(m003) tsdiag(m003) m303<- arima(dxt, order=c(3,0,3), include.mean=TRUE, method="ML"); print(m303) tsdiag(m303) # --- put the AIC into a matrix (indices off by 1) maxp <- maxq <- 6 aic.table <- matrix(NA,nrow=1+maxp, ncol=1+maxq) rownames(aic.table)<-paste("p",0:maxp,sep="=") # nice labels help! colnames(aic.table)<-paste("q",0:maxq,sep="=") for (p in 0:maxp) for (q in 0:maxq) { aic.table[1+p,1+q] <- arima(dxt, order=c(p,0,q), include.mean=TRUE, method="ML")$aic } aic.table pp <- 0:(1+maxp); qq <- 0:(1+maxq) # one more than nrow or ncol image(pp,qq,aic.table, col=topo.colors(50)) # better image without q=0 which is really poor fit image(pp,qq[-1],aic.table[,-1], col=topo.colors(50)) # --- MA(4) seems to work as well as any, at least over these choices of fits m004<- arima(dxt, order=c(0,0,4), include.mean=TRUE, method="ML"); print(m004) tsdiag(m004) # --- show predictions... let ARIMA do the predictions. Oddly, estimates differ a bit source("http://www.stat.pitt.edu/stoffer/tsa2/Rcode/itall.R") # compare to Arima numbers m014 <- arima(xt, order=c(0,1,4), include.mean=TRUE, method="ML"); print(m014); print(m004) tsdiag(m014) n<-length(xt) show <- 100:n plot(time[show],xt[show], ylim=c(0,0.85),xlim=c(1979,2010)) lines(time[show],xt[show]-residuals(m014)[show], col="red") pred <- predict(m014, n.ahead=5) nf <- length(pred$pred); lines(2004+1:nf, pred$pred, lty=2, col="red") lines(2004+1:nf, pred$pred+1.96*pred$se, col="red") lines(2004+1:nf, pred$pred-1.96*pred$se, col="red")