### R commands for Lecture 5 on regressinon setwd("/Users/bob/work/courses/stat910/") ################################################# # # Functions # ################################################# mean.na <- function(x) { mean(x,na.rm = TRUE) } sd.na <- function(x) { sd (x,na.rm = TRUE) } lag.plot <- function(xt,lag) { n <- length(xt); plot(xt[-(n:(n-lag+1))],xt[-(1:lag)], xlab=paste("X[t-",lag,"]"), ylab="X_t") } generalized.difference <- function(xt, alpha) { n<-length(xt) c(NA, xt[-1]-alpha*xt[-n]) # jam a leading missing value } ################################################# # # Examples # ################################################# ### Global temperature since 1900 data <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/globtemp.dat") xt <- data[45:142] time <- 1900:1997 plot(time,xt, type="o") # this type connect the dots # Line explains 2/3 of variance (R2 = 0.65), but clearly leaves a pattern # MIGHT this be a random walk? regr <- lm(xt ~ time); summary(regr) abline(regr, col="red") # plot of regr gives variety of diagnostics plot(regr) # Sequence plot of the residuals plot(time,residuals(regr),type="o"); abline(h=0, col="gray") # Autocorrelation plot of residuals (where do horizontal lines come from?) acf(residuals(regr), lag.max=25) points(0:20,0.5^(0:25),col="red") # Lag plots to make sure nothing odd (outliers, curvature) e <- residuals(regr) n <- length(e) plot(e[-n],e[-1]) par(mfrow=c(2,2)) for(i in 2:5) { lag.plot(e,i) } par(mfrow=c(1,1)) # alternative would be to consider differencing the series (see text) plot(time,xt, type="o") plot(time[-1],diff(xt),type="o") acf(diff(xt), lag.max=25) ### Temperature, particulates, and mortality # 6-day averages, after smoothing (n = 508) mort <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/cmort.dat") temp <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/temp.dat") part <- scan("http://www.stat.pitt.edu/stoffer/tsa2/data/part.dat") time <- 1:(length(temp)) # mortality as the response, note seasonality plot(mort, type="b",pch=20) par(mfrow=c(3,1)) plot(mort, type="o") plot(temp, type="o") plot(part, type="o") par(mfrow=c(1,1)) # where's the seasonality (put time last) pairs(cbind(mort,temp,part)) # ,time)) plot(temp, mort) lines(lowess(temp,mort), col="red") # vaguely quadratic pattern ctemp <- temp-mean(temp) ctemp2 <- ctemp^2 qregr <- lm(mort ~ ctemp + ctemp2); summary(qregr) index <- order(temp) lines(temp[index], fitted.values(qregr)[index], col="blue") plot(residuals(qregr), type="l") # add time trend as well qregr <- lm(mort ~ time + ctemp + ctemp2 + part); summary(qregr) plot(residuals(qregr), type="l") # evident skewness in qq plot plot(qregr) # temporal dependence? Lag enough for annual cycle acf(residuals(qregr), lag.max=60) plot(residuals(qregr), type="l") for(j in seq(0,500,by=52)) abline(v=j, col="green", lty=2) ################################################# # # OLS vs GLS # ################################################# # Generate series n <- 100 alpha <- 0.95 sigma <- sqrt(1/(1-alpha^2)) beta <- 1.5 rho <- 0.95 # simple example filter(rep(1,10),0.5,method="recursive") et <- as.vector(filter(rnorm(n),alpha, method="recursive")) xt <- as.vector(filter(rnorm(n), rho, method="recursive")) yt <- beta*xt + et plot(xt,yt) # OLS regression, sans constant ... ols.regr <- lm(yt~xt-1); summary(ols.regr) plot(ols.regr) plot(residuals(ols.regr), type="o"); # explain this? acf(residuals(ols.regr)) # Actual SE of the ols estimator (first term is close to OLS) sigma/sqrt(n*(1/(1-rho^2))) * sqrt((1+alpha*rho)/(1-alpha*rho)) # GLS regression, using generalized differences gxt <- generalized.difference(xt,rho) gyt <- generalized.difference(yt,alpha) gls.regr <- lm(gyt ~ gxt-1); summary(gls.regr) # Simulate the estimators # Work dates back to Rao&Griliches, JASA, 1969 n.reps <- 500 results <- matrix(0,nrow=n.reps, ncol=5) colnames(results) <- c("ols","est.gls","gls","alpha","b") for(i in 1:n.reps) { et <- as.vector(filter(rnorm(n),alpha, method="recursive")) xt <- as.vector(filter(rnorm(n), rho, method="recursive")) yt <- beta*xt + et results[i,"ols"] <- coefficients( ols.regr<-lm(yt~xt-1) ) results[i,"alpha"] <- alpha.hat <- (acf(residuals(ols.regr),plot=FALSE)$acf)[2] gxt <- generalized.difference(xt,alpha.hat) gyt <- generalized.difference(yt,alpha.hat) results[i,"est.gls"] <- coefficients( lm(gyt ~ gxt-1) ) gxt <- generalized.difference(xt,alpha) gyt <- generalized.difference(yt,alpha) results[i,"gls"] <- coefficients( lm(gyt ~ gxt-1) ) newyt <- yt[-1] newxt <- xt[-1] lagyt <- yt[-n] lagxt <- xt[-n] big.regr <- lm(newyt ~ lagyt + newxt + lagxt - 1) results[i,"b"] <- coefficients(big.regr)[2] } apply(results,2,mean) apply(results,2,sd) boxplot(as.data.frame(results)) pairs(results)