### R commands for Lecture 5 on regressinon setwd("/Users/bob/work/courses/stat910/") ################################################# # # Functions # ################################################# mean.na <- function(x) { mean(x,na.rm = TRUE) } sd.na <- function(x) { sd (x,na.rm = TRUE) } lag.plot <- function(xt,lag) { n <- length(xt); plot(xt[-(n:(n-lag+1))],xt[-(1:lag)], xlab=paste("X[t-",lag,"]"), ylab="X_t") } generalized.difference <- function(xt, alpha) { n<-length(xt) c(NA, xt[-1]-alpha*xt[-n]) # jam a leading missing value } ################################################# # # Examples # ################################################# ### Global temperature since 1900 data <- scan("data/ss/globtemp.dat") xt <- data[45:142] time <- 1900:1997 plot(time,xt, type="o") # this type connect the dots # Line is not such a great fit regr <- lm(xt ~ time); summary(regr) abline(regr, col="red") plot(regr) # Sequence plot of the residuals plot(time,residuals(regr),type="o"); abline(h=0, col="gray") # Autocorrelation plot of residuals (where to horizontal lines come from?) acf(residuals(regr), max.lag=20) points(0:20,0.5^(0:20),col="red") # Lag plots to make sure nothing odd (outliers, curvature) e <- residuals(regr) n <- length(e) plot(e[-n],e[-1]) par(mfrow=c(2,2)) for(i in 2:6) { lag.plot(e,i) } par(mfrow=c(1,1)) ### Temperature, particulates, and mortality # 6-day averages, after smoothing mort <- scan("data/ss/cmort.dat") temp <- scan("data/ss/temp.dat") part <- scan("data/ss/part.dat") par(mfrow=c(3,1)) plot(mort, type="o") plot(temp, type="o") plot(part, type="o") par(mfrow=c(1,1)) pairs(cbind(mort,temp,part)) plot(temp, mort) lines(lowess(temp,mort)) ctemp <- temp-mean(temp) ctemp2 <- ctemp^2 time <- 1:(length(temp)) qregr <- lm(mort ~ ctemp + ctemp2); summary(qregr) index <- order(temp) lines(temp[index], fitted.values(qregr)[index], col="red") acf(residuals(qregr)) ################################################# # # OLS vs GLS # ################################################# # Generate series n <- 100 alpha <- 0.95 sigma <- sqrt(1/(1-alpha^2)) beta <- 1.5 rho <- 0.95 et <- as.vector(filter(rnorm(n),alpha, method="recursive")) xt <- as.vector(filter(rnorm(n), rho, method="recursive")) yt <- beta*xt + et plot(xt,yt) # OLS regression, sans constant ols.regr <- lm(yt~xt-1); summary(ols.regr) plot(ols.regr) plot(residuals(ols.regr), type="o"); acf(residuals(ols.regr)) # Actual SE of the ols estimator (first term is close to OLS) sigma/sqrt(n*(1/(1-rho^2))) * sqrt((1+alpha*rho)/(1-alpha*rho)) # GLS regression, using generalized differences gxt <- generalized.difference(xt,rho) gyt <- generalized.difference(yt,alpha) gls.regr <- lm(gyt ~ gxt-1); summary(gls.regr) # Simulate the estimators # Work dates back to Rao&Griliches, JASA, 1969 n.reps <- 500 results <- matrix(0,nrow=n.reps, ncol=5) colnames(results) <- c("ols","est.gls","gls","alpha","b") for(i in 1:n.reps) { et <- as.vector(filter(rnorm(n),alpha, method="recursive")) xt <- as.vector(filter(rnorm(n), rho, method="recursive")) yt <- beta*xt + et results[i,"ols"] <- coefficients( ols.regr<-lm(yt~xt-1) ) results[i,"alpha"] <- alpha.hat <- (acf(residuals(ols.regr),plot=FALSE)$acf)[2] gxt <- generalized.difference(xt,alpha.hat) gyt <- generalized.difference(yt,alpha.hat) results[i,"est.gls"] <- coefficients( lm(gyt ~ gxt-1) ) gxt <- generalized.difference(xt,alpha) gyt <- generalized.difference(yt,alpha) results[i,"gls"] <- coefficients( lm(gyt ~ gxt-1) ) newyt <- yt[-1] newxt <- xt[-1] lagyt <- yt[-n] lagxt <- xt[-n] big.regr <- lm(newyt ~ lagyt + newxt + lagxt - 1) results[i,"b"] <- coefficients(big.regr)[2] } apply(results,2,mean) apply(results,2,sd) boxplot(as.data.frame(results)) pairs(results)