### R commands for White estimator and bootstrap with time series setwd("/Users/bob/work/courses/stat910/") ################################################# # # Functions # ################################################# mean.na <- function(x) { mean(x,na.rm = TRUE) } sd.na <- function(x) { sd (x,na.rm = TRUE) } set.par <- function(row,col) par(mfrow=c(row,col), mar=c(3,3,3,1.5)) reset.par <- function( ) par(mfrow=c(1,1), mar=c(5,4,4,2)+0.1) # identity matrix, using wild indexing identity.matrix <- function(n) { result <- matrix(0,nrow=n,ncol=n); result[row(result)==col(result)] <- 1; result } # multiply by square root of inverse of AR(1) covariance matrix # s <- matmult.sqrt.ar.covinv(identity.matrix(5), 0.8) # upper tri # solve(t(s)%*%s) matmult.sqrt.ar.covinv <- function (mat, sigma, alpha) { n <- nrow(mat); p <- ncol(mat) result <- matrix(0,n,p) for(i in 1:n-1) result[i,]<-(mat[i,]-alpha*mat[i+1,]); result[n,]<-mat[n,] * sqrt(1-alpha^2) result/(sigma*sqrt(1-alpha^2)) } # approximate multiply by the square root of the covariance matrix # This will be *very* slow since covariance matrix is dense # matmult.ar.cov(identity.matrix(5),0.8) matmult.ar.cov <- function (mat, sigma, alpha) { n <- nrow(mat); p <- ncol(mat) result <- matrix(0,nrow=n,ncol=p) # result is same dim as input for(i in 1:n) { coef <- alpha^(abs((1:n)-i)); # row of gamma; could be cached result[i,]<-coef %*% mat; } sigma^2 * result } ## check # s <- matmult.sqrt.ar.covinv(identity.matrix(5), 3, 0.8) # round(matmult.ar.cov(t(s) %*% s,3,0.8), digits=4) # # model needs to have the x=TRUE option used in lm # # alpha is the residual autocorrelation ols.true.covariance.matrix <- function(regr, sigma, alpha) { x <- regr$x m <- x %*% solve(t(x) %*% x); # X(X'X)^-1 t(m) %*% matmult.ar.cov(m,sigma,alpha) # (X'X)^-1X' G X(X'X)^-1 } # s2 (X'X)^{-1} ols.nominal.covariance.matrix <- function(regr) { x <- regr$x; e <- residuals(regr); s2 <- as.vector((e%*%e)/regr$df.residual); s2 * solve(t(x) %*% x) } # make sure x is part of regression object white.covariance.matrix <- function(regr, block.size) { x <- regr$x; wx <- x %*% solve(t(x)%*%x) # X (X'X)^-1 n <- nrow(x); q <- ncol(x); n.blocks <- n/block.size; if(n.blocks != as.integer(n.blocks)) { cat("Need n = multiple of block size\n"); return (0); } g <- as.factor(rep(1:n.blocks,rep(block.size,n.blocks))) # blocking group e <- split(residuals(regr),g); # group residuals indx <- split(1:n, g); result <- matrix(0,nrow=q,ncol=q) for (b in 1:n.blocks) { v <- as.vector(e[[b]] %*% wx[ indx[[b]], ]); result <- result + v %o% v; # outer product } colnames(result) <- colnames(x) result } # block bootstrap covariance estimates; need regr to have x, y defined block.bs.covariance.matrix <- function(regr, block.size, n.reps) { x <- regr$x; y <- regr$y n <- nrow(x); q <- ncol(x); beta <- matrix(0,n.reps,q) n.blocks <- n/block.size; if(n.blocks != as.integer(n.blocks)) { cat("Need n = multiple of block size\n"); return (0); } for(b in 1:n.reps) { i <- sample(1:(n-block.size), n.blocks, replace=TRUE) i <- as.vector(outer(i,0:block.size-1,"+")) y.s <- y[i]; x.s <- x[i,] beta[b,] <- qr.solve(x.s,y.s) } cov(beta) } ################################################# # # Data # ################################################# ### Temperature, particulates, and mortality # 6-day averages, after smoothing, n = 508 # reduce length to 500 to make it easier to have integer blocks sizes mort <- scan("data/ss/cmort.dat")[1:500] temp <- scan("data/ss/temp.dat")[1:500] part <- scan("data/ss/part.dat")[1:500] # nonlinear in temp pairs(cbind(mort,temp,part)) # center temp variable ctemp <- (temp-mean(temp)) ctemp2 <- ctemp^2 time <- 1:(length(temp)) # R2 = 0.5938; get model with x and y included qregr <- lm(mort ~ time + ctemp + ctemp2 + part, x=TRUE,y=TRUE); summary(qregr) s2 <- as.numeric((residuals(qregr)%*% residuals(qregr))/qregr$df.residual) # autocorrelation remains, with peak at lag 2 set.par(2,1) plot(residuals(qregr), type="o") acf(residuals(qregr)) reset.par() ################################################# # # Compare SE estimates with real data # ################################################# ols <- ols.nominal.covariance.matrix(qregr); diag(ols) # true to the extent that s2 = sigma^2, ar(1) errors tru <- ols.true.covariance.matrix(qregr,sqrt(s2),0.6); diag(tru) round(diag(tru)/diag(ols),1) # what is effect of block size whi100 <- white.covariance.matrix (qregr, 100) ; round(diag(tru)/diag(whi100),1) whi050 <- white.covariance.matrix (qregr, 50) ; round(diag(tru)/diag(whi050),1) whi025 <- white.covariance.matrix (qregr, 25) ; round(diag(tru)/diag(whi025),1) # bootstrap bbs050<- block.bs.covariance.matrix(qregr,50,500) ; round(diag(tru)/diag(bbs050),1) round(diag(whi050)/diag(bbs050),1) ################################################# # # Compare SE estimates with simulated data # ################################################# # build response to be fitted values from prior regr # with AR error terms; force errors to have obs sd alpha <- 0.8; sigma <- sqrt(s2) et <- filter(rnorm(500), filter=alpha, method="recursive") et <- et * sigma/sd(et) sim.mort <- fitted.values(qregr) + et sim.qregr <- lm(sim.mort ~ time + ctemp + ctemp2 + part, x=TRUE,y=TRUE); summary(sim.qregr) tru <- ols.true.covariance.matrix(sim.qregr,sigma,alpha); diag(tru) ols <- ols.claimed.covariance.matrix(sim.qregr); diag(ols) round(diag(tru)/diag(ols),1) # what is effect of block size whi025 <- white.covariance.matrix (sim.qregr, 25) round(diag(tru)/diag(whi025),1) whi050 <- white.covariance.matrix (sim.qregr, 50) round(diag(tru)/diag(whi050),1) whi100 <- white.covariance.matrix (sim.qregr, 100) round(diag(tru)/diag(whi100),1) # bootstrap bbs050 <- block.bs.covariance.matrix(sim.qregr,50,500) round(diag(tru)/diag(bbs050),1) round(diag(whi050)/diag(bbs050),1) ################################################# # # Iterate these steps ( what about covariances or other functions of var mat? ) # ################################################# alpha <- 0.95; sigma <- sqrt(s2) tru <- ols.true.covariance.matrix(sim.qregr, sigma, alpha); n.reps <- 100 var.white050 <- var.white100 <- var.white025 <- matrix(0, nrow=n.reps, ncol=5) var.bs050 <- var.bs100 <- var.bs025 <- matrix(0, nrow=n.reps, ncol=5) for(rep in 1:n.reps) { cat(rep) et <- filter(rnorm(500), filter=alpha, method="recursive") et <- et * sigma/sd(et) sim.mort <- fitted.values(qregr) + et sim.qregr <- lm(sim.mort ~ time + ctemp + ctemp2 + part, x=TRUE,y=TRUE); var.white025[rep,] <- diag(white.covariance.matrix(sim.qregr, 25)) var.white050[rep,] <- diag(white.covariance.matrix(sim.qregr, 50)) var.white100[rep,] <- diag(white.covariance.matrix(sim.qregr,100)) var.bs025[rep,] <- diag(block.bs.covariance.matrix(sim.qregr, 25,500)) var.bs050[rep,] <- diag(block.bs.covariance.matrix(sim.qregr, 50,500)) var.bs100[rep,] <- diag(block.bs.covariance.matrix(sim.qregr,100,500)) } apply(var.white100,2,mean) do.boxplot <- function(col) { boxplot(as.data.frame(cbind(var.white025[,col],var.white050[,col], var.white100[,col], var.bs025[,col], var.bs050[,col], var.bs100[,col])), names=c("Wh 25", "Wh 50", "Wh 100", "BBS 25", "BBS 50", "BBS 100")) abline(h=tru[col,col],col="red") } set.par(2,2) do.boxplot(2); do.boxplot(3); do.boxplot(4); do.boxplot(5) reset.par() do.pairs <- function(col) { pairs(as.data.frame(cbind(var.white025[,col],var.white050[,col], var.white100[,col], var.bs025[,col], var.bs050[,col], var.bs100[,col])), labels=c("Wh 25", "Wh 50", "Wh 100", "BBS 25", "BBS 50", "BBS 100")) } do.pairs(3)