# Resampling for time series ######################################################################## # # Simulations -- in general -- provide insights for time series # Example: skewness in the distribution of an AR(1) coef # ######################################################################## # --- simulate ols estimator for an AR(1) process n <- 50 phi <- 0.95 n.reps <- 400 phi.hat.ls <- rep(0,n.reps) phi.hat.yw <- rep(0,n.reps) phi.hat.bg <- rep(0,n.reps) for(rep in 1:n.reps) { yt <- arima.sim(list(ar=c(phi)), n, n.start=50) phi.hat.ls[rep] <- ar.ols(yt, aic=FALSE, order.max=1, demean=TRUE)$ar phi.hat.yw[rep] <- ar.yw (yt, aic=FALSE, order.max=1, demean=TRUE)$ar phi.hat.bg[rep] <- ar.burg (yt, aic=FALSE, order.max=1, demean=TRUE)$ar } hist(phi.hat.ls, freq=FALSE, ylim=c(0,9), xlim=c(0,1)); lines(density(phi.hat.ls), col="blue") lines(density(phi.hat.yw), col="blue", lty=2) lines(density(phi.hat.bg), col="blue", lty=4) se <- sqrt(1-phi^2)/sqrt(n) z <- seq(0,1,length.out=200); lines(z,dnorm(z, mean=phi, sd=se), col="red") ######################################################################## # # Bootstrap resampling # ######################################################################## # --- Bootstrap resampling for the mean # R has a package for bootstrap, but I will do these 'by hand' resample <- function(x, n=length(x)) { sample(x,n,replace=TRUE) } # --- Sample from a normal distribution, then use resampling to estimate its sampling distribution # In this case, we know the right answer n <- 30 mu <- 100; sigma <- 25 x <- mu + sigma * rnorm(n) # non-zero mean, non-one SD make sure location and scale work! # - classical SE and 90% interval se <- sd(x)/sqrt(n-1) ; se ci <- mean(x) + se * qt(c(0.05,0.95),n-1); ci # - bootstrap version B <- 100 m.star <- rep(0,B) for(b in 1:B) { x.star <- resample(x); m.star[b] <- mean(x.star); } sd(m.star) quantile(m.star,c(0.05,0.95)) # --- compare bootstrap CDF of mean to "true" dist ms <- c(min(m.star)-sd(m.star), sort(m.star), max(m.star)+sd(m.star)) plot(ms, seq(0,1,length.out=length(ms)), type="s", ylab="BS CDF", xlim=c(85,115)) lines(ms, pnorm(ms,mean=mean(x), sd=sd(x)/sqrt(n)), col="black", lty=1) # - if you knew mu and sigma ms <- seq(mu - 4*sigma/sqrt(n), mu+4*sigma/sqrt(n), by=0.1) lines(ms, pnorm(ms,mean=mu, sd=sigma/sqrt(n)), col="red", lty=2) ######################################################################## # # Resampling dependent data # Data are an autoregression with mean 100; naive resampling fails # ######################################################################## # --- Expression for variance of X-bar from AR(1) with regr coef phi se.ar.mean <- function(phi,sigma2, n) { v <- n; for (h in 1:(n-1)) v <- v + 2 * (n-h) * phi^h sqrt(v * sigma2/(1-phi^2)) / n } # --- Pick process n <- 100 sigma <- 10 phi <- 0.9 # try some larger phi as well gamma.0 <- sigma^2/(1-phi^2) # --- simulate std error of mean and phi^ of autoregression n.reps <- 1000 p.sim <- rep(0,n.reps) m.sim <- rep(0,n.reps) for(i in 1:n.reps) { y <- arima.sim(list(ar=c(phi)), n, rand.gen=function(n){rnorm(n,sd=sigma)}, n.start=50) m.sim[i] <- mean(y) p.sim[i] <- ar.burg(y,aic=FALSE,order.max=1)$ar } # --- compare simulated SE to right answer se.ar.mean(phi,sigma^2,n); sd(m.sim) sqrt(1-phi^2)/sqrt(n); sd(p.sim) ################################################################################# # # Compare bootstrap estimates to those from plug-in estimates # ################################################################################# # - original data yt <- arima.sim(list(ar=c(phi)), n, rand.gen=function(n){rnorm(n,sd=sigma)}, n.start=50) # - original fit fitted.model <- ar.burg(yt,aic=FALSE,order.max=1) phi.hat <- fitted.model$ar; phi.hat sigma.hat <- sd(fitted.model$resid, na.rm=TRUE); sigma.hat gamma0.hat <- sigma.hat^2/(1-phi.hat^2); gamma0.hat; var(yt) # close? se.ar.mean(phi.hat,sigma.hat^2,n) # plug-in estimator # --- parametric bootstrap procedure; make special function for resampling errors wt <- fitted.model$resid[-1] # first is missing rboot <- function(n) resample(wt,n) yt.star <- arima.sim(list(ar=c(phi.hat)), n, rand.gen=rboot, n.start=50) plot(yt.star) B <- 1000 m.star <- rep(0,B) phi.star <- rep(0,B) for(b in 1:B) { yt.star <- arima.sim(list(ar=c(phi.hat)), n, rand.gen=rboot, n.start=50) m.star[b] <- mean(yt.star); phi.star[b] <- ar.burg(yt.star,aic=FALSE,order.max=1)$ar } sd(m.star) # compare to plug-in estimator quantile(m.star,c(0.05,0.95)) hist(phi.star, freq=FALSE, ylim=c(0,9), xlim=c(0,1)); lines(density(phi.star), col="blue") # --- compare to plug-in estimate se <- sqrt(1-phi.hat^2)/sqrt(n) z <- seq(0,1,length.out=200); lines(z,dnorm(z, mean=phi.hat, sd=se), col="red") ######################################################################## # # Subsampling... prior bootstrap depends on knowing its an AR(1) # ######################################################################## # applies the function 'stat' to the blocks with indicated length and overlap # assume result of stat is scalar apply.to.blocks <- function(stat, x, block.len, shift) { starting.index <- seq(1,length(x)-block.len+1,by=shift) # let R do the messy part N <- length(starting.index) z <- rep(NA,N) for (j in 1:length(z)) z[j] <- stat(x[starting.index[j]:(starting.index[j]+block.len-1)]) z } apply.to.blocks(mean, 1:100, 10, 8) # --- Apply to an iid process (keep in blocks, though could sample randomly) n <- 100 sigma <- 10 phi <- 0.8 x <- mu + sigma * rnorm(n) block.size <- 50 shift <- 5 mm <- apply.to.blocks(mean, x, block.size, shift); cat(length(mm), "blocks\n") # rescale mm <- sqrt(block.size) * (mm-mean(x)) # close to right? Cmp to dist of sqrt(n)(xbar-mu) plot(c(min(mm)-sigma/2,sort(mm),max(mm)+sigma/2),c(0,(1:length(mm))/length(mm),1),type="s", xlab="sqrt(b)(stat-parm)", ylab="Empirical CDF") z <- seq(-3*sigma, 3*sigma, by=sigma/(10)) lines(z,pnorm(z,mean=0, sd=sigma), col="red") # --- Apply to mean of an AR(1) process n <- 1000 mu <- 1000 sigma <- 25 x <- mu + arima.sim(list(ar=c(phi)), n, rand.gen=function(n){rnorm(n,sd=sigma)}, n.start=50) block.size <-100 shift <- 20 mm <- apply.to.blocks(mean, x, block.size, shift); cat(length(mm), "blocks\n") # rescale mm <- sqrt(block.size) * (mm-mean(x)) # close to right? Cmp to dist of sqrt(n)(xbar-mu) plot(c(min(mm)-sigma/2,sort(mm),max(mm)+sigma/2),c(0,(1:length(mm))/length(mm),1),type="s", xlab="sqrt(b)(stat-parm)", ylab="Empirical CDF") s <- sqrt(n)*se.ar.mean(phi,sigma^2,n); cat(s,"= asym SD of mean\n") z <- seq(-3*s, 3*s, by=s/(10)) lines(z,pnorm(z,mean=0, sd=s), col="red")