# Resampling for time series


########################################################################
#   
#   Simulations -- in general -- provide insights for time series
#            Example: skewness in the distribution of an AR(1) coef
#
########################################################################

# --- simulate ols estimator for an AR(1) process

n      <- 50
phi    <- 0.95

n.reps  <- 400
phi.hat.ls <- rep(0,n.reps)
phi.hat.yw <- rep(0,n.reps)
phi.hat.bg <- rep(0,n.reps)
for(rep in 1:n.reps) {
	yt <- arima.sim(list(ar=c(phi)), n, n.start=50)
	phi.hat.ls[rep] <- ar.ols(yt, aic=FALSE, order.max=1, demean=TRUE)$ar
	phi.hat.yw[rep] <- ar.yw (yt, aic=FALSE, order.max=1, demean=TRUE)$ar
	phi.hat.bg[rep] <- ar.burg (yt, aic=FALSE, order.max=1, demean=TRUE)$ar
	}

hist(phi.hat.ls, freq=FALSE, ylim=c(0,9), xlim=c(0,1)); 
lines(density(phi.hat.ls), col="blue")
lines(density(phi.hat.yw), col="blue", lty=2)
lines(density(phi.hat.bg), col="blue", lty=4)

se <- sqrt(1-phi^2)/sqrt(n)
z <- seq(0,1,length.out=200);
lines(z,dnorm(z, mean=phi, sd=se), col="red")


########################################################################
#   
#   Bootstrap resampling
#
########################################################################


# --- Bootstrap resampling for the mean
#     R has a package for bootstrap, but I will do these 'by hand'

resample <- function(x, n=length(x)) { sample(x,n,replace=TRUE) }


# --- Sample from a normal distribution, then use resampling to estimate its sampling distribution
#     In this case, we know the right answer

n  <- 30
mu    <- 100;
sigma <- 25
x  <- mu + sigma * rnorm(n)   # non-zero mean, non-one SD make sure location and scale work!

# - classical SE and 90% interval
se <- sd(x)/sqrt(n-1)   ; se
ci <- mean(x) + se * qt(c(0.05,0.95),n-1); ci

# - bootstrap version
B <- 100
m.star <- rep(0,B)
for(b in 1:B) { 
	x.star <- resample(x); 
	m.star[b] <- mean(x.star);
	}
	
sd(m.star)
quantile(m.star,c(0.05,0.95))

# --- compare bootstrap CDF of mean to "true" dist
ms <- c(min(m.star)-sd(m.star), sort(m.star), max(m.star)+sd(m.star))
plot(ms, seq(0,1,length.out=length(ms)), type="s", ylab="BS CDF", xlim=c(85,115))
lines(ms, pnorm(ms,mean=mean(x), sd=sd(x)/sqrt(n)), col="black", lty=1)
# - if you knew mu and sigma
ms <- seq(mu - 4*sigma/sqrt(n), mu+4*sigma/sqrt(n), by=0.1)
lines(ms, pnorm(ms,mean=mu, sd=sigma/sqrt(n)), col="red", lty=2)


########################################################################
#   
#   Resampling dependent data
#    Data are an autoregression with mean 100; naive resampling fails
#
########################################################################

# --- Expression for variance of X-bar from AR(1) with regr coef phi
se.ar.mean <- function(phi,sigma2, n) {
	v <- n;
	for (h in 1:(n-1)) v <- v + 2 * (n-h) * phi^h
	sqrt(v * sigma2/(1-phi^2)) / n
}

# --- Pick process
n       <- 100
sigma   <- 10
phi     <- 0.9    # try some larger phi as well
gamma.0 <- sigma^2/(1-phi^2)

# --- simulate std error of mean and phi^ of autoregression
n.reps <- 1000
p.sim <- rep(0,n.reps)
m.sim <- rep(0,n.reps)
for(i in 1:n.reps) {
  y <- arima.sim(list(ar=c(phi)), n, rand.gen=function(n){rnorm(n,sd=sigma)}, n.start=50)
  m.sim[i] <- mean(y)
  p.sim[i] <- ar.burg(y,aic=FALSE,order.max=1)$ar
}

# --- compare simulated SE to right answer
se.ar.mean(phi,sigma^2,n);       sd(m.sim)
    sqrt(1-phi^2)/sqrt(n);       sd(p.sim)


#################################################################################
#     
#   Compare bootstrap estimates to those from plug-in estimates
#
#################################################################################

# - original data
yt <- arima.sim(list(ar=c(phi)), n, rand.gen=function(n){rnorm(n,sd=sigma)}, n.start=50)

# - original fit
fitted.model <- ar.burg(yt,aic=FALSE,order.max=1)
phi.hat      <- fitted.model$ar; phi.hat
sigma.hat    <- sd(fitted.model$resid, na.rm=TRUE); sigma.hat
gamma0.hat   <- sigma.hat^2/(1-phi.hat^2); gamma0.hat; var(yt)             # close?

se.ar.mean(phi.hat,sigma.hat^2,n)   # plug-in estimator


# --- parametric bootstrap procedure; make special function for resampling errors
wt <- fitted.model$resid[-1]  # first is missing
rboot <- function(n) resample(wt,n) 
yt.star <- arima.sim(list(ar=c(phi.hat)), n, rand.gen=rboot, n.start=50)
plot(yt.star)

B <- 1000
m.star   <- rep(0,B)
phi.star <- rep(0,B)
for(b in 1:B) { 
	yt.star <- arima.sim(list(ar=c(phi.hat)), n, rand.gen=rboot, n.start=50)
	  m.star[b] <- mean(yt.star);
	phi.star[b] <- ar.burg(yt.star,aic=FALSE,order.max=1)$ar
	}
	
sd(m.star)   # compare to plug-in estimator
quantile(m.star,c(0.05,0.95))

hist(phi.star, freq=FALSE, ylim=c(0,9), xlim=c(0,1)); 
lines(density(phi.star), col="blue")

# --- compare to plug-in estimate
se <- sqrt(1-phi.hat^2)/sqrt(n)
z <- seq(0,1,length.out=200);
lines(z,dnorm(z, mean=phi.hat, sd=se), col="red")


########################################################################
#   
#   Subsampling...  prior bootstrap depends on knowing its an AR(1)
# 
########################################################################

# applies the function 'stat' to the blocks with indicated length and overlap 
# assume result of stat is scalar

apply.to.blocks <- function(stat, x, block.len, shift) {
	starting.index <- seq(1,length(x)-block.len+1,by=shift)  # let R do the messy part
	N <- length(starting.index)
	z <- rep(NA,N)                                          
	for (j in 1:length(z)) z[j] <- stat(x[starting.index[j]:(starting.index[j]+block.len-1)])
	z
}

apply.to.blocks(mean, 1:100, 10, 8)


# --- Apply to an iid process (keep in blocks, though could sample randomly)
n     <- 100
sigma <- 10
phi   <- 0.8

x <- mu + sigma * rnorm(n)

block.size <- 50
shift      <-  5
mm <- apply.to.blocks(mean, x, block.size, shift); cat(length(mm), "blocks\n")

# rescale
mm <- sqrt(block.size) * (mm-mean(x)) 

# close to right?  Cmp to dist of sqrt(n)(xbar-mu)
plot(c(min(mm)-sigma/2,sort(mm),max(mm)+sigma/2),c(0,(1:length(mm))/length(mm),1),type="s",
      xlab="sqrt(b)(stat-parm)", ylab="Empirical CDF")
z <- seq(-3*sigma, 3*sigma, by=sigma/(10))
lines(z,pnorm(z,mean=0, sd=sigma), col="red")


# --- Apply to mean of an AR(1) process

n     <- 1000
mu    <- 1000
sigma <-   25 

x <- mu + arima.sim(list(ar=c(phi)), n, rand.gen=function(n){rnorm(n,sd=sigma)}, n.start=50)

block.size <-100
shift      <- 20
mm <- apply.to.blocks(mean, x, block.size, shift); cat(length(mm), "blocks\n")

# rescale
mm <- sqrt(block.size) * (mm-mean(x)) 

# close to right?  Cmp to dist of sqrt(n)(xbar-mu)
plot(c(min(mm)-sigma/2,sort(mm),max(mm)+sigma/2),c(0,(1:length(mm))/length(mm),1),type="s",
      xlab="sqrt(b)(stat-parm)", ylab="Empirical CDF")

s <- sqrt(n)*se.ar.mean(phi,sigma^2,n); cat(s,"= asym SD of mean\n")
z <- seq(-3*s, 3*s, by=s/(10))
lines(z,pnorm(z,mean=0, sd=s), col="red")