### Simulate means and covariances of processes
# Simulate a one-sided moving average: output[t] = sum_j coef[j] * input[t-j+1].
# `input` is the driving series (e.g. white noise); `coef` the MA weights.
# Returns a "ts" object with NA in the first length(coef)-1 positions, where
# the filter window is incomplete.
# Calls stats::filter explicitly so the code still works when a package that
# masks base filter() (e.g. dplyr) is attached.
sim.movavg <- function(input, coef) {
  stats::filter(input, coef, sides = 1)
}
# Theoretical autocovariances of the moving average with weights `coef`,
# driven by unit-variance white noise:
#   g[h] = sum_j coef[j] * coef[j+h],  for lags h = 0, ..., length(coef)-1.
# Returns a numeric vector g with g[i] holding the covariance at lag i-1.
# (The debugging cat() that printed the rotating weight vector is removed,
# and the local is no longer named `cov`, which shadowed stats::cov.)
cov.movavg <- function(coef) {
  len <- length(coef)
  vapply(seq_len(len) - 1L,
         function(lag) sum(coef[seq_len(len - lag)] * coef[(1 + lag):len]),
         numeric(1))
}
# Mean that ignores missing values: drop the NAs first, then average the rest.
mean.na <- function(x) { mean(x[!is.na(x)]) }
# Standard deviation that ignores missing values: drop the NAs before computing.
sd.na <- function(x) { sd(x[!is.na(x)]) }
# Feeding the filter an "impulse" function exposes the filter weights.
# This example also shows how R's filter function reverses the order of
# the moving average weights in the calculations and inserts missing values
# (NA) at the start, where the filter window is incomplete.
x<-sim.movavg(c(0,0,1,0,0,0,0,0), c(.8,.5,.1))
# When fed Gaussian white noise, you see a superposition of the weights,
# and so the filter weights are not so easily observed.
coef <- c(.8,.7,.6,.5,.4,.3)
x<-sim.movavg(rnorm(300),coef)
plot(x)
# Note the effect of missing values on a typical R function (mean).
# The presence of any missing data sets the function result to NA,
# while the convenience wrapper mean.na handles the missing values.
mean(x)
mean.na(x)
# To compare the OLS and GLS estimates of the mean of a process,
# I first normalize the coefficients so that the variance of the
# output process matches the variance of the input process
# (i.e. so the squared weights sum to one).
coef <- coef / sqrt(sum(coef*coef))
# Here are the true covariances of the moving average process.
# g(0) = 1 by construction, so we can interpret these as correlations.
rho <- cov.movavg(coef)
rho
# Now I'll simulate iid Gaussian white noise and then use that white
# noise to produce a moving average. The noise and moving average are
# obviously correlated. The mean of the iid noise *is* the GLS
# estimate of the mean. Think about that one...
n <- 200;
x.ind <- rnorm(n)   # use n (was a repeated literal 200, which broke if n changed)
x.ma <- sim.movavg(x.ind,coef)
mean.na(x.ind)
mean.na(x.ma)
# Simulate two estimators to compare the standard errors:
# column 1 holds the mean of the white noise (the GLS estimate),
# column 2 the mean of the derived moving average (the OLS estimate).
n <- 200;
k <- length(coef)
n.reps <- 100
results <- matrix(0,nrow=n.reps,ncol=2)
for(i in 1:n.reps) {
x <- rnorm(n); # x is the input white noise
xbar <- mean(x[-(1:(k-1))]); # exclude to compensate for missing in filtered data
x.ma <- sim.movavg(x,coef) # moving average derived from x
xbar.ma <- mean.na(x.ma)
results[i,1]<- xbar; results[i,2]<- xbar.ma
}
plot(results[,1], results[,2]) # shows the resulting paired comparison
apply(results,2,sd) # standard error of each estimator, i.e. sd(results[,i])
boxplot(results[,1],results[,2])
# Now move on to the covariances. The function acf handles
# the details, producing a named list of results. I only want the acf
# component. Column j of results holds the estimated correlation at lag j-1.
n <- 200
k <- length(coef)
n.reps <- 1000
n.lags <- 20
results <- matrix(0,nrow=n.reps,ncol=n.lags+1)
for(i in 1:n.reps) {
x.ma <- sim.movavg(rnorm(n+k-1),coef) # extra k-1 inputs so it's n long after missing
x.ma <- x.ma[-(1:(k-1))] # avoid initial na
results[i,] <- acf(x.ma,lag.max=n.lags,type="correlation", plot=FALSE)$acf
}
# Scatterplot matrix shows the dependence among the estimates
pairs(results[,2:6]) # skip column 1: the lag-0 correlation is identically 1
plot(apply(results,2,sd)) # larger variances at smaller rho
# Check out the bias and variance. It is useful to explore how the evident
# bias changes as we go from n=200 to n=1000, say.
boxplot(as.data.frame(results[,2:10])) # a data frame is a list of columns
points(1:5,rho[2:6],col="red", pch=19) # true correlations at lags 1-5
points(6:9,rep(0,4),col="red", pch=19) # true correlations vanish beyond lag k-1
# Treat the estimated correlations (at large lags) as a time series
# and look at the dependence in this time series.
n <- 1000
n.lags <- 250
x.ma <- sim.movavg(rnorm(n+k-1),coef)[-(1:(k-1))]  # fixed: was sim.ma, which is undefined
rho <- acf(x.ma,lag.max=n.lags,type="correlation", plot=FALSE)$acf
rho <- as.vector(rho)
# Notice the ripples in the ACF after about 20-30 lags.
# The acf starts to look itself like a stationary process, albeit
# one that is "smoother" (and thus more dependent) than the original
# time series.
plot(rho); abline(h=0,col="gray")
# ACF of the acf, using only the large-lag portion
plot(acf(rho[40:n.lags], lag.max=50))