### Simulate means and covariances of moving-average processes

# Apply a one-sided (causal) moving-average filter to the input series.
# Returns a ts object; the first length(coef) - 1 values are NA because
# the filter has nothing to look back on there.
sim.movavg <- function(input, coef) {
  filter(input, coef, sides = 1)
}

# Autocovariances gamma(0 .. len-1) of a moving average with weights coef
# driven by unit-variance white noise: gamma(h) = sum_j coef[j] * coef[j + h].
# Computed by repeatedly shifting a copy of coef left and zero-padding.
cov.movavg <- function(coef) {
  len <- length(coef)
  cov <- rep(0, len)
  c.rot <- coef
  for (i in seq_len(len)) {
    cov[i] <- sum(coef * c.rot)
    # Shift left, pad with 0. Use c.rot[-1] rather than c.rot[2:len]:
    # when len == 1, 2:len is 2:1 and silently yields the wrong vector.
    c.rot <- c(c.rot[-1], 0)
    cat(c.rot, "\n")  # show the rotating vector
  }
  cov
}

# Convenience wrappers: mean and sd that ignore missing values.
mean.na <- function(x) {
  mean(x, na.rm = TRUE)
}

sd.na <- function(x) {
  sd(x, na.rm = TRUE)
}

# Feeding the filter an "impulse" function exposes the filter weights.
# This example also shows how R's filter function reverses the order of
# the moving average weights in the calculations and inserts missing values.
x <- sim.movavg(c(0, 0, 1, 0, 0, 0, 0, 0), c(.8, .5, .1))

# When fed Gaussian white noise, you see a superposition of the weights,
# and so the filter weights are not so easily observed.
coef <- c(.8, .7, .6, .5, .4, .3)
x <- sim.movavg(rnorm(300), coef)
plot(x)

# Note the effect of missing values on a typical R function (mean).
# The presence of any missing data sets the function result to NA.
# The convenience wrapper handles missing values.
mean(x)
mean.na(x)

# To compare the OLS and GLS estimates of the mean of a process,
# I first normalize the coefficients so that the variance of the
# output process matches the variance of the input process.
coef <- coef / sqrt(sum(coef * coef))

# Here are the true covariances.
# g(0) = 1 by construction, so we can interpret these as correlations.
rho <- cov.movavg(coef)
rho

# Now I'll simulate iid Gaussian white noise and then use that white
# noise to produce a moving average. The noise and moving average are
# obviously correlated. The mean of the iid noise *is* the GLS
# estimate of the mean. Think about that one...
# Compare the naive (OLS) mean of the white noise with the mean of the
# moving average built from that same noise.
n <- 200
x.ind <- rnorm(n)  # was rnorm(200); use n so the two stay in sync
x.ma <- sim.movavg(x.ind, coef)
mean.na(x.ind)
mean.na(x.ma)

# Simulate two estimators to compare the standard errors
n <- 200
k <- length(coef)
n.reps <- 100
results <- matrix(0, nrow = n.reps, ncol = 2)
for (i in seq_len(n.reps)) {
  x <- rnorm(n)                  # x is the input white noise
  xbar <- mean(x[-(1:(k - 1))])  # exclude to compensate for missing in filtered data
  x.ma <- sim.movavg(x, coef)    # moving average derived from x
  xbar.ma <- mean.na(x.ma)
  results[i, 1] <- xbar
  results[i, 2] <- xbar.ma
}
plot(results[, 1], results[, 2])  # shows the resulting paired comparison
apply(results, 2, sd)             # sd(results[,i])
boxplot(results[, 1], results[, 2])

# Now move on to the covariances. The function acf handles
# the details, producing a named list of results. I only want the acf
# component.
n <- 200
k <- length(coef)
n.reps <- 1000
n.lags <- 20
results <- matrix(0, nrow = n.reps, ncol = n.lags + 1)
for (i in seq_len(n.reps)) {
  x.ma <- sim.movavg(rnorm(n + k - 1), coef)  # so that it's n long after missing
  x.ma <- x.ma[-(1:(k - 1))]                  # avoid initial NA
  # Original had the garbled token \$acf; the acf component is extracted
  # from the list returned by acf() with plain $acf.
  results[i, ] <- acf(x.ma, lag.max = n.lags, type = "correlation",
                      plot = FALSE)$acf
}

# Scatterplot matrix shows the dependence among the estimates
pairs(results[, 2:6])        # rho(1) == 1
plot(apply(results, 2, sd))  # larger variances at smaller rho

# Check out the bias and variance. It is useful to explore how the evident
# bias changes as we go from n=200 to n=1000, say.
boxplot(as.data.frame(results[, 2:10]))  # a data frame is a list of columns
points(1:5, rho[2:6], col = "red", pch = 19)
points(6:9, rep(0, 4), col = "red", pch = 19)

# Treat the estimated correlations (at large lags) as a time series.
# Look at the dependence in this time series.
# Estimate the ACF out to many lags from one long realization.
n <- 1000
n.lags <- 250
# BUG FIX: the original called sim.ma, which is never defined; the
# simulator defined earlier in this file is sim.movavg. As before, drop
# the k - 1 leading NAs that the one-sided filter produces.
x.ma <- sim.movavg(rnorm(n + k - 1), coef)[-(1:(k - 1))]
# (The garbled \$acf token is the $acf component of the list acf() returns.)
rho <- acf(x.ma, lag.max = n.lags, type = "correlation", plot = FALSE)$acf
rho <- as.vector(rho)

# Notice the ripples in the ACF after about 20-30 lags.
# The acf starts to look itself like a stationary process, albeit
# one that is "smoother" (and thus more dependent) than the original
# time series.
plot(rho)
abline(h = 0, col = "gray")

# ACF of the acf
plot(acf(rho[40:n.lags], lag.max = 50))