# rollAnalysis.ssc scripts for Rolling Analysis chapter
# author: Eric Zivot
# created: February 8, 2002
# updated: May 20, 2002
#
###################################################################
module(finmetrics)
#
# compute rolling averages, variances and standard deviations
#
msft.ret = getReturns(singleIndex.dat[,"MSFT"])
sp500.ret = getReturns(singleIndex.dat[,"SP500"])
start(msft.ret)
end(msft.ret)
nrow(msft.ret)
#
# use aggregateSeries to compute rolling means and standard
# deviations
#
roll.mean = aggregateSeries(msft.ret,moving=24,adj=1,FUN=mean)
roll.sd = aggregateSeries(msft.ret,moving=24,adj=1,FUN=stdev)
class(roll.mean)
nrow(roll.mean)
roll.mean[1:5]
plot(msft.ret,roll.mean,roll.sd,plot.args=list(lty=c(1,3,4)))
legend(0,-0.2,legend=c("Returns","Rolling mean","Rolling sd"),
  lty=c(1,3,4))
# compute two-sided moving averages
roll.mean.5 = aggregateSeries(msft.ret,moving=24,adj=0.5,FUN=mean)
roll.mean.5[1:5]
# use a user-written function to compute rolling mean and sd
# values with one call to aggregateSeries
mean.sd = function(x) {
  tmp1 = mean(x)
  tmp2 = stdev(x)
  ans = concat(tmp1,tmp2)
  ans
}
roll.mean.sd = aggregateSeries(msft.ret,moving=24,adj=1,
  FUN=mean.sd,colnames=c("mean","sd"))
roll.mean.sd[1:5,]
# compute asymptotic standard error bands
lower.mean = roll.mean.sd[,"mean"]-2*roll.mean.sd[,"sd"]/sqrt(24)
upper.mean = roll.mean.sd[,"mean"]+2*roll.mean.sd[,"sd"]/sqrt(24)
lower.sd = roll.mean.sd[,"sd"]-2*roll.mean.sd[,"sd"]/sqrt(2*24)
upper.sd = roll.mean.sd[,"sd"]+2*roll.mean.sd[,"sd"]/sqrt(2*24)
par(mfrow=c(2,1))
plot(roll.mean.sd[,"mean"],lower.mean,upper.mean,
  main="24-month rolling means",plot.args=list(lty=c(1,2,2)))
plot(roll.mean.sd[,"sd"],lower.sd,upper.sd,
  main="24-month rolling standard deviations",
  plot.args=list(lty=c(1,2,2)))
par(mfrow=c(1,1))
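#
# For reference, a minimal sketch (an illustration added here, not the
# FinMetrics implementation) of what aggregateSeries(x, moving=n, adj=1,
# FUN=f) computes above: f applied to each n-observation moving window,
# with the result stamped at the last observation of the window
# (adj=1; adj=0.5 stamps it at the middle of the window).
roll.apply = function(x, n, FUN) {
  xv = as.vector(as.matrix(seriesData(x)))
  n.obs = length(xv)
  ans = rep(NA, n.obs - n + 1)
  for (i in n:n.obs) {
    ans[i - n + 1] = FUN(xv[(i - n + 1):i])
  }
  ans
}
# e.g. roll.apply(msft.ret, 24, mean) should match the values in roll.mean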
#
# illustrate computation of moving averages using an increment
# not equal to 1 by specifying by and k.by. Evidently this can't
# be done with the moving option
#
roll.mean.2 = aggregateSeries(msft.ret,adj=1,
  by="years",k.by=1,FUN=mean)
roll.mean.2[1:5]
#
# use FinMetrics functions SMA and rollVar to compute rolling
# means and standard deviations, and rollMax and rollMin to compute
# rolling maxima and minima
#
roll2.mean = SMA(msft.ret,n=24)
roll2.sd = sqrt(rollVar(msft.ret,n=24))
roll.max = rollMax(msft.ret,n=24)
roll.min = rollMin(msft.ret,n=24)
plot(msft.ret,roll2.mean,roll2.sd,plot.args=list(lty=1:3))
legend(0,-0.2,legend=c("Returns","Rolling mean","Rolling sd"),
  lty=1:3)
# compare computation time
dos.time(SMA(msft.ret,n=24))
dos.time(aggregateSeries(msft.ret,moving=24,adj=1,FUN=mean))
dos.time(sqrt(rollVar(msft.ret,n=24)))
dos.time(aggregateSeries(msft.ret,moving=24,adj=1,FUN=stdev))
# compute high frequency rolling standard deviations
msft.ret2.d = getReturns(DowJones30[,"MSFT"],type="continuous")^2
roll.sd.25 = sqrt(SMA(msft.ret2.d,n=25))
roll.sd.50 = sqrt(SMA(msft.ret2.d,n=50))
roll.sd.100 = sqrt(SMA(msft.ret2.d,n=100))
plot(roll.sd.25,roll.sd.50,roll.sd.100,
  plot.args=list(lty=c(1,3,4)))
legend(0,0.055,legend=c("n=25","n=50","n=100"),
  lty=c(1,3,4))
#
# rolling correlations
#
ret.ts = getReturns(singleIndex.dat,type="continuous")
colIds(ret.ts)
cor.coef = function(x) cor(x)[1,2]
roll.cor = aggregateSeries(ret.ts,moving=24,together=T,
  adj=1,FUN=cor.coef)
# do trellis graphics
trellisPlot(roll.cor,ret.ts,
  scales=list(y=list(relation="free")))
# compute approximate std errors using SE(rho.hat) = sqrt((1-rho.hat^2)/n)
lower.rho = roll.cor-2*sqrt((1-roll.cor^2)/24)
upper.rho = roll.cor+2*sqrt((1-roll.cor^2)/24)
# plot rolling estimates with data and SEs
smpl = positions(ret.ts)>=start(roll.cor)
par(mfrow=c(2,1))
plot(ret.ts[smpl,],main="Returns on Microsoft and S&P 500 index",
  plot.args=list(lty=c(1,3)))
legend(0,-0.2,legend=c("Microsoft","S&P 500"),
  lty=c(1,3))
plot(roll.cor,main="24-month rolling correlations")
par(mfrow=c(1,1))
#
# EWMA estimates
#
# outlier example data
set.seed(667)
e = rnorm(100)
e[20] = 10
roll.mean = SMA(e,n=10,trim=F)
roll.sd = sqrt(rollVar(e,n=10,trim=F))
par(mfrow=c(2,1))
tsplot(e)
tsplot(roll.mean,roll.sd)
legend(75,3,legend=c("Rolling mean","Rolling sd"),
  lty=1:2)
args(EWMA)
ewma95.mean = EWMA(e,lambda=0.95)
ewma75.mean = EWMA(e,lambda=0.75)
ewma50.mean = EWMA(e,lambda=0.5)
par(mfrow=c(1,1))
tsplot(ewma95.mean,ewma75.mean,ewma50.mean)
legend(60,4,legend=c("lambda=0.95","lambda=0.75","lambda=0.50"),
  lty=1:3)
# EWMA estimation of SD and rho using high frequency data
smpl = (positions(DowJones30) >= timeDate("1/1/1996"))
msft.ret.d = getReturns(DowJones30[smpl,"MSFT"])
ibm.ret.d = getReturns(DowJones30[smpl,"IBM"])
msft.ewma95.sd = sqrt(EWMA(msft.ret.d^2,lambda=0.95))
ibm.ewma95.sd = sqrt(EWMA(ibm.ret.d^2,lambda=0.95))
cov.ewma95 = EWMA(msft.ret.d*ibm.ret.d,lambda=0.95)
cor.ewma95 = cov.ewma95/(msft.ewma95.sd*ibm.ewma95.sd)
par(mfrow=c(2,1))
plot(msft.ewma95.sd,ibm.ewma95.sd,main="Rolling EWMA SD values",
  plot.args=list(lty=c(1,3)))
legend(0,0.055,legend=c("Microsoft","IBM"),lty=c(1,3))
plot(cor.ewma95,main="Rolling EWMA correlation values")
par(mfrow=c(1,1))
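#
# For reference, a minimal sketch (an illustration added here, not the
# FinMetrics implementation) of the exponentially weighted moving average
# recursion computed by EWMA() above,
#   m[t] = lambda*m[t-1] + (1 - lambda)*x[t],
# so a larger lambda means slower decay and smoother estimates. The start-up
# value used here (the first observation) is an assumption and may differ
# from EWMA()'s own start-up convention; see args(EWMA).
ewma.sketch = function(x, lambda = 0.95) {
  # x should be a plain numeric vector; strip a timeSeries down with
  # as.vector(as.matrix(seriesData(x))) first
  m = rep(0, length(x))
  m[1] = x[1]
  for (t in 2:length(x)) {
    m[t] = lambda*m[t-1] + (1 - lambda)*x[t]
  }
  m
}
# e.g. compare ewma.sketch(e, lambda=0.95) with ewma95.mean computed above
# (differences near the start of the sample reflect the start-up treatment)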
#
# Analysis of inhomogeneous data
#
colIds(highFreq3M.df)
# create timeDate sequence
td = timeDate(julian=(highFreq3M.df$trade.day-1),
  ms=highFreq3M.df$trade.time*1000,
  in.origin=c(month=12,day=1,year=1999),zone="GMT")
hf3M.ts = timeSeries(pos=td,data=highFreq3M.df)
hf3M.ts[1:20,]
smpl = positions(hf3M.ts) < timeDate("12/4/1999")
hf3M.ts = hf3M.ts[smpl,]
# use align to create homogeneous data
# first use aggregateSeries to get positions
tmp = aggregateSeries(hf3M.ts,by="minutes",k.by=5,FUN=mean)
positions(tmp)
# previous tick interpolation
hf3M.5min = align(hf3M.ts[,"trade.price"],
  pos=positions(tmp),how="before")
hf3M.5min[1:5,]
# linear interpolation
hf3M.5min = align(hf3M.ts[,"trade.price"],
  pos=positions(tmp),how="interp")
hf3M.5min[1:5,]
hf3M.5min.ewma = EWMA(hf3M.5min,lambda=0.9,na.rm=T)
plot(hf3M.5min.ewma)
#
# inhomogeneous time series operators
#
iMA(1:100,4,iter=10)
#
# rolling unit root test using aggregateSeries - works much better
# use Shiller's annual dp.ratio data
#
# get annual dividend/price ratio from the Shiller data
dp.ratio = shiller.annual[,"dp.ratio"]
plot(dp.ratio,main="S&P 500 Annual D/P",ylab="D/P")
adf.test = function(x, trend = "c", lags = 3) {
  tmp = unitroot(x,trend=trend,lags=lags)
  ans = tmp$sval
  ans
}
roll.adf = aggregateSeries(dp.ratio,moving=50,adj=1,FUN=adf.test)
smpl = positions(dp.ratio)>=start(roll.adf)
par(mfrow=c(2,1))
plot(dp.ratio[smpl,],main="S&P 500 D/P",reference.grid=F)
plot(roll.adf,main="Rolling ADF statistics",reference.grid=F)
par(mfrow=c(1,1))
# compute rolling ADF t-statistic and normalized bias tests
adf.tests = function(x, trend = "c", lags = 3) {
  tmp1 = unitroot(x,trend=trend,lags=lags,statistic="t")
  tmp2 = unitroot(x,trend=trend,lags=lags,statistic="n")
  ans = concat(tmp1$sval,tmp2$sval)
  ans
}
roll.adf = aggregateSeries(dp.ratio,moving=50,adj=1,
  FUN=adf.tests,colnames=c("t.test","norm.bias"))
smpl = positions(dp.ratio)>=start(roll.adf)
cvt.05 = qunitroot(0.05,trend="c",n.sample=50)
cvn.05 = qunitroot(0.05,trend="c",statistic="n",n.sample=50)
par(mfrow=c(2,1))
plot(roll.adf[,"t.test"],main="Rolling ADF t-statistics",
  reference.grid=F)
abline(h=cvt.05)
plot(roll.adf[,"norm.bias"],main="Rolling ADF normalized bias",
  reference.grid=F)
abline(h=cvn.05)
par(mfrow=c(1,1))
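#
# For reference (a note added here, not part of the original computations):
# the statistics above come from the augmented Dickey-Fuller regression
#   dy[t] = c + pi*y[t-1] + (lagged dy terms) + e[t],   pi = rho - 1,
# where statistic="t" gives the t-statistic for pi = 0 (a unit root) and
# statistic="n" gives the normalized-bias statistic based on n*(rho.hat - 1).
# Both have nonstandard Dickey-Fuller distributions, which is why the 5%
# critical values are taken from qunitroot() rather than a normal table.
# If punitroot() is available as the p-value companion to qunitroot() (an
# assumption here), approximate p-values for the rolling t-statistics are:
# roll.adf.pval = punitroot(seriesData(roll.adf[,"t.test"]), trend="c",
#   n.sample=50)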
#
# Technical indicators
#
# typical price
smpl = positions(djia) >= timeDate("1/1/1990")
dj = djia[smpl,]
tp.dj = TA.typicalPrice(dj[,"high"],
  dj[,"low"],dj[,"close"])
class(tp.dj)
plot.out = plot(dj[,1:4],plot.type="hloc")
lines.render(positions(tp.dj),seriesData(tp.dj),
  x.scale=plot.out$scale)
# MACD
msft.macd = TA.macd(msft.dat[,"Close"])
colIds(msft.macd) = c("MACD","Signal")
plot(msft.macd,plot.args=list(lty=c(1,3)))
legend(0.5,-3,legend=colIds(msft.macd),
  lty=c(1,3))
# Chaikin's volatility
msft.cv = TA.chaikinv(msft.dat[,"High"],
  msft.dat[,"Low"],n.range=10,n.change=10)
plot(msft.cv)
# accumulation/distribution indicator
msft.adi = TA.adi(msft.dat[,"High"],msft.dat[,"Low"],
  msft.dat[,"Close"],msft.dat[,"Volume"])
#
# rolling regression
#
# rolling estimation of the CAPM for Microsoft
colIds(excessReturns.ts)
start(excessReturns.ts)
end(excessReturns.ts)
tmp.df = as.data.frame(seriesData(excessReturns.ts))
tmp.ts = timeSeries(pos=positions(excessReturns.ts),
  data=tmp.df)
class(seriesData(tmp.ts))
ols.fit = OLS(MSFT~SP500,data=excessReturns.ts)
summary(ols.fit)
args(rollOLS)
roll.fit = rollOLS(MSFT~SP500,data=excessReturns.ts,
  width=24,incr=1)
names(roll.fit)
roll.fit$nwin
roll.fit
summary(roll.fit)
# save plots 3 and 4
plot(roll.fit, which=c(2,3))
# 1-step ahead predictions
roll.pred = predict(roll.fit,n.steps=1)
# roll.pred is an object of class "listof" whose elements
# are the predictions. If the data passed to rollOLS is a
# timeSeries then the components of roll.pred will be timeSeries
class(roll.pred)
names(roll.pred)
roll.pred[[1]]
# create prediction errors. Note NA values are created at the
# beginning of the series
ehat.1step = excessReturns.ts[,"MSFT"]-roll.pred[[1]]
# plot predictions, actuals and errors
par(mfrow=c(2,1))
plot(excessReturns.ts[,"MSFT"],roll.pred[[1]],
  main="Returns on MSFT and 1-step forecasts",
  plot.args=list(lty=c(1,3)))
legend(0,-0.2,legend=c("Actual","1-step forecast"),
  lty=c(1,3))
plot(ehat.1step,main="1-step forecast error")
par(mfrow=c(1,1))
#
# compute rmse, mae
#
ehat.1step = ehat.1step[!is.na(ehat.1step),]
me.1step = mean(ehat.1step)
mse.1step = as.numeric(var(ehat.1step))
rmse.1step = sqrt(mse.1step)
mae.1step = mean(abs(ehat.1step))
mape.1step = mean(abs(ehat.1step/excessReturns.ts[,"MSFT"]),na.rm=T)
# 1 and 2 step ahead predictions
roll.pred.12 = predict(roll.fit,n.steps=1:2)
names(roll.pred.12)
roll.pred.12[[1]]
roll.pred.12[[2]]
# use lapply to compute prediction errors
make.ehat = function(x,y){
  ans = y - x
  ans[!is.na(ans),]
}
ehat.list = lapply(roll.pred.12,make.ehat,
  excessReturns.ts[,"MSFT"])
names(ehat.list)
# use lapply to compute forecast error evaluation statistics
make.errorStats = function(x){
  me = mean(x)
  mse = as.numeric(var(x))
  rmse = sqrt(mse)
  mae = mean(abs(x))
  ans = list(ME=me,MSE=mse,RMSE=rmse,MAE=mae)
  ans
}
errorStat.list = lapply(ehat.list,make.errorStats)
unlist(errorStat.list)
sapply(ehat.list,make.errorStats)
# rolling estimation using tmp.ts
roll.fit2 = rollOLS(MSFT~SP500,data=tmp.ts,
  width=24,incr=1)
# use rollOLS to look at non-overlapping subsample estimates
roll.fit2 = rollOLS(MSFT~SP500,data=excessReturns.ts,
  width=65,incr=65)
summary(roll.fit2)
#
# illustrate backtesting by estimating two models by rollOLS and
# then evaluating the models by comparing their 1 to 5 step ahead
# prediction errors
#
# predict stock returns using the lagged dividend/price ratio
# and the lagged earnings/price ratio
#
colIds(shiller.annual)
# compute logs of real data
ln.p = log(shiller.annual[,"real.price"])
colIds(ln.p) = "ln.p"
# ln.d (log real dividends) is needed for seriesMerge below; the
# column name "real.dividend" is an assumption about shiller.annual
ln.d = log(shiller.annual[,"real.dividend"])
colIds(ln.d) = "ln.d"
ln.dpratio = log(dp.ratio)
colIds(ln.dpratio) = "ln.dpratio"
ln.epratio = -log(shiller.annual[,"pe.10"])
ln.epratio = ln.epratio[!is.na(ln.epratio),]
colIds(ln.epratio) = "ln.epratio"
# compute cc real total returns - see CLM pg. 261
ln.r = diff(ln.p) + log(1+exp(ln.dpratio[-1,]))
colIds(ln.r) = "ln.r"
stock.ts = seriesMerge(ln.p,ln.d,ln.dpratio,
  ln.epratio,ln.r,pos=positions(ln.epratio))
start(stock.ts)
end(stock.ts)
# full sample regression
ols.fit1 = OLS(ln.r~tslag(ln.dpratio),data=stock.ts)
# rolling regressions using 50 year windows and 1 year increments
roll.dp.fit = rollOLS(ln.r~tslag(ln.dpratio),data=stock.ts,
  width=50,incr=1)
roll.ep.fit = rollOLS(ln.r~tslag(ln.epratio),data=stock.ts,
  width=50,incr=1)
# compute 1 to 5 year ahead prediction errors
roll.dp.pred = predict(roll.dp.fit,n.steps=1:5)
roll.ep.pred = predict(roll.ep.fit,n.steps=1:5)
# compute prediction errors using lapply
ehat.dp.list = lapply(roll.dp.pred,make.ehat,
  stock.ts[,"ln.r"])
ehat.ep.list = lapply(roll.ep.pred,make.ehat,
  stock.ts[,"ln.r"])
# compute error summaries using lapply
errorStats.dp.list = lapply(ehat.dp.list,make.errorStats)
errorStats.ep.list = lapply(ehat.ep.list,make.errorStats)
tmp = cbind(unlist(errorStats.dp.list),unlist(errorStats.ep.list))
colIds(tmp) = c("D/P","E/P")
tmp
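#
# For reference (a note added here, not part of the original computations):
# the Diebold-Mariano statistics below are built from the loss differential
#   d[t] = L(e.dp[t]) - L(e.ep[t]),   L = squared error or absolute error,
#   DM   = mean(d)/sqrt(avar(mean(d))),
# which is asymptotically N(0,1) under the null of equal predictive accuracy.
# A rectangular window with bandwidth h-1 is used for the h-step-ahead errors
# because optimal h-step forecast errors are serially correlated to at most
# lag h-1. The helper below is equivalent to the body of the loop that
# follows and uses asymp.var exactly as the loop does.
dm.stat = function(d, bandwidth = 0) {
  mean(d)/sqrt(asymp.var(d, bandwidth = bandwidth, window = "rectangular"))
}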
# compute Diebold-Mariano statistics using a for loop
nfcst = nrow(ehat.dp.list[[1]])
d.mse = matrix(0,nfcst,5)
d.mae = matrix(0,nfcst,5)
DM.mse = rep(0,5)
DM.mae = rep(0,5)
for (i in 1:5) {
  d.mse[,i] = ehat.dp.list[[i]]^2 - ehat.ep.list[[i]]^2
  DM.mse[i] = mean(d.mse[,i])/sqrt(asymp.var(d.mse[,i],
    bandwidth=i-1,window="rectangular"))
  d.mae[,i] = abs(ehat.dp.list[[i]]) - abs(ehat.ep.list[[i]])
  DM.mae[i] = mean(d.mae[,i])/sqrt(asymp.var(d.mae[,i],
    bandwidth=i-1,window="rectangular"))
}
names(DM.mse) = names(ehat.dp.list)
names(DM.mae) = names(ehat.dp.list)
cbind(DM.mse,DM.mae)
#
# illustrate the use of the generic roll function
#
# illustrate rolling regression using roll
# note: the data argument must be a data frame or a
# timeSeries with a data frame in the data slot for roll to
# work correctly
#
roll.fit = roll(FUN=OLS,data=excessReturns.ts,
  width=24,incr=1,formula=MSFT~SP500)
class(roll.fit)
names(roll.fit)
class(roll.fit$coef)
nrow(roll.fit$coef)
class(roll.fit$residuals)
nrow(roll.fit$residuals)
ncol(roll.fit$residuals)
roll.fit$coef[1:2,]
roll.fit$residuals[1:2,]
roll.fit = roll(FUN=OLS,data=excessReturns.ts,
  width=24,incr=1,formula=MSFT~SP500,
  save.list=c("coef","residuals"),trace=F)
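#
# For comparison, a minimal base-S sketch (an illustration added here, not
# the FinMetrics implementation of roll) of what a generic rolling estimator
# does: refit FUN on each window of width rows, stepping forward incr rows
# at a time, and collect the estimated coefficients. It assumes the object
# returned by FUN has a coef component, as OLS objects do.
roll.sketch = function(FUN, data, width, incr = 1, ...) {
  n.obs = nrow(data)
  start.idx = seq(1, n.obs - width + 1, by = incr)
  coef.list = vector("list", length(start.idx))
  for (i in seq(along = start.idx)) {
    win = start.idx[i]:(start.idx[i] + width - 1)
    coef.list[[i]] = FUN(data = data[win, ], ...)$coef
  }
  do.call("rbind", coef.list)
}
# e.g. roll.sketch(OLS, data = excessReturns.ts, width = 24, incr = 1,
#   formula = MSFT~SP500) should resemble roll.fit$coef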