# rollAnalysis.ssc scripts for Rolling Analysis chapter
# author: Eric Zivot
# created: February 8, 2002
# updated: May 20, 2002
#
###################################################################
module(finmetrics)
#
# compute rolling averages, variances and standard deviations
#
msft.ret = getReturns(singleIndex.dat[,"MSFT"])
sp500.ret = getReturns(singleIndex.dat[,"SP500"])
start(msft.ret)
end(msft.ret)
nrow(msft.ret)
#
# use aggregateSeries to compute rolling means and standard
# deviations
#
roll.mean = aggregateSeries(msft.ret,moving=24,adj=1,FUN=mean)
roll.sd = aggregateSeries(msft.ret,moving=24,adj=1,FUN=stdev)
class(roll.mean)
nrow(roll.mean)
roll.mean[1:5]
plot(msft.ret,roll.mean,roll.sd,plot.args=list(lty=c(1,3,4)))
legend(0,-0.2,legend=c("Returns","Rolling mean","Rolling sd"),
  lty=c(1,3,4))
# compute two-sided moving averages
roll.mean.5 = aggregateSeries(msft.ret,moving=24,adj=0.5,FUN=mean)
roll.mean.5[1:5]
# use a user-written function to compute rolling mean and sd
# values with one call to aggregateSeries
mean.sd = function(x) {
  tmp1 = mean(x)
  tmp2 = stdev(x)
  ans = concat(tmp1,tmp2)
  ans
}
roll.mean.sd = aggregateSeries(msft.ret,moving=24,adj=1,
  FUN=mean.sd,colnames=c("mean","sd"))
roll.mean.sd[1:5,]
# compute asymptotic standard error bands
lower.mean = roll.mean.sd[,"mean"]-2*roll.mean.sd[,"sd"]/sqrt(24)
upper.mean = roll.mean.sd[,"mean"]+2*roll.mean.sd[,"sd"]/sqrt(24)
lower.sd = roll.mean.sd[,"sd"]-2*roll.mean.sd[,"sd"]/sqrt(2*24)
upper.sd = roll.mean.sd[,"sd"]+2*roll.mean.sd[,"sd"]/sqrt(2*24)
par(mfrow=c(2,1))
plot(roll.mean.sd[,"mean"],lower.mean,upper.mean,
  main="24-month rolling means",plot.args=list(lty=c(1,2,2)))
plot(roll.mean.sd[,"sd"],lower.sd,upper.sd,
  main="24-month rolling standard deviations",
  plot.args=list(lty=c(1,2,2)))
par(mfrow=c(1,1))
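#
# For reference, a minimal sketch (an illustration added here, not the
# FinMetrics implementation) of what aggregateSeries(x, moving=n, adj=1,
# FUN=f) computes above: f applied to each n-observation moving window,
# with the result stamped at the last observation of the window
# (adj=1; adj=0.5 stamps it at the middle of the window).
roll.apply = function(x, n, FUN) {
  xv = as.vector(as.matrix(seriesData(x)))
  n.obs = length(xv)
  ans = rep(NA, n.obs - n + 1)
  for (i in n:n.obs) {
    ans[i - n + 1] = FUN(xv[(i - n + 1):i])
  }
  ans
}
# e.g. roll.apply(msft.ret, 24, mean) should match the values in roll.mean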
#
# illustrate computation of moving averages using an increment
# not equal to 1 by specifying by and k.by. Evidently this can't
# be done with the moving option
#
roll.mean.2 = aggregateSeries(msft.ret,adj=1,
  by="years",k.by=1,FUN=mean)
roll.mean.2[1:5]
#
# use FinMetrics functions SMA and rollVar to compute rolling
# means and standard deviations, and rollMax and rollMin to compute
# rolling maxima and minima
#
roll2.mean = SMA(msft.ret,n=24)
roll2.sd = sqrt(rollVar(msft.ret,n=24))
roll.max = rollMax(msft.ret,n=24)
roll.min = rollMin(msft.ret,n=24)
plot(msft.ret,roll2.mean,roll2.sd,plot.args=list(lty=1:3))
legend(0,-0.2,legend=c("Returns","Rolling mean","Rolling sd"),
  lty=1:3)
# compare computation time
dos.time(SMA(msft.ret,n=24))
dos.time(aggregateSeries(msft.ret,moving=24,adj=1,FUN=mean))
dos.time(sqrt(rollVar(msft.ret,n=24)))
dos.time(aggregateSeries(msft.ret,moving=24,adj=1,FUN=stdev))
# compute high frequency rolling standard deviations
msft.ret2.d = getReturns(DowJones30[,"MSFT"],type="continuous")^2
roll.sd.25 = sqrt(SMA(msft.ret2.d,n=25))
roll.sd.50 = sqrt(SMA(msft.ret2.d,n=50))
roll.sd.100 = sqrt(SMA(msft.ret2.d,n=100))
plot(roll.sd.25,roll.sd.50,roll.sd.100,
  plot.args=list(lty=c(1,3,4)))
legend(0,0.055,legend=c("n=25","n=50","n=100"),
  lty=c(1,3,4))
#
# rolling correlations
#
ret.ts = getReturns(singleIndex.dat,type="continuous")
colIds(ret.ts)
cor.coef = function(x) cor(x)[1,2]
roll.cor = aggregateSeries(ret.ts,moving=24,together=T,
  adj=1,FUN=cor.coef)
# do trellis graphics
trellisPlot(roll.cor,ret.ts,
  scales=list(y=list(relation="free")))
# compute approximate std errors using SE(rho.hat) = sqrt((1-rho.hat^2)/n)
lower.rho = roll.cor-2*sqrt((1-roll.cor^2)/24)
upper.rho = roll.cor+2*sqrt((1-roll.cor^2)/24)
# plot rolling estimates with data and SEs
smpl = positions(ret.ts)>=start(roll.cor)
par(mfrow=c(2,1))
plot(ret.ts[smpl,],main="Returns on Microsoft and S&P 500 index",
  plot.args=list(lty=c(1,3)))
legend(0,-0.2,legend=c("Microsoft","S&P 500"),
  lty=c(1,3))
plot(roll.cor,main="24-month rolling correlations")
par(mfrow=c(1,1))
#
# EWMA estimates
#
# outlier example data
set.seed(667)
e = rnorm(100)
e[20] = 10
roll.mean = SMA(e,n=10,trim=F)
roll.sd = sqrt(rollVar(e,n=10,trim=F))
par(mfrow=c(2,1))
tsplot(e)
tsplot(roll.mean,roll.sd)
legend(75,3,legend=c("Rolling mean","Rolling sd"),
  lty=1:2)
args(EWMA)
ewma95.mean = EWMA(e,lambda=0.95)
ewma75.mean = EWMA(e,lambda=0.75)
ewma50.mean = EWMA(e,lambda=0.5)
par(mfrow=c(1,1))
tsplot(ewma95.mean,ewma75.mean,ewma50.mean)
legend(60,4,legend=c("lambda=0.95","lambda=0.75","lambda=0.50"),
  lty=1:3)
# EWMA estimation of SD and rho using high frequency data
smpl = (positions(DowJones30) >= timeDate("1/1/1996"))
msft.ret.d = getReturns(DowJones30[smpl,"MSFT"])
ibm.ret.d = getReturns(DowJones30[smpl,"IBM"])
msft.ewma95.sd = sqrt(EWMA(msft.ret.d^2,lambda=0.95))
ibm.ewma95.sd = sqrt(EWMA(ibm.ret.d^2,lambda=0.95))
cov.ewma95 = EWMA(msft.ret.d*ibm.ret.d,lambda=0.95)
cor.ewma95 = cov.ewma95/(msft.ewma95.sd*ibm.ewma95.sd)
par(mfrow=c(2,1))
plot(msft.ewma95.sd,ibm.ewma95.sd,main="Rolling EWMA SD values",
  plot.args=list(lty=c(1,3)))
legend(0,0.055,legend=c("Microsoft","IBM"),lty=c(1,3))
plot(cor.ewma95,main="Rolling EWMA correlation values")
par(mfrow=c(1,1))
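#
# For reference, a minimal sketch (an illustration added here, not the
# FinMetrics implementation) of the exponentially weighted moving average
# recursion computed by EWMA() above,
#   m[t] = lambda*m[t-1] + (1 - lambda)*x[t],
# so a larger lambda means slower decay and smoother estimates. The start-up
# value used here (the first observation) is an assumption and may differ
# from EWMA()'s own start-up convention; see args(EWMA).
ewma.sketch = function(x, lambda = 0.95) {
  # x should be a plain numeric vector; strip a timeSeries down with
  # as.vector(as.matrix(seriesData(x))) first
  m = rep(0, length(x))
  m[1] = x[1]
  for (t in 2:length(x)) {
    m[t] = lambda*m[t-1] + (1 - lambda)*x[t]
  }
  m
}
# e.g. compare ewma.sketch(e, lambda=0.95) with ewma95.mean computed above
# (differences near the start of the sample reflect the start-up treatment)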
#
# Analysis of inhomogeneous data
#
colIds(highFreq3M.df)
# create timeDate sequence
td = timeDate(julian=(highFreq3M.df$trade.day-1),
  ms=highFreq3M.df$trade.time*1000,
  in.origin=c(month=12,day=1,year=1999),zone="GMT")
hf3M.ts = timeSeries(pos=td,data=highFreq3M.df)
hf3M.ts[1:20,]
smpl = positions(hf3M.ts) < timeDate("12/4/1999")
hf3M.ts = hf3M.ts[smpl,]
# use align to create homogeneous data
# first use aggregateSeries to get positions
tmp = aggregateSeries(hf3M.ts,by="minutes",k.by=5,FUN=mean)
positions(tmp)
# previous tick interpolation
hf3M.5min = align(hf3M.ts[,"trade.price"],
  pos=positions(tmp),how="before")
hf3M.5min[1:5,]
# linear interpolation
hf3M.5min = align(hf3M.ts[,"trade.price"],
  pos=positions(tmp),how="interp")
hf3M.5min[1:5,]
hf3M.5min.ewma = EWMA(hf3M.5min,lambda=0.9,na.rm=T)
plot(hf3M.5min.ewma)
#
# inhomogeneous time series operators
#
iMA(1:100,4,iter=10)
#
# rolling unit root test using aggregateSeries - works much better
# use Shiller's annual dp.ratio data
#
# get annual dividend/price ratio from the Shiller data
dp.ratio = shiller.annual[,"dp.ratio"]
plot(dp.ratio,main="S&P 500 Annual D/P",ylab="D/P")
adf.test = function(x, trend = "c", lags = 3) {
  tmp = unitroot(x,trend=trend,lags=lags)
  ans = tmp$sval
  ans
}
roll.adf = aggregateSeries(dp.ratio,moving=50,adj=1,FUN=adf.test)
smpl = positions(dp.ratio)>=start(roll.adf)
par(mfrow=c(2,1))
plot(dp.ratio[smpl,],main="S&P 500 D/P",reference.grid=F)
plot(roll.adf,main="Rolling ADF statistics",reference.grid=F)
par(mfrow=c(1,1))
# compute rolling ADF t-statistic and normalized bias tests
adf.tests = function(x, trend = "c", lags = 3) {
  tmp1 = unitroot(x,trend=trend,lags=lags,statistic="t")
  tmp2 = unitroot(x,trend=trend,lags=lags,statistic="n")
  ans = concat(tmp1$sval,tmp2$sval)
  ans
}
roll.adf = aggregateSeries(dp.ratio,moving=50,adj=1,
  FUN=adf.tests,colnames=c("t.test","norm.bias"))
smpl = positions(dp.ratio)>=start(roll.adf)
cvt.05 = qunitroot(0.05,trend="c",n.sample=50)
cvn.05 = qunitroot(0.05,trend="c",statistic="n",n.sample=50)
par(mfrow=c(2,1))
plot(roll.adf[,"t.test"],main="Rolling ADF t-statistics",
  reference.grid=F)
abline(h=cvt.05)
plot(roll.adf[,"norm.bias"],main="Rolling ADF normalized bias",
  reference.grid=F)
abline(h=cvn.05)
par(mfrow=c(1,1))
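#
# For reference (a note added here, not part of the original computations):
# the statistics above come from the augmented Dickey-Fuller regression
#   dy[t] = c + pi*y[t-1] + (lagged dy terms) + e[t],   pi = rho - 1,
# where statistic="t" gives the t-statistic for pi = 0 (a unit root) and
# statistic="n" gives the normalized-bias statistic based on n*(rho.hat - 1).
# Both have nonstandard Dickey-Fuller distributions, which is why the 5%
# critical values are taken from qunitroot() rather than a normal table.
# If punitroot() is available as the p-value companion to qunitroot() (an
# assumption here), approximate p-values for the rolling t-statistics are:
# roll.adf.pval = punitroot(seriesData(roll.adf[,"t.test"]), trend="c",
#   n.sample=50)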
#
# Technical indicators
#
# typical price
smpl = positions(djia) >= timeDate("1/1/1990")
dj = djia[smpl,]
tp.dj = TA.typicalPrice(dj[,"high"],
  dj[,"low"],dj[,"close"])
class(tp.dj)
plot.out = plot(dj[,1:4],plot.type="hloc")
lines.render(positions(tp.dj),seriesData(tp.dj),
  x.scale=plot.out$scale)
# MACD
msft.macd = TA.macd(msft.dat[,"Close"])
colIds(msft.macd) = c("MACD","Signal")
plot(msft.macd,plot.args=list(lty=c(1,3)))
legend(0.5,-3,legend=colIds(msft.macd),
  lty=c(1,3))
# Chaikin's volatility
msft.cv = TA.chaikinv(msft.dat[,"High"],
  msft.dat[,"Low"],n.range=10,n.change=10)
plot(msft.cv)
# accumulation/distribution indicator
msft.adi = TA.adi(msft.dat[,"High"],msft.dat[,"Low"],
  msft.dat[,"Close"],msft.dat[,"Volume"])
#
# rolling regression
#
# rolling estimation of the CAPM for Microsoft
colIds(excessReturns.ts)
start(excessReturns.ts)
end(excessReturns.ts)
tmp.df = as.data.frame(seriesData(excessReturns.ts))
tmp.ts = timeSeries(pos=positions(excessReturns.ts),
  data=tmp.df)
class(seriesData(tmp.ts))
ols.fit = OLS(MSFT~SP500,data=excessReturns.ts)
summary(ols.fit)
args(rollOLS)
roll.fit = rollOLS(MSFT~SP500,data=excessReturns.ts,
  width=24,incr=1)
names(roll.fit)
roll.fit$nwin
roll.fit
summary(roll.fit)
# save plots 3 and 4
plot(roll.fit, which=c(2,3))
# 1-step ahead predictions
roll.pred = predict(roll.fit,n.steps=1)
# roll.pred is an object of class "listof" whose elements
# are the predictions. If the data passed to rollOLS is a
# timeSeries then the components of roll.pred will be timeSeries
class(roll.pred)
names(roll.pred)
roll.pred[[1]]
# create prediction errors. Note NA values are created at the
# beginning of the series
ehat.1step = excessReturns.ts[,"MSFT"]-roll.pred[[1]]
# plot predictions, actuals and errors
par(mfrow=c(2,1))
plot(excessReturns.ts[,"MSFT"],roll.pred[[1]],
  main="Returns on MSFT and 1-step forecasts",
  plot.args=list(lty=c(1,3)))
legend(0,-0.2,legend=c("Actual","1-step forecast"),
  lty=c(1,3))
plot(ehat.1step,main="1-step forecast error")
par(mfrow=c(1,1))
#
# compute rmse, mae
#
ehat.1step = ehat.1step[!is.na(ehat.1step),]
me.1step = mean(ehat.1step)
mse.1step = as.numeric(var(ehat.1step))
rmse.1step = sqrt(mse.1step)
mae.1step = mean(abs(ehat.1step))
mape.1step = mean(abs(ehat.1step/excessReturns.ts[,"MSFT"]),na.rm=T)
# 1 and 2 step ahead predictions
roll.pred.12 = predict(roll.fit,n.steps=1:2)
names(roll.pred.12)
roll.pred.12[[1]]
roll.pred.12[[2]]
# use lapply to compute prediction errors
make.ehat = function(x,y){
  ans = y - x
  ans[!is.na(ans),]
}
ehat.list = lapply(roll.pred.12,make.ehat,
  excessReturns.ts[,"MSFT"])
names(ehat.list)
# use lapply to compute forecast error evaluation statistics
make.errorStats = function(x){
  me = mean(x)
  mse = as.numeric(var(x))
  rmse = sqrt(mse)
  mae = mean(abs(x))
  ans = list(ME=me,MSE=mse,RMSE=rmse,MAE=mae)
  ans
}
errorStat.list = lapply(ehat.list,make.errorStats)
unlist(errorStat.list)
sapply(ehat.list,make.errorStats)
# rolling estimation using tmp.ts
roll.fit2 = rollOLS(MSFT~SP500,data=tmp.ts,
  width=24,incr=1)
# use rollOLS to look at non-overlapping subsample estimates
roll.fit2 = rollOLS(MSFT~SP500,data=excessReturns.ts,
  width=65,incr=65)
summary(roll.fit2)
#
# illustrate backtesting by estimating two models by rollOLS and
# then evaluating the models by comparing their 1 to 5 step ahead
# prediction errors
#
# predict stock returns using the lagged dividend/price ratio
# and the lagged earnings/price ratio
#
colIds(shiller.annual)
# compute logs of real data
ln.p = log(shiller.annual[,"real.price"])
colIds(ln.p) = "ln.p"
# ln.d (log real dividends) is needed for seriesMerge below; the
# column name "real.dividend" is an assumption about shiller.annual
ln.d = log(shiller.annual[,"real.dividend"])
colIds(ln.d) = "ln.d"
ln.dpratio = log(dp.ratio)
colIds(ln.dpratio) = "ln.dpratio"
ln.epratio = -log(shiller.annual[,"pe.10"])
ln.epratio = ln.epratio[!is.na(ln.epratio),]
colIds(ln.epratio) = "ln.epratio"
# compute cc real total returns - see CLM pg. 261
ln.r = diff(ln.p) + log(1+exp(ln.dpratio[-1,]))
colIds(ln.r) = "ln.r"
stock.ts = seriesMerge(ln.p,ln.d,ln.dpratio,
  ln.epratio,ln.r,pos=positions(ln.epratio))
start(stock.ts)
end(stock.ts)
# full sample regression
ols.fit1 = OLS(ln.r~tslag(ln.dpratio),data=stock.ts)
# rolling regressions using 50 year windows and 1 year increments
roll.dp.fit = rollOLS(ln.r~tslag(ln.dpratio),data=stock.ts,
  width=50,incr=1)
roll.ep.fit = rollOLS(ln.r~tslag(ln.epratio),data=stock.ts,
  width=50,incr=1)
# compute 1 to 5 year ahead prediction errors
roll.dp.pred = predict(roll.dp.fit,n.steps=1:5)
roll.ep.pred = predict(roll.ep.fit,n.steps=1:5)
# compute prediction errors using lapply
ehat.dp.list = lapply(roll.dp.pred,make.ehat,
  stock.ts[,"ln.r"])
ehat.ep.list = lapply(roll.ep.pred,make.ehat,
  stock.ts[,"ln.r"])
# compute error summaries using lapply
errorStats.dp.list = lapply(ehat.dp.list,make.errorStats)
errorStats.ep.list = lapply(ehat.ep.list,make.errorStats)
tmp = cbind(unlist(errorStats.dp.list),unlist(errorStats.ep.list))
colIds(tmp) = c("D/P","E/P")
tmp
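#
# For reference (a note added here, not part of the original computations):
# the Diebold-Mariano statistics below are built from the loss differential
#   d[t] = L(e.dp[t]) - L(e.ep[t]),   L = squared error or absolute error,
#   DM   = mean(d)/sqrt(avar(mean(d))),
# which is asymptotically N(0,1) under the null of equal predictive accuracy.
# A rectangular window with bandwidth h-1 is used for the h-step-ahead errors
# because optimal h-step forecast errors are serially correlated to at most
# lag h-1. The helper below is equivalent to the body of the loop that
# follows and uses asymp.var exactly as the loop does.
dm.stat = function(d, bandwidth = 0) {
  mean(d)/sqrt(asymp.var(d, bandwidth = bandwidth, window = "rectangular"))
}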
# compute Diebold-Mariano statistics using a for loop
nfcst = nrow(ehat.dp.list[[1]])
d.mse = matrix(0,nfcst,5)
d.mae = matrix(0,nfcst,5)
DM.mse = rep(0,5)
DM.mae = rep(0,5)
for (i in 1:5) {
  d.mse[,i] = ehat.dp.list[[i]]^2 - ehat.ep.list[[i]]^2
  DM.mse[i] = mean(d.mse[,i])/sqrt(asymp.var(d.mse[,i],
    bandwidth=i-1,window="rectangular"))
  d.mae[,i] = abs(ehat.dp.list[[i]]) - abs(ehat.ep.list[[i]])
  DM.mae[i] = mean(d.mae[,i])/sqrt(asymp.var(d.mae[,i],
    bandwidth=i-1,window="rectangular"))
}
names(DM.mse) = names(ehat.dp.list)
names(DM.mae) = names(ehat.dp.list)
cbind(DM.mse,DM.mae)
#
# illustrate the use of the generic roll function
#
# illustrate rolling regression using roll
# note: the data argument must be a data frame or a
# timeSeries with a data frame in the data slot for roll to
# work correctly
#
roll.fit = roll(FUN=OLS,data=excessReturns.ts,
  width=24,incr=1,formula=MSFT~SP500)
class(roll.fit)
names(roll.fit)
class(roll.fit$coef)
nrow(roll.fit$coef)
class(roll.fit$residuals)
nrow(roll.fit$residuals)
ncol(roll.fit$residuals)
roll.fit$coef[1:2,]
roll.fit$residuals[1:2,]
roll.fit = roll(FUN=OLS,data=excessReturns.ts,
  width=24,incr=1,formula=MSFT~SP500,
  save.list=c("coef","residuals"),trace=F)
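#
# For comparison, a minimal base-S sketch (an illustration added here, not
# the FinMetrics implementation of roll) of what a generic rolling estimator
# does: refit FUN on each window of width rows, stepping forward incr rows
# at a time, and collect the estimated coefficients. It assumes the object
# returned by FUN has a coef component, as OLS objects do.
roll.sketch = function(FUN, data, width, incr = 1, ...) {
  n.obs = nrow(data)
  start.idx = seq(1, n.obs - width + 1, by = incr)
  coef.list = vector("list", length(start.idx))
  for (i in seq(along = start.idx)) {
    win = start.idx[i]:(start.idx[i] + width - 1)
    coef.list[[i]] = FUN(data = data[win, ], ...)$coef
  }
  do.call("rbind", coef.list)
}
# e.g. roll.sketch(OLS, data = excessReturns.ts, width = 24, incr = 1,
#   formula = MSFT~SP500) should resemble roll.fit$coef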