# # Kalman Filtering in R # # See web site for Shumway & Stoffer for documents load("tsa3.rda") # --- Notation for model is as in textbook # # State: xt = F xt-1 + wt, Var(w) = Q # Obs : yt = H xt + vt, Var(v) = R # # Level 0 (no exogenous inputs) # # filtering: Kfilter0(n,y,H,mu.0,sigma.0,F, sqrtQ, sqrtR) # smoothing: Ksmooth0( " ) #################################################### # # # Apply to an AR(2) process, Akaike form # # # #################################################### n <- 200 phi <-c(1.2,-0.8) p <- length(phi) sigma.w <- 150; psi <- c(1,ARMAtoMA(ar=phi, lag.max=p-1)) yt <- arima.sim(list(ar=phi), n, sd=sigma.w) acf2(yt) # --- use Akaike's predictor form with phi on the bottom, in reverse order F <- matrix(c(0,1,phi[p:1]),nrow=2,ncol=2,byrow=TRUE); F G <- psi[0:1 + 1]; G # zero-based origin would be handy for this H <- matrix(c(1,0), nrow=1, ncol=2); H # not minimal dimension # --- upper triangular factors cQ <- sigma.w * matrix(c(psi,0,0),nrow=2,ncol=2,byrow=TRUE) # rank 1 cR <- 0 # --- Initialize: need 2x2 covariance matrix, so head for Y-W # Mathematica version # Solve[
# {g0 == p1 g1 + p2 g2 + s2,
# g1 == p1 g0 + p2 g1,
# g2 == p1 g1 + p2 g0},
# {g0, g1, g2}] a <-matrix(c(1 ,-phi[1] ,-phi[2], -phi[1],1-phi[2], 0 , -phi[2], -phi[1], 1) ,nrow=3,ncol=3,byrow=TRUE) b <-c(sigma.w^2,0,0) gamma = solve(a,b); gamma # 1.125000e+05 7.500000e+04 -1.665335e-11 mu.0 <- rep(0,2) sigma.0 <- chol(matrix(c(gamma[0+1],gamma[1+1],gamma[1+1],gamma[0+1]-sigma.w^2),nrow=2,ncol=2)) sigma.0 # --- Run the filter kf <- Kfilter0(n,yt, H, mu.0,sigma.0, F, cQ, cR) names(kf) # --- Two results: one-step ahead prediction (t|t-1) and filtered (t|t) ## notes: ## mu_t|t-1=ks$xp, P_t|t-1=ks$Pp, "p" for prediction ## mu_t|t=ks$xf, P_t|t=ks$Pf, "f" for filter ## mu_t|n=ks$xs, P_t|n=ks$Ps, "s" for smoother dim(kf$xp) # 2 1 n cbind(kf$xp[,,51], kf$xp[,,52], kf$xp[,,53]) cbind(kf$xf[,,50], kf$xf[,,51], kf$xf[,,52]) # one step ahead predictions predicted <- as.vector(kf$xp[1,1,]) plot(yt, type="o", col="seagreen"); lines(predicted,col="tan"); # filtered (which match the data because the first element of state is data) filtered <- as.vector(kf$xf[1,1,]); lines(filtered,col="brown") plot(filtered-yt); # --- One step variance is constant very quickly: why? Whats a short-cut calculation? kf$Pp[1,1,] # --- What does smoothing do? Not happy since first element of state = data ks <- Ksmooth0(n,yt, H, mu.0,sigma.0, F, cQ, cR) smoothed <- as.vector(ks$xs[1,1,]); plot(yt, type="o", col="seagreen"); lines(filtered,col="brown"); lines(smoothed) #################################################### # # # Apply to an AR(2) process with additive noise # # # #################################################### n <- 200 phi <-c(1.2,-0.8) p <- length(phi) psi <- c(1,ARMAtoMA(ar=phi, lag.max=p-1)) sigma.w <- 150; sigma.n <- 100; # compare to sd of yt, not to sd of wt # x_t process is ar2 xt <- arima.sim(list(ar=phi), n, sd=sigma.w) # y_t observed adds noise yt <- xt + sigma.n*rnorm(n) # --- How does noise affect the correlations? acf2(yt); ay <- acf(yt, plot=FALSE); py <- pacf(yt, plot=FALSE) # observed acf2(xt); ax <- acf(xt, plot=FALSE); px <- pacf(xt, plot=FALSE) # hidden state par(mfrow=c(1,1)) gamma[(1:2)+1]/gamma[0 + 1]; cbind(2:10,ax$acf[2:10], ay$acf[2:10], px$acf[1:9], py$acf[1:9]) # skip 1's in acf # --- mixed models look pretty good... maxp <- maxq <- 4 aic.table <- matrix(NA, nrow=1+maxp, ncol=1+maxq) rownames(aic.table)<-paste("p",0:maxp,sep="=") # nice labels help! colnames(aic.table)<-paste("q",0:maxq,sep="=") for (p in 0:maxp) for (q in 0:maxq) { aic.table[1+p,1+q] <- arima(yt, order=c(p,0,q), include.mean=TRUE, method="ML")$aic } round(aic.table,digits=0) # --- Only change in parameters cR <- sigma.n # --- Run the smoother (happy now since avoid singularity; not sure why) ks <- Ksmooth0(n,yt, H, mu.0,sigma.0, F, cQ, cR) names(ks) # one step ahead predictions of underlying state predicted <- as.vector(ks$xp[1,1,]) plot(yt, type="p", col="seagreen"); lines(predicted,col="tan"); # filtered filtered <- as.vector(ks$xf[1,1,]); lines(filtered,col="brown") # smoothed smoothed <- as.vector(ks$xs[1,1,]); lines(filtered,col="red") plot (yt-predicted, type="p", col="gray") points(yt-filtered, type="p", col="black") points(yt-smoothed, type="p", col="pink") # function to put correlations on the lower half of scatterplot matrix panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex.cor <- 0.5/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * r) # size of text prop to corr } pairs(cbind(predicted,filtered,smoothed,xt), lower.panel=panel.cor) #################################################### # # # Getting log-like from filter # # # #################################################### # input data, m_{t|t-1}, p_{t|t-1}; returns -2 log like log.like <- function(yt, yhat, var) { dev <- yt - yhat; n <- length(yt) sum(log(var)) + sum(((yt-yhat)^2)/var) } # input the correct values from the filter (depends on how state is defined) log.like(yt,kf$xp[1,1,],kf$Pp[1,1,]) # useful subroutine phi.to.covariances <- function(phi, sigma.w) { a <-matrix(c(1 ,-phi[1] ,-phi[2], -phi[1],1-phi[2], 0 , -phi[2], -phi[1], 1),nrow=3,ncol=3,byrow=TRUE) b <-c(sigma.w^2,0,0) solve(a,b) } # -2 log like from parameters, data ar2.log.like <- function(yt,phi, sigma.w, sigma.n) { n <- length(yt) p <- length(phi) # --- arma properties psi <- c(1,ARMAtoMA(ar=phi, lag.max=p-1)) gamma <- phi.to.covariances(phi,sigma.w) # cat("Covariance terms: phi=", phi[1],phi[2], "->", gamma[0],gamma[1],sigma.w^2,"\n") # --- components of filter F <- matrix(c(0,1,phi[p:1]),nrow=2,ncol=2,byrow=TRUE); G <- psi[1:p] H <- matrix(c(1,0), nrow=1, ncol=2) cQ <- sigma.w * matrix(c(psi,0,0),nrow=2,ncol=2,byrow=TRUE) # rank 1 cR <- sigma.n # --- initialize mu.0 <- rep(0,length(phi)) sigma.0 <- chol(matrix(c(gamma[0+1],gamma[1+1],gamma[1+1],gamma[0+1]-sigma.w^2),nrow=2,ncol=2)) # --- run filter kf <- Kfilter0(n,yt, H, mu.0,sigma.0, F, cQ, cR) log.like(yt,kf$xp[1,1,],kf$Pp[1,1,]) } # draw a graph of the function toplot <- matrix(NA, nrow=14, ncol=2) x<- 0.7 for(i in 1:14) { toplot[i,1] <- x; toplot[i,2] <- ar2.log.like(yt,c(x,phi[2]),sigma.w, sigma.n) x <- x + 0.05 } plot(toplot[,1], toplot[,2],type="o") toplot <- matrix(NA, nrow=12, ncol=2) x<- -0.95 for(i in 1:12) { toplot[i,1] <- x; toplot[i,2] <- ar2.log.like(yt,c(phi[1],x),sigma.w, sigma.n) x <- x + 0.05 } plot(toplot[,1], toplot[,2],type="o") # wiggle parameters around ar2.log.like(yt,phi-.01,1.02*sigma.w, 0 ) ar2.log.like(yt,phi,sigma.w,sigma.n) max.like.ar2 <- function(yt) { # --- initialize with ls init.fit <- ar(yt, aic=FALSE, method="ols", order.max=2) phi <- as.vector(init.fit$ar) sigma.w <- sqrt(init.fit$var.pred) sigma.n <- 1 # --- call to optim binds data; variances on log scale ll <- function(parms) { ll <- ar2.log.like(yt, parms[1:2], exp(parms[3]), exp(parms[4])) cat("-2 log like = ", ll, " @ ", parms,"\n") save[save.index,] <<- c(parms,ll); save.index <<- save.index+1 ll } optim(c(phi,log(sigma.w), sigma.n), ll, method="BFGS", hessian=TRUE, control=list(trace=TRUE, maxit=20, parscale=c(0.1,0.1,1,1))) } # place to save intermediate results save <- matrix(0,nrow=200,ncol=4+1) colnames(save) <- c("phi1", "phi2", "sigma.w", "sigma.n", "-2loglike") save.index <- 1 results <- max.like.ar2(yt) plot(save[1:save.index,"phi1"], save[1:save.index,"phi2"]) plot(save[1:save.index,"phi1"], save[1:save.index,"-2loglike"]) plot(save[1:save.index,"phi2"], save[1:save.index,"-2loglike"]) #################################################### # # # ML for an AR(1) process: S&S example # # Example 6.6 # # # #################################################### # --- Generate Data set.seed(999) num = 100 N = num+1 x = arima.sim(n=N, list(ar = .8, sd=1)) y = ts(x[-1] + rnorm(num,0,1)) # plus additive noise # --- Initial Estimates for ML u = ts.intersect(y, lag(y,-1), lag(y,-2)) varu = var(u) coru = cor(u) phi = coru[1,3]/coru[1,2] q = (1-phi^2)*varu[1,2]/phi r = varu[1,1] - q/(1-phi^2) (init.par = c(phi, sqrt(q), sqrt(r))) # bundle parameters # --- Function to evaluate the likelihood Linn=function(para){ phi = para[1]; sigw = para[2]; sigv = para[3] Sigma0 = (sigw^2)/(1-phi^2); Sigma0[Sigma0<0]=0 kf = Kfilter0(num,y,1,mu0=0,Sigma0,phi,sigw,sigv) return(kf$like) } # --- Estimation (est = optim(init.par, Linn, gr=NULL, method="BFGS", hessian=TRUE, control=list(trace=1,REPORT=1))) SE = sqrt(diag(solve(est$hessian))) cbind(estimate=c(phi=est$par[1],sigw=est$par[2],sigv=est$par[3]), SE)