######################################################################## #A few examples of fitting Hidden Markov Models ######################################################################## #Here there are two data sets I am using, sp500.r is #a vector containg roughly 20 years of daily sp500 returns, 1980-2002 #sp500.log is a vector of zeros and ones (log is for logistic). A zero indicates a #negative return, and one indicates a positive return ################################################################################### #Example 1 ####################### # fitting a Hidden Markov Model to the SP500.log data using initial transition matrix Pi # as the underlying conditional distribution for the states, we use # a binomial distribution # Note that all the parameters must be initialized prior to calibration of the model, # this is because the EM (given by function "BaumWelch" only takes obejcts of class "dthmm", # In this case, I choose random initial parameters of .5 for all entries in th matrix # calibration will give us a more realistic idea of where we should start the parameters # Also, someone should consider bootstraping the model with random initializations to optimize # and to get p-values Pi <- matrix(c(0.5, 0.5, 0.5, 0.5), byrow=TRUE, nrow=2) delta <- c(0, 1) #pm is a list object containing the (Markov dependent) parameter values associated with the distribution #of the observed process (see help file) #pn is a list object containing the observation dependent parameter values associated with the distribution #of the observed process (see help file) #I choose a default of the following distribution rbinom(n=1000, prob=.5, size=1) #The optimization will indicate the different #parameters for both states #pn is a vector the same length as the series indicating the parameters of the distribution over time pn <- list(size=rep(1,length(sp500.log))) x <- dthmm(sp500.log, Pi, delta, "binom", list(prob=c(0.5, 0.5)), pn, discrete=TRUE) # After specifying the initial parameter values for the model, they will then be optimized using the # BaumWelch function which deploys the EM algorithm sp500.EM_ALG = BaumWelch(x) print(summary(sp500.EM_ALG)) $Pi [,1] [,2] [1,] 0.5133433 0.4866567 [2,] 0.4904323 0.5095677 $distn [1] "binom" $pm $pm$prob [1] 0.5704765 0.4769290 $discrete [1] TRUE $n [1] 5801 # In this case, the states refer to positive vs. negative returns. #As such, the estimated values for Pi are not unreasonable # $pm$prob indicates the two parameters for the underlying binomial # distribution for the two states (i.e. p=0.5704765 in state 1, # and p=0.4769290 ######################################################################## #Example 2 ####################### # In the following example, we use the returns of the sp500 model # as the underlying conditional distribution for the states, we use # a normal distribution # Note that all the parameters must be initialized prior to calibration of the model, # this is because the EM (given by function "BaumWelch" only takes obejcts of class "dthmm", # In this case, I choose random initial parameters of .5 for all entries in th matrix # calibration will give us a more realistic idea of where we should start the parameters # Also, someone should consider bootstraping the model with random initializations to optimize # and to get p-values #Pi: m times m transition probability matrix of the homogeneous hidden Markov chain. Pi <- matrix(c(.5,.5,.5,.5),byrow=TRUE, nrow=2) n= length(sp500.log) #pm is a list object containing the (Markov dependent) parameter values associated with the distribution #of the observed process (see help file) #pn is a list object containing the observation dependent parameter values associated with the distribution #of the observed process (see help file) #I choose a default of mean=1, sd=1 #for the default distribution, I am using N(0,1) for both states. The optimization will indicate the different #parameters for both states pn <- list(mean=rep(mean(sp500.r), n)) sd1 = 1 sd2= 1 pm <- list(sd=c(sd1,sd2)) x <- dthmm(sp500.r, Pi, c(0,1), "norm", pm=pm, pn=pn, discrete=TRUE) # After specifying the initial parameter values for the model, they will then be optimized using the # BaumWelch function which deploys the EM algorithm sp500.EM_ALG = BaumWelch(x) print(summary(sp500.EM_ALG)) $Pi [,1] [,2] [1,] 0.95667019 0.04332981 [2,] 0.01173231 0.98826769 $nonstat [1] TRUE $distn [1] "norm" $pm $pm$sd [1] 0.018023357 0.007514857 # i.e. there is a high, and a low volatility state. # Notice that the states are HIGHLY persistent, which corresponds well # empirical stylised facts. # also, the daily standard deviations are 1.8% for the high volatility state, # and 0.7% for the low volatility state # this is also consistent with stylised empirical facts ##################################################################################### ######################################################################## #Example 3 ####################### # In the following example, we use the returns of the sp500 model # as the underlying conditional distribution for the states, we use # a normal distribution. In this case, both the mean and sd take on # different parameter values for each state. #Pi: m times m transition probability matrix of the homogeneous hidden Markov chain. Pi <- matrix(c(.5,.5,.5,.5),byrow=TRUE, nrow=2) #Initialize the parameters for the two states using the mean and sd of the entire series. #Not an unreasonable first guess. sd1 = sd(sp500.r) sd2= sd(sp500.r) mean1 = mean(sp500.r) mean2 = mean(sp500.r) pm <- list(sd=c(sd1,sd2),mean = c(mean1,mean2)) x <- dthmm(sp500.r, Pi, c(0,1), "norm", pm=pm, discrete=TRUE) # After specifying the initial parameter values for the model, they will then be optimized using the # BaumWelch function which deploys the EM algorithm sp500.EM_ALG = BaumWelch(x) print(summary(sp500.EM_ALG)) $Pi [,1] [,2] [1,] 0.95732552 0.04267448 [2,] 0.01241173 0.98758827 $distn [1] "norm" $pm $pm$mean [1] -0.0005950057 0.0006434609 $pm$sd [1] 0.017846200 0.007469496 # i.e. there is a high, and a low volatility state. And each state has a different mean. # As expected, the high volatility state has a lower (negative) mean, whearas the low # volatility state has a positive mean. # Notice that the states are HIGHLY persistent, which corresponds well # empirical stylised facts. # also, the daily standard deviations are 1.8% for the high volatility state, # and 0.75% for the low volatility state # the mean for the high volatility state is -0.0595% (half a percent), # and 0.064% for the low volatility state (also about half a percent. #These results are reasonably consistent with sp500 stylized facts. #####################################################################################