#  Spectral estimation

# SS code

load("/Users/bob/courses/stat910/rcode/tsa3.rda")


################################################################
# watch out for padding
#

# start with data with a fourier frequency
n  <- 101
xt <- 25 + sin(2 * pi * 10/n * (1:n)) +  .1*rnorm(n) 
plot(xt)

# check the pad and n.used component of result
s1 <- spec.pgram(xt, taper=0.0, demean=FALSE, detrend=FALSE)
names(s)
s1

# without the mean 
s2 <- spec.pgram(xt, taper=0.0, demean=TRUE, detrend=FALSE, fast=TRUE, plot=FALSE)
lines(s2$freq, s2$spec)

s2 <- spec.pgram(xt, taper=0.0, demean=TRUE, detrend=FALSE, fast=TRUE, plot=TRUE)

# without the mean and tapered to sharpen peak
s3 <- spec.pgram(xt, taper=0.1, demean=TRUE, detrend=FALSE, fast=TRUE, plot=FALSE)
lines(s3$freq, s3$spec, col="gray")

# some software will make the length a power of 2 by padding with zero or the mean
# R uses a so-called mixed radix, composite FFT if it can
yt <- c(xt,rep(0,128-length(xt))); length(yt)
plot(yt)

yt <- c(xt,rep(mean(xt),128-length(xt))); length(yt)
plot(yt)

# not at a FF anymore
s4 <- spec.pgram(yt, taper=0.0, demean=TRUE, detrend=FALSE)
lines(s2$freq,s2$spec, col="gray")


################################################################
# watch out for aliasing
#

# start with data with a fourier frequency, but too high
n  <- 128
xt <- 25 + sin(2 * pi * 120/n * (1:n)) +  .1*rnorm(n) 
plot(xt)


# check the pad and n.used component of result
s <- spec.pgram(xt, taper=0.1, demean=TRUE)
names(s)
s


################################################################
# simulated AR(4) with two peaks

n <- 1024
phi <- c(2.7607, -3.8106, 2.6535, -0.9238)
true.f <- spec.arma(phi)

ar4.data <- arima.sim(model=list(ar=phi), n=1024)


# raw periodogram shows leakage effects
pgram <- spec.pgram(ar4.data, taper=0.0, demean=FALSE, detrend=FALSE, plot=FALSE) 
lines(pgram$freq, pgram$spec)

# slight tapering improves, but still variable
pgram.tapered <- spec.pgram(ar4.data, taper=0.1, demean=FALSE, detrend=FALSE, plot=FALSE)
lines(pgram.tapered$freq, pgram.tapered$spec, col="red")

# smoothing improves estimate, but blurs peak
pgram.smth <- spec.pgram(ar4.data, taper=0.1, demean=FALSE, detrend=TRUE, plot=FALSE)
lines(pgram.tapered$freq, pgram.tapered$spec, col="red")

# alternative class of estimators fit an AR
s <- spec.ar(ar4.data, order=8, n.freq<-length(pgram$freq), plot=FALSE, method="burg")
lines(s$freq,s$spec,col="blue")


################################################################
# read data (variable star)

xt <- scan("http://www-stat.wharton.upenn.edu/~stine/stat910/data/varstar.dat")
plot(xt, type="o")

# spectral estimate

p2 <- spec.pgram(xt, demean=TRUE, taper=0.1, plot=FALSE); 

plot(p2$freq,p2$spec)
plot(p2$freq,p2$spec,log="y", type="l")

p3 <- spec.pgram(xt, demean=TRUE, taper=0.1, plot=FALSE, spans=c(5,3)); 
lines(p3$freq,p3$spec, col="red")


################################################################
#
# multitaper methods  (sapa package)
#
library(sapa)
#
#

n    <- 1024
time <- 0:(n-1)

# plot taper functions  (discrete prolate spheroid)

h.k <- taper(type="dpss", n.sample=n, n.taper=8, bandwidth=4)   # bw = number multiples of FF
dim(h.k)                                                        # each row is taper
round(h.k %*% t(h.k), digits=4)                                 # orthogonal

stackPlot(time, t(h.k[1:4,]))            # stackPlot loaded with sapa
stackPlot(time, t(h.k[5:8,]))                       


# FT of data tapers 

ft.k <- Mod(t(mvfft(t(h.k))))

freq <- 1:64
plot (time[freq],ft.k[1, freq],log="y",type="l")
lines(time[freq],ft.k[2, freq],      col="red")
lines(time[freq],ft.k[3, freq],      col="green")
lines(time[freq],ft.k[4, freq],      col="blue")


# FT of tapered data; note eventual presence of leakage

ar4.data <- arima.sim(model=list(ar=phi), n=1024)

true.f <- spec.arma(phi)
s1 <- spec.pgram(h.k[1,] * ar4.data, taper=0.0, plot=FALSE); lines(s1$freq, n * s1$spec)            
s2 <- spec.pgram(h.k[2,] * ar4.data, taper=0.0, plot=FALSE); lines(s2$freq, n * s2$spec, col="pink")
s3 <- spec.pgram(h.k[3,] * ar4.data, taper=0.0, plot=FALSE); lines(s3$freq, n * s3$spec, col="green")
s4 <- spec.pgram(h.k[4,] * ar4.data, taper=0.0, plot=FALSE); lines(s4$freq, n * s4$spec, col="blue")

true.f <- spec.arma(phi)
s5 <- spec.pgram(h.k[5,] * ar4.data, taper=0.0, plot=FALSE); lines(s5$freq, n * s5$spec)            
s6 <- spec.pgram(h.k[6,] * ar4.data, taper=0.0, plot=FALSE); lines(s6$freq, n * s6$spec, col="red")
s7 <- spec.pgram(h.k[7,] * ar4.data, taper=0.0, plot=FALSE); lines(s7$freq, n * s7$spec, col="green")            
s8 <- spec.pgram(h.k[8,] * ar4.data, taper=0.0, plot=FALSE); lines(s8$freq, n * s8$spec, col="blue")


# Estimator obtained by averaging

true.f <- spec.arma(phi)
s.bar <- (s1$spec+s2$spec+s3$spec+s4$spec+s5$spec)/5
lines(s1$freq, n*s.bar, col="seagreen")


# built-in routines  (see docs with sapa package)
# You can see the increasing variance due to leakage at higher frequency in the plot

h.k <- taper(type="dpss", n.sample=n, n.taper=6, bandwidth=4)   # bw = number multiples of FF
ff <- SDF(as.vector(ar4.data), method="multitaper", taper=h.k)
plot(ff, col="red")