### R commands for Lecture 5 on regressinon setwd("/Users/bob/work/courses/stat910/") ################################################# # # Functions # ################################################# mean.na <- function(x) { mean(x,na.rm = TRUE) } sd.na <- function(x) { sd (x,na.rm = TRUE) } lag.plot <- function(xt,lag) { n <- length(xt); plot(xt[-(n:(n-lag+1))],xt[-(1:lag)], xlab=paste("X[t-",lag,"]"), ylab="X_t") } generalized.difference <- function(xt, alpha) { n<-length(xt) c(NA, xt[-1]-alpha*xt[-n]) # jam a leading missing value } ################################################# # # Periodic Examples # ################################################# ### Variable Star data xt <- scan("data/varstar.dat") t <- 1:length(xt) plot(xt, type="o") # Guess the period x1 <- cos(2*pi*21*t/600) ; x2 <- sin(2*pi*21*t/600) r1 <- lm(xt~x1+x2) ; summary(r1) points(t,fitted.values(r1),col="red") # check out the residuals for pattern plot(t, residuals(r1)) # add another sinusoid to the regression x3 <- cos(2*pi*25*t/600) ; x4 <- sin(2*pi*25*t/600) r2 <- lm(xt~x1+x2+x3+x4, x="TRUE") ; summary(r2) # note "x" option plot(t,xt); points(t,fitted.values(r2),col="red") plot(t,residuals(r2)) # Notice the properties of the estimates, X'X x <- r2$x (t(x) %*% x) ### Hidden sinusoid (note pi is built-in constant) n <- 256 t <- 1:n yt <- cos(2 * pi * 35 * t/n) + rnorm(n) plot(t,yt,type="o") x1 <- cos(2 * pi * 35 * t/n); x2 <- sin(2 * pi * 35 * t/n) regr <- lm(yt ~ x1 + x2) ; summary(regr) # sums of squares anova(regr) coef <- coefficients(regr) a <- coef[2]; b <- coef[3] (n/2)*(a^2 + b^2) ################################################# # # Periodogram (caution about normalization and plot scale) # ################################################# plot(yt, type="o") # default plot on log scale pg <- spec.pgram(yt) # plot on regular scale plot(pg$freq, pg$spec) pg$spec[36] * 2 * pi * sqrt(n) # Discrete (fast) Fourier transform, compared to regression fft(1:4) # check how its normalized (or not) ft <- fft(yt) norm <- ft * Conj(ft) plot(1:256, norm) # repeated which.max(norm[1:129]) # j = 0,1,2,...,128 2*norm[36] /n # which is the regression SS for this frequency