### R commands for Lecture 5 on regressinon setwd("/Users/bob/work/courses/stat910/") ################################################# # # Functions # ################################################# generalized.difference <- function(xt, alpha) { n<-length(xt) c(NA, xt[-1]-alpha*xt[-n]) # jam a leading missing value } periodic.r2 <- function(period, xt) { n <- length(xt) t <- 0:(n-1) freq <- 2*pi/period X <- cbind(cos(freq*t),sin(freq*t)) regr <- lm(xt ~ X) summary(regr)$r.squared } ################################################# # # Periodic Examples # ################################################# ### Variable Star data xt <- scan("http://stat.wharton.upenn.edu/~stine/stat910/data/varstar.dat") t <- 0:(length(xt)-1) plot(xt, type="o") # Guess the period period <- 600/21 plot(t %% period, xt) # better? plot(t %% (period+0.1), xt) plot(t %% (period+0.5), xt) plot(t %% (period+1.0), xt) # --- plot R2 as function of period p <- seq(28,30,by=0.1) r2 <- mapply(function(per){periodic.r2(per,xt)},p) plot(p,r2) # --- fit regression, look at residuals x1 <- cos(2*pi*t/29) ; x2 <- sin(2*pi*t/29) r1 <- lm(xt~x1+x2) ; summary(r1) plot(t,xt); lines(t,fitted.values(r1),col="red") # check out the residuals for pattern plot(t, residuals(r1)) # add another sinusoid to the regression (period 24) x3 <- cos(2*pi*t/24) ; x4 <- sin(2*pi*t/24) r2 <- lm(xt~x1+x2+x3+x4, x="TRUE") ; summary(r2) # note "x" option plot(t,xt); lines(t,fitted.values(r2),col="red") plot(t,residuals(r2)) # Notice the properties of the estimates, X'X x <- r2$x (t(x) %*% x) ### Hidden sinusoid (note pi is built-in constant) n <- 512 t <- 0:(n-1) yt <- cos(2 * pi * 35 * t/n + 0.4) + rnorm(n) plot(t,yt,type="o") x1 <- cos(2 * pi * 35 * t/n); x2 <- sin(2 * pi * 35 * t/n) regr <- lm(yt ~ x1 + x2) ; summary(regr) # sums of squares explained by the sinusoid related to coeffcients anova(regr) coef <- coefficients(regr) a <- coef[2]; b <- coef[3] ss <- (n/2)*(a^2 + b^2); ss ################################################# # # Periodogram (caution about normalization and plot scale) # ################################################# plot(yt, type="o") # default plot on log scale pg <- spec.pgram(yt) # plot on regular scale plot(pg$freq, pg$spec) pg$spec[35] * sqrt(2) # --- move frequency slightly, then redo the above... # Note what happens in the periodogram if not exactly FF # Discrete (fast) Fourier transform, compared to regression fft(1:4) # check how its normalized (or not) ft <- fft(yt) norm <- ft * Conj(ft) plot(1:512, norm) # repeated which.max(norm[1:257]) # j = 0,1,2,...,256 2*norm[36] /n # which is the regression SS for this frequency