######## HOMEWORK 4, STAT 541, DUE FRI, OCT 14, 2011, 5pm ######## # This homework is an exercise in Least Squares computation. We will # build up R code that # 1) computes a Q-R decomposition of a matrix X, # 2) solves equations when the coefficients are upper triangular, # 3) inverts an upper triangular matrix R, and finally # 4) computes and returns most things we find useful in regression. # We will be negligent in the sense that we will ignore precautions # that a numerical analyst would build into his code, and we will not # consider measures to improve numerical conditioning such as pivoting # (switching the order of the variables) either. # BACKGROUND: Most LS solvers are based on decompositions of the # design matrix of the form X = Q R, where R is square and triangular # and Q is of the same size as X but has orthonormal columns: # Q^T Q = I (p+1)x(p+1) # Note that here the columns of Q form an orthonormal basis # of the column space of X, not residual space. The matrix # H = Q Q^T # is then the orthogonal projection onto the column space of X. # With this knowledge write 'yhat' in two ways, where 'b' as usual # is the vector of LS estimates: # yhat = X b = Q Q^T y # Q R b = Q Q^T y (left-multiply both sides with Q^T) # R b = Q^T y # The last equation is easily solved for b when R is triangular. # Furthermore, for inference we want the matrix (X^T X)^{-1}, # but with a Q-R decomposition this is easily gotten by # inverting R because # (X^T X)^{-1} = (R^T R)^{-1} = R^{-1} R^{-1}^T #---------------------------------------------------------------- # PROBLEM 1: Write a function 'gs(X)' to implement the so-called # modified Gram-Schmidt procedure for orthonormalizing the columns # of a matrix X. As this procedure orthonormalizes the columns # of X to form the matrix Q of the same size, it stores the # coefficients to reconstitute X from Q in an upper triangular # matrix R. # Here is the algorithm: # Initialize Q with a copy of X # Loop over the columns of Q (j=1,2,...,p-1) and do the following: # At stage j, # normalize Q[,j] in place # adjust (=orthogonalize) the columns of Q[,(j+1):p] w.r.t. Q[,j] # As this algorithm proceeds, store suitable norms and inner products # in the appropriate cells of R so as to reconstitute X from Q. # If you do this right, it should hold at the end: X = Q R. # Finish the function gs() with return(list(Q=Q, R=R)) # In this algorithm, use for-loops for looping over columns, # but use sum() to calculate inner products and squared norms, # but not lm() or any other high-level function. # Test your function on these data: X <- cbind(rep(1,10), 1:10, (1:10)^2) sol <- gs(X) # Show the following and comment on what they mean and what they should show: sol$Q %*% sol$R t(sol$Q) %*% sol$Q # YOUR SOLUTION: gs <- function(X){ p <- ncol(X) Q <- X R <- matrix(0, ncol=p, nrow=p) z <- rep(NA,p) for(j in 1:(p-1)) { norm <- sqrt(sum(Q[,j]^2)) Q[,j] <- Q[,j] / norm R[j,j] <- norm for(k in (j+1):p) { inner <- sum(Q[,j] * Q[,k]) R[j,k] <- inner Q[,k] <- Q[,k] - inner*Q[,j] } } norm <- sqrt(sum(Q[,p]^2)) Q[,p] <- Q[,p] / norm R[p,p] <- norm return(list(Q=Q, R=R)) } sol <- gs(X) sol$Q %*% sol$R [,1] [,2] [,3] [1,] 1 1 1 [2,] 1 2 4 [3,] 1 3 9 [4,] 1 4 16 [5,] 1 5 25 [6,] 1 6 36 [7,] 1 7 49 [8,] 1 8 64 [9,] 1 9 81 [10,] 1 10 100 # Correct: Q and R do form a Q-R decomposition of X. t(sol$Q) %*% sol$Q [,1] [,2] [,3] [1,] 1.000000e+00 2.031524e-17 2.019327e-18 [2,] 2.031524e-17 1.000000e+00 -1.772671e-17 [3,] 2.019327e-18 -1.772671e-17 1.000000e+00 # Correct: The columns of Q are orthonormal up to numerical inaccuracy. #---------------------------------------------------------------- # PROBLEM 2: Write a function 'tri.solve(R,z)' that accepts an upper # triangular matrix R and a column z and returns the solution b of # z = R b. Show the results for these inputs: z <- 10:1 R <- (row(diag(10)) <= col(diag(10))) # You may use loops, but you may also use the sum() function inside # tri.solve() to spare yourself an inner loop. You must not use # canned solvers such as solve() or ginv() (the latter in package MASS). # Loop over the elements of b (or almost all of them) but use sum() # for inner products. # YOUR SOLUTION: tri.solve <- function(R,z) { p <- nrow(R) b <- rep(NA, p) b[p] <- z[p]/R[p,p] for(j in (p-1):1) b[j] <- (z[j] - sum(R[j,(j+1):p]*b[(j+1):p])) / R[j,j] return(b) } tri.solve(R,z) [1] 1 1 1 1 1 1 1 1 1 1 # correct! #---------------------------------------------------------------- # PROBLEM 3: No programming, just thinking. # Assume R and R1 are upper triangular square matrices. # Questions: # a) If the vector b has non-zero values only in the first k entries, # what can one say about z=Rb? # b) When is R invertible? # c) If the vector z has non-zero values only in the first k entries, # and if R is invertible, what can one say about the vector b? # d) What can one say about R1%*%R? # e) What can one say about R^{-1}? # YOUR SOLUTION: # a) The vector z has also non-zero values in the first k entries only. # The reason is that for entries past k the lower triangle annihilates # all values of the vector. # b) R is invertible exactly if all diagonal elements are non-zero. # The reason is that solving works exactly if one can divide by # all diagonal elements. See the above code for tri.solve(). # c) Again, one gets a vector with non-zero values in the first k # entries only. This is obvious from the code of tri.solve(). # d) R1%*%R is also upper triangular because of a). # e) R^{-1} is also upper triangular because of c). The reason is # that forming inverses means solving for all columns of the # identity matrix, which is also upper triangular. #---------------------------------------------------------------- # PROBLEM 4: For inference about the regression coefficients, we need # the matrix (X^T X)^{-1}. In preparation for it, we need the # inversion of an upper triangular matrix R because from a Q-R # decomposition X=QR we can easily obtain (X^T X)^{-1} if we have # R^{-1}. Package the inversion in a function inv(R) using tri.solve(). # Try it out on the R matrix of the above 'sol': inv(sol$R) inv(sol$R) %*% sol$R # Show both and comment on both. # YOUR SOLUTION: inv <- function(R) { N <- nrow(R) I <- diag(N) for(j in 1:N) I[,j] <- tri.solve(R,I[,j]) return(I) } inv(sol$R) [,1] [,2] [,3] [1,] 0.3162278 -0.6055301 0.95742711 [2,] 0.0000000 0.1100964 -0.47871355 [3,] 0.0000000 0.0000000 0.04351941 # From the previous problem, part e., we know that inv(sol$R) must # also be upper triangular inv(sol$R) %*% sol$R [,1] [,2] [,3] [1,] 1 -3.508478e-16 -1.335737e-15 [2,] 0 1.000000e+00 -1.108488e-15 [3,] 0 0.000000e+00 1.000000e+00 # This is the identity matrix up to numerical inaccuracy. # The lower triangle is exactly zero because the matrix # product of two upper triangular matrices is also # upper triangular. #---------------------------------------------------------------- # PROBLEM 5: Using tri.solve(R,z), inv(R) and gs(X,y), write a # function reg(X,y) that computes a list with the following named # elements: # a) "Coeffs": a matrix with one row per regression coefficient and columns # named "Coefficient", "Std.Err.Est", "t-Statistic", "P-Value" # b) "Var": the estimated coeff variance/covariance matrix s^2*(X^T X)^{-1} # c) "RMSE": a number, s # d) "R2": a number, R2 # e) "Fstat": the value of the overall F statistic # f) "Fpval": the corresponding p-value # g) "Residuals": the vector of residuals # Assume that X does not have a column of 1's but an intercept is desired, # hence first thing in the function do X <- cbind(Icept=1,X) . # You may use matrix multiplication if convenient, but no high-level # functions other than things like sqrt() and pt(). # Try your solution on the following data and show the full result: X <- cbind(Pred1=1:10, Pred2=(1:10)^2) y <- rep(1,10) + 2*X[,1] + 3*X[,2] + resid(lm(rep(0:1,5)~X)) reg(X,y) # Compare with summary(lm(y~X)) # No need to comment, but you need to get the exact numbers. # YOUR SOLUTION: reg <- function(X,y) { n <- nrow(X) p <- ncol(X) X <- cbind(Icept=1,X) qr <- gs(X) b <- tri.solve(qr$R, t(qr$Q)%*%y) r <- y - X %*% b RSS <- sum(r^2) TSS <- sum((y-mean(y))^2) R2 <- (TSS-RSS)/TSS s2 <- RSS/(n-p-1) s <- sqrt(s2) Fstat <- ((TSS-RSS)/p) / s2 Fpval <- pf(Fstat,p,N-p-1,lower.tail=F) R.inv <- inv(qr$R) V.b <- R.inv %*% t(R.inv) * s2 coeffs <- matrix(NA, nrow=p+1, ncol=4, dimnames=list(colnames(X), c("Coefficient","Std.Err.Est","t-Statistic","P-Value"))) coeffs[,"Coefficient"] <- b coeffs[,"Std.Err.Est"] <- sqrt(diag(V.b)) coeffs[,"t-Statistic"] <- coeffs[,"Coefficient"] / coeffs[,"Std.Err.Est"] coeffs[,"P-Value"] <- 2*pt(-coeffs[,"t-Statistic"],df=n-p-1) return(list(Coeffs=coeffs, Var=V.b, RMSE=s, R2=R2, Fstat=Fstat, Fpval=Fpval, Residuals=r)) } # Demonstration: X <- cbind(Pred1=1:10, Pred2=(1:10)^2) y <- rep(1,10) + 2*X[,1] + 3*X[,2] + resid(lm(rep(0:1,5)~X)) reg(X,y) # Output: $Coeffs Coefficient Std.Err.Est t-Statistic P-Value Icept 1 0.69215351 1.444766 1.917550e-01 Pred1 2 0.28907249 6.918680 2.274824e-04 Pred2 3 0.02561073 117.138380 8.713479e-13 $Var [,1] [,2] [,3] [1,] 0.47907648 -0.181818182 0.0144300144 [2,] -0.18181818 0.083562902 -0.0072150072 [3,] 0.01443001 -0.007215007 0.0006559097 $RMSE [1] 0.5884899 $R2 [1] 0.999977 $Fstat [1] 152769.7 $Fpval [1] 2.118288e-170 $Residuals [,1] [1,] -0.3636364 [2,] 0.6060606 [3,] -0.4242424 [4,] 0.5454545 [5,] -0.4848485 [6,] 0.4848485 [7,] -0.5454545 [8,] 0.4242424 [9,] -0.6060606 [10,] 0.3636364 # Comparison with lm(): agreement in everything, except we got R2 more precise. summary(lm(y~X)) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.00000 0.69215 1.445 0.191755 XPred1 2.00000 0.28907 6.919 0.000227 *** XPred2 3.00000 0.02561 117.138 8.71e-13 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 0.5885 on 7 degrees of freedom Multiple R-Squared: 1, Adjusted R-squared: 1 F-statistic: 1.528e+05 on 2 and 7 DF, p-value: < 2.2e-16 #----------------------------------------------------------------