#--------------------------------------------------------------------------- # # PCA and SVD # #--------------------------------------------------------------------------- source("~/courses/mich/text_analytics/R_scripts/text_utils.R") # make_pca_data # --- principal components, synthetic data set.seed(1621) X <- make_pca_data(200, 20, k=2, sd.signal=2) X <- t(t(X) - colMeans(X)) # center and scale data for easier comparison c( mean(X[,1]), mean(X[,3])) X <- t( t(X) / apply(X,2,sd) ) # R idiom for fast matrix calculations c( sd( X[,1]), sd( X[,2])) pca <- princomp(X); pca # built-in R plot(pca) # scree plot names(pca) pca$sd dim(pca$loadings) # 20 x 20 ... coefs of PCA dim(pca$scores) # n x 20 ... orthogonal features sd( X %*% pca$loadings[,1] ) sd( X %*% pca$loadings[,2] ) round( cor(pca$scores[,1:5]) ,3) # uncorrelated # loadings are the weights plot(X %*% pca$loadings[,1], pca$scores[,1]) # same thing abline(a=0,b=1) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # --- PCA via singular value decomp udv <- svd(X) names(udv) udv$d udv$d/pca$sd # different scaling convention plot(udv$u[,1], pca$scores[,1]) #--------------------------------------------------------------------------- # # Example why PCA/SVD is useful in regression (PCR) # #--------------------------------------------------------------------------- # --- synthetic data, no evident connection between y and x's set.seed(2467) n <- 400; Data <- make_hidden_pca_data(n) names(Data) dim(Data$x) par(mfrow=c(2,2)) for(j in c(1,4,7,14)) plot(Data$y ~ Data$x[,j], main=paste("X[",j,"]")) reset() # standardize x X <- scale(Data$x) colMeans(X)[1:5] apply(X,2,sd)[1:5] # use svd to obtain singular vector (PC) udv <- svd(X) plot(udv$d) plot(Data$y ~ udv$u[,1], xlab="First Singular Vector") # do you see a pattern? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ################################################################################## # # SVD via alternating least squares ALS # # Example shows first one # ################################################################################## normalize <- function(x) { return (x / sqrt(x%*%x)) } als.svd <- function(X, max.it = 5) { ss0 <- sum(X^2) u <- normalize(rowMeans(X)) # init with row means it <- 0; while(it < max.it) { it <- it + 1 v <- normalize(drop(u %*% X)) # u'u=1 u <- drop(v %*% t(X)) # v'v=1 s <- drop(sqrt( u%*%u )) u <- u/s ss1 <- sum((X - s*outer(u,v))^2) cat(ss1,"\n") if (((ss0-ss1)/ss0) < .001) break else ss0 <- ss1 } return(list(d=s,u=u,v=v)) } X <- matrix( rnorm(1000*200, mean=3), nrow=1000, ncol=200) cs <- colSums(X) rs <- rowSums(X) X <- 1/sqrt(rs) * t( 1/sqrt(cs) * t(X) ) uv <- als.svd(X) plot(sqrt(rs)/sqrt(sum(rs)), uv$u) plot(sqrt(cs)/sqrt(sum(cs)), uv$v) udv <- svd(X) c(udv$d[1], uv$d) plot(udv$u[,1], -uv$u, xlab="SVD", ylab="ALS", main="First U"); abline(a=0,b=1,col="gray") plot(udv$v[,1], -uv$v, xlab="SVD", ylab="ALS", main="First V"); abline(a=0,b=1,col="gray")