SE.comparisons <- function(x, y, Nboot=10000, digits=3) { # x must be a matrix or dataframe, even if it's just a column X <- cbind(icept=1,as.matrix(x)); Y <- y; N <- nrow(X); p <- ncol(X) if(nrow(X) != length(y)) { cat("Number of X rows and length of y are different!\n"); return() } ## X <- scale(x); Y <- scale(y) ## mod.lm <- summary(lm(Y ~ . - 1, data=as.data.frame(X))) mod.lm <- summary(lm(Y ~ ., data=as.data.frame(X))) slopes.bs <- rbind(sapply(1:Nboot, function(i) { sel <- sample(nrow(X), replace=T); if(i%%1000==0) cat(i,"") ## Xb <- scale(X[sel,],scale=F); Yb <- Y[sel] Xb <- X[sel,]; Yb <- Y[sel] solve(t(Xb)%*%Xb)%*%t(Xb)%*%Yb } )); cat("\n") # rbind() for matrix slopes.bs.SE <- apply(slopes.bs, 1, sd) # sandwich estimator, corrected for design: X3 <- X%*%solve(t(X)%*%X) Hdiag <- diag(X%*%solve(t(X)%*%X)%*%t(X)) # SE.sandw <- sqrt(apply(resid(mod.lm)^2 * X3^2, 2, sum) * N/(N-p)) # global correction SE.sandw <- sqrt(apply(resid(mod.lm)^2 * X3^2 / (1-Hdiag), 2, sum)) # local correction # root of RAV sample estimate: X.adj <- sapply(1:ncol(X), function(j) resid(lm(X[,j] ~ ., as.data.frame(X[,-j])))) X.adj[,1] <- resid(lm(X[,1] ~ .-1, data=as.data.frame(X[,-1]))) colnames(X.adj) <- colnames(X) y.res <- resid(lm(y ~ X)) root.RAV <- sapply(colnames(X.adj), function(j) sqrt(mean(y.res^2 * X.adj[,j]^2)/mean(y.res^2)/mean(X.adj[,j]^2)) ) # List: round(cbind("Slope"=mod.lm$coeff[,"Estimate"], "SE.conv"=mod.lm$coeff[,"Std. Error"], "SE.boot"=slopes.bs.SE, "SE.sandw"=SE.sandw, "boot/conv"=slopes.bs.SE/mod.lm$coeff[,"Std. Error"], "sandw/conv"=SE.sandw/mod.lm$coeff[,"Std. Error"], "sandw/boot"=SE.sandw/slopes.bs.SE, "root.RAV"=root.RAV, "t.conv"=mod.lm$coeff[,"t value"], "t.boot"=mod.lm$coeff[,"t value"] * mod.lm$coeff[,"Std. Error"] / slopes.bs.SE, "t.sand"=mod.lm$coeff[,"t value"] * mod.lm$coeff[,"Std. Error"] / SE.sandw, NULL), digits) } ## SE.comparisons <- function(x, y, Nboot=10000, digits=3) { # x must be a matrix or dataframe or a column ## X <- cbind(icept=1,as.matrix(x)); Y <- y; N <- nrow(X); p <- ncol(X) ## if(nrow(X) != length(y)) { cat("Number of X rows and length of y are different!\n"); return() } ## mod.lm <- summary(lm(Y ~ ., data=as.data.frame(X))) ## slopes.bs <- rbind(sapply(1:Nboot, function(i) { sel <- sample(nrow(X), replace=T); if(i%%1000==0) cat(i,"") ## ## Xb <- scale(X[sel,],scale=F); Yb <- Y[sel] ## Xb <- X[sel,]; Yb <- Y[sel] ## solve(t(Xb)%*%Xb)%*%t(Xb)%*%Yb } )); cat("\n") # rbind() for matrix ## slopes.bs.SE <- apply(slopes.bs, 1, sd) ## # sandwich estimator, corrected for design: ## X3 <- X%*%solve(t(X)%*%X) ## Hdiag <- diag(X%*%solve(t(X)%*%X)%*%t(X)) ## # SE.sandw <- sqrt(apply(resid(mod.lm)^2 * X3^2, 2, sum) * N/(N-p)) # global correction ## SE.sandw <- sqrt(apply(resid(mod.lm)^2 * X3^2 / (1-Hdiag), 2, sum)) # local correction ## # root of RAV sample estimate: ## X.adj <- sapply(1:ncol(X), function(j) resid(lm(X[,j] ~ X[,-j]))) ## colnames(X.adj) <- colnames(X) ## y.res <- resid(lm(y ~ X)) ## root.RAV <- sapply(colnames(X.adj), function(j) sqrt(mean(y.res^2 * X.adj[,j]^2)/mean(y.res^2)/mean(X.adj[,j]^2)) ) ## # List: ## round(cbind("Slope"=mod.lm$coeff[,"Estimate"], ## "SE.conv"=mod.lm$coeff[,"Std. Error"], ## "SE.boot"=slopes.bs.SE, ## "SE.sandw"=SE.sandw, ## "boot/conv"=slopes.bs.SE/mod.lm$coeff[,"Std. Error"], ## "sandw/conv"=SE.sandw/mod.lm$coeff[,"Std. Error"], ## "sandw/boot"=SE.sandw/slopes.bs.SE, ## "t.conv"=mod.lm$coeff[,"t value"], ## "t.boot"=mod.lm$coeff[,"t value"] * mod.lm$coeff[,"Std. Error"] / slopes.bs.SE, ## "t.sand"=mod.lm$coeff[,"t value"] * mod.lm$coeff[,"Std. Error"] / SE.sandw, ## "root.RAV"=root.RAV, ## NULL), digits) ## }