# Demo code for Bootstrap 4 # --- Access a collection of supplied routines search() library(boot) search() # --- Example of bootstrapping correlation with 15 law schools # Get the data law <- read.table(":Data:lsat.dat",header=T) attach(law) plot(lsat, gpa) cor(lsat, gpa) # Bootstrap by hand data <- cbind(lsat, gpa) cor.bs <- vector(mode="numeric", length=1000) for(b in 1:length(cor.bs)) { data.star <- data[sample(1:15,15,replace=T),] cor.bs[b] <- cor(data.star[,"gpa"],data.star[,"gpa"]) } hist(cor.bs,probability=T); lines(density(cor.bs)) quantile(cor.bs,c(.025,.975)) # Using the boot library functions library(boot) # get the library of bootstrap funs ?boot my.cor <- function(data, indices) { # needs these two args data <- data[indices,]; # random resampling cor(data[,"gpa"],data[,"lsat"]); # corr of these columns } boot.cor <- boot(law, my.cor, 1000) # data frame, stat, B boot.cor # note it includes a bias estimate summary(boot.cor) plot(boot.cor) who <- boot.array(boot.cor) who[1:2,] plot(lsat,gpa, cex=who[1,]) # who got picked ?boot.ci boot.ci(boot.cor, type=c("norm", "perc", "bca")) detach(law) # --- Load and attach data on Florida voting in 2000 US election Florida <- read.table(":Data:florida2000.dat", header=T) names(Florida) attach(Florida) # --- Example of regression, with an outlier (Palm Beach in row 50) plot(regReform,buchanan) identify(regReform,buchanan,labels=dimnames(Florida)[[1]]) # give data set explicitly for labels regr <- lm(buchanan ~ regReform, data=Florida) summary(regr) abline(regr) # refit without Palm Beach regr.del <- update(regr,subset=-50) abline(regr.del, col="red") # function to give to boot function my.regr <- function(data, indices) { data <- data[indices,] model <- lm(buchanan ~ regReform,data=data) coefficients(model) } # bootstrap using boot function boot.regr <- boot(Florida, my.regr, 1000) boot.regr plot(boot.regr, index=1) plot(boot.regr, index=2) plot(boot.regr$t[,1], boot.regr$t[,2]) # slope vs intercept # make sense to do a confidence interval??? boot.ci(boot.regr,index=1, type=c("norm", "perc", "bca")) boot.ci(boot.regr,index=2, type=c("norm", "perc", "bca")) # --- Alternative residual resampling method x <- regReform yHat <- regr$fitted.values resid <- regr$residuals # function to give to boot function my.regr.fix <- function(data, indices) { y.star <- yHat + resid[indices] model <- lm(y.star ~ x) coefficients(model) } # bootstrap using boot function boot.regr.fix <- boot(Florida, my.regr.fix, 400) boot.regr.fix plot(boot.regr.fix, index=2) boot.ci(boot.regr,index=2, type=c("norm", "perc", "bca")) # draw a bootstrap sample plot(x, yHat) plot(x, yHat + resid[sample(1:67,67,replace=T)]) # --- Robust regression alternative plot(regReform,buchanan) abline(regr) regr.del <- update(regr,subset=-50) abline(regr.del, col="red") # open a library of additional routines library(MASS) # MASS contains a tool for fitting robust regressions (Fox appendix on web) rob.regr <- rlm(buchanan ~ regReform, data=Florida) summary(rob.regr) abline(rob.regr, col="blue") names(rob.regr) n <- length(buchanan) plot(1:n, rob.regr$w) identify(1:n, rob.regr$w, labels=dimnames(Florida)[[1]]) # each robust regression is a sequence of weighted regressions my.rregr <- function (data,indices) { data <- data[indices,] fit <- rlm(buchanan ~ regReform, data=data, maxit=50) coefficients(fit) } boot.rregr <- boot(Florida, my.rregr, 400) boot.rregr # bootstrap gives rather different SE's as well as CI for the slope plot(boot.rregr, index=2) boot.ci(boot.rregr, index=2, type=c("norm", "perc", "bca")) detach(Florida) # --- Multiple regression Duncan <- read.table(":Data:duncan.dat", header=T) names(Duncan) labels(Duncan) attach(Duncan) pairs(Duncan) plot(income, prestige) identify(income, prestige, labels=labels(Duncan)[[1]]) regr <- lm(prestige ~ income + education, data=Duncan) regr summary(regr) plot(regr) # make sure MASS still on search path search() # library(MASS) rob.regr <- rlm(prestige ~ income + education, data=Duncan) summary(rob.regr) n <- length(prestige) plot(1:n, rob.regr$w) identify(1:n, rob.regr$w, labels=labels(Duncan)[[1]]) # each robust regression is a sequence of weighted regressions my.rregr <- function (data,indices) { data <- data[indices,] fit <- rlm(prestige ~ income + education, data=data, maxit=50) coefficients(fit) } boot.rregr <- boot(Duncan, my.rregr, 400) boot.rregr # look close to normal plot(boot.rregr, index=2) boot.ci(boot.rregr, index=2, type=c("norm", "perc", "bca")) # differences - significant? hist( boot.rregr$t[,2]-boot.rregr$t[,3]) quantile( boot.rregr$t[,2]-boot.rregr$t[,3], c(.025,.975) )