##################################################################### # # Generate a sample of 50 normals with mean 10, sd 1, and compare to bootstrap # ##################################################################### n <- 50 data <- 10 + rnorm(n); data xbar <- mean(data) ; xbar ### usual confidence interval tstat <- qt(0.975,49); tstat se <- sd(data)/sqrt(n); se c(xbar - tstat * se, xbar + tstat * se) ### sampling with replacement sample(1:10, 10, replace=TRUE) ### example with average mean(sample(data,n,replace=TRUE)) ### R Program to do bootstrap resampling ( 'stat' is a function ) ### and to summarize data bootstrap <- function(x, stat, B) { result <- rep(0,B); for(i in 1:B) result[i] <- stat(sample(x,size=length(data),replace=TRUE)); return (result); } describe <- function(x) { print(paste("Mean ",mean(x)," SD ", sd(x))); quantile(x,c(.025,.05,.25,.5,.75,.95,.975)); } ### does not take long to compute means of lots of samples B <- 2000 bsMean <- bootstrap(data, mean, B) bsMedian <- bootstrap(data, median, B) ### compare sd of bootstrap to se from theory sd(data)/sqrt(50) sd(bsMean) sd(bsMedian) ### CDF plot of results along with normal target lo <- 0.99*min(bsMean); hi <- 1.01*max(bsMean) x <- seq(lo,hi,length.out=1000) plot(x,pnorm(sqrt(50)*(x-xbar)/sd(norm)), type="l", col="red", ylab="CDF") # normal cdf lines(sort(bsMean),(1:B)/B) ### Summarize the results, comparison of percentiles to theory-based t-interval quantile(bsMean, c(.025,.975)) describe(bsMean)