# Demo code for Bootstrap 1 # Now look at the osteoporosis data sample osteo <- read.table(":data:osteo.dat", header=T) # --- pull out the hip data (lots of ways to get at it) names(osteo) osteo[,"hip"] attach(osteo) hip # --- kernel density estimate plot(density(hip)) points (hip,rep(0,length(hip))) # --- confidence interval for the average hip score summary(hip) mean(hip); sd(hip) mean(hip) + 2 * c(-1,1) * sd(hip)/sqrt(length(hip)) # --- confidence interval for normal sample norm <- rnorm(64) mean(norm); sd(norm) mean(norm) + 2 * c(-1,1) * sd(norm)/sqrt(length(norm)) # Bootstrap version of confidence interval for average avg.bs <- c() for(b in 1:1000) avg.bs <- c(avg.bs, mean(sample(norm,64,replace=T))) sd(avg.bs) # bs se, compare to s/sqrt(n) plot(densityh(avg.bs) quantile(avg.bs,c(.025,.975)) # Bootstrap version of the osteo confidence interval avg.bs <- c() for(b in 1:1000) avg.bs <- c(avg.bs, mean(sample(hip,64,replace=T))) sd(avg.bs) quantile(avg.bs,c(.025,.975)) # Check the coverage of a bootstrap interval... boostrap calibration # - first write a program that computes a bootstrap ci (80% coverage) # - then use it just like we used the mean above bootci <- function(x,B) { n<-length(x); avg.bs<-c(); for (b in 1:B) avg.bs <- c(avg.bs, mean(sample(x,n,replace=T))); quantile(avg.bs,c(0.1,0.9)) } ints <- c() for (rep in 1:50) { xStar <- sample(hip,length(hip),replace=T); # sample the "population" ints <- c(ints, bootci(xStar,1000)); # find BS interval } ints <- drop(cbind(ints[1+2 * 0:49],ints[2+2 * 0:49])) # two columns # - count the ones that did not cover cover <- (ints[,1] < mean(hip)) & (mean(hip) < ints[,2]) ints[!cover,]; length(ints[!cover,]) length(hip[useEstrogen=="yes"]) hip.yes <- hip[useEstrogen=="yes"] points(hip.yes,rep(0.05,length(hip.yes)),col="red") hip.no <- hip[useEstrogen=="no"] points(hip.no,rep(0.10,length(hip.no)),col="blue")