## Read in the dataset and attach it
pollute <- read.table("pollute.txt",header=T)
attach(pollute)
# Check out post versus pre prices.
plot(pre82price,post82price)
# Run the regression
pollute.lm <- lm(pre82price~post82price)
# Have a look at some diagnostics
plot(pollute.lm)
# Do a residual plot
plot(predict(pollute.lm),residuals(pollute.lm))
# Now the squared residuals
plot(predict(pollute.lm),residuals(pollute.lm)^2)
# Smooth it, maybe somewhat quadratic?
lines(smooth.spline(predict(pollute.lm),residuals(pollute.lm)^2,df=10),col=2)
lines(smooth.spline(predict(pollute.lm),residuals(pollute.lm)^2,df=4),col=2)
# Make a model for the squared residuals
summary(lm(residuals(pollute.lm)^2 ~ pre82price + pre82price^2 + years))
# Estimate the variances.
predict(lm(residuals(pollute.lm)^2 ~ pre82price + pre82price^2 + years))
# Invert them to get the weights.
myweights <- 1/predict(lm(residuals(pollute.lm)^2 ~
pre82price + pre82price^2 + years))
# Rerun the regression with the weights.
lm(pre82price~post82price,weights = myweights)
## Problem: this treats weights as if they were fixed.
## They are in fact random -- another source of uncertainty.
## Could bootstrap the entire weighted least squares procedure
## including the weight estimation to get standard errors.