## Read in the dataset and attach it

pollute <- read.table("pollute.txt",header=T)

attach(pollute)

# Check out post versus pre prices.

plot(pre82price,post82price)

# Run the regression

pollute.lm <- lm(pre82price~post82price)

# Have a look at some diagnostics

plot(pollute.lm)

# Do a residual plot

plot(predict(pollute.lm),residuals(pollute.lm))

# Now the squared residuals

plot(predict(pollute.lm),residuals(pollute.lm)^2)

# Smooth it, maybe somewhat quadratic?

lines(smooth.spline(predict(pollute.lm),residuals(pollute.lm)^2,df=10),col=2)

lines(smooth.spline(predict(pollute.lm),residuals(pollute.lm)^2,df=4),col=2)

# Make a model for the squared residuals

summary(lm(residuals(pollute.lm)^2 ~ pre82price + pre82price^2 + years))

# Estimate the variances.

predict(lm(residuals(pollute.lm)^2 ~ pre82price + pre82price^2 + years))

# Invert them to get the weights.

myweights <- 1/predict(lm(residuals(pollute.lm)^2 ~
pre82price + pre82price^2 + years))

# Rerun the regression with the weights.

lm(pre82price~post82price,weights = myweights)

## Problem: this treats weights as if they were fixed.
## They are in fact random -- another source of uncertainty.
## Could bootstrap the entire weighted least squares procedure
## including the weight estimation to get standard errors.