real <- read.table("C://COURSES/STA4164/WORK/realestate.dat", header=F, row.names=NULL, col.names=c("property", "y", "x1", "x2", "x3")) real y <- log(real$y) x1 <- log(real$x1) x2 <- log(real$x2) x3 <- log(real$x3) real1 <- data.frame(y, x1, x2, x3) #============ # EDA # ========== cor(real1) pairs(real1) # ============================= # LS Estimates and ANOVA Table # ============================= fit1 <- lm(y~x1); summary(fit2); anova(fit2) fit2 <- lm(y~x1+x2+x3, data=real); summary(fit1) fit3 <- lm(y~x1+x2+x3); summary(fit1); anova(fit1) fit1 <- lm(y~x1+x2+x3); summary(fit1); anova(fit1, test="F") sse <- 0.403305 mse <- 0.025207 df <- 3 sst <- sum((y-mean(y))^2) ssr <- sst - sse msr <- ssr/df f <- msr/mse p <- 1-pf(f, df, 19-df) cbind(df, ssr, sse, sst, msr, f, p) # ========================= # Test beta1 = beta2 =0 # ========================= anova(fit1, fit3, test="F") cv <- qf(.95, 2, 16); cv # 3.634 # ========================= # Test beta3 = .8 # ========================= y1 <- y - .8*x3 fit1 <- lm(y1~x1+x2) summary(fit1); anova(fit1) F2 <- (.513467-.403305)/.025207; F2 qf(.95, 1, 16) cbind(SSE.reduced=.513467, SSE.full=.403305, F2, qf(.95, 1, 16)) # 0.513467 0.403305 4.370294 4.493998 t <- (.481-.8)/.1526 cbind(t, df=16, cv=qt(.975, 16)) # t df cv #[1,] -2.090433 16 2.119905 # ========================= # Test beta1 = beta2 # ========================= x1.1 <- x1 + x2 fit2 <- lm(y1~x1.1+x3) summary(fit2); anova(fit2) F2 <- (0.4538164-.403305)/.025207; F2 qf(.95, 1, 16) cbind(F2, df1=1, df2=16, cv=qf(.95, 1, 16)) # F2 df1 df2 cv # 2.003864 1 16 4.493998 # ========================================================== # Confounding When Controlled for One Extraneous Variable # ========================================================== fit0 <- lm(y~x3); summary(fit0); anova(fit0) fit1 <- lm(y~x1 + x3); summary(fit1); anova(fit1) fit1 <- lm(y~x1 + x1^2 + x3); summary(fit1); anova(fit1) fit1 <- lm(y~x1 + x2 + x3); summary(fit1); anova(fit1) fit1 <- lm(y~x1 + x2 + x1:x2 + x3); summary(fit1); anova(fit1) fit1 <- lm(y~x2 + x3); summary(fit1); anova(fit1) x3 0.9636 0.1734 0.5993001 1.3278999 c(0.9636 - qt(.975, 18)*0.1734, 0.9636 + qt(.975, 18)*0.1734) x3 0.6438 0.1732 0.2783799 1.0092201 c(0.6438 - qt(.975, 17)*0.1732, 0.6438 + qt(.975, 17)*0.1732) x3 0.6648 0.1914 0.2590501 1.0705499 c(0.6648 - qt(.975, 16)*0.1914, 0.6648 + qt(.975, 16)*0.1914) x3 0.4810 0.1526 0.1575025 0.8044975 c(0.4810 - qt(.975, 16)*0.1526, 0.4810 + qt(.975, 16)*0.1526) x3 0.3552 0.1857 -0.04061018 0.75101018 c(0.3552 - qt(.975, 15)*0.1857, 0.3552 + qt(.975, 15)*0.1857) x3 0.5004 0.1491 0.1858265 0.8149735 c(0.5004 - qt(.975, 17)*0.1491, 0.5004 + qt(.975, 17)*0.1491) # ==================== # Interaction # ==================== fit1 <- lm(y~ (x1+x2+x3)^2); summary(fit1); anova(fit1) fit1 <- lm(y~ x2:x3); summary(fit1); anova(fit1) # =============== # Prediction # =============== fit1 <- lm(y~x1+x2+x3); summary(fit1); anova(fit1) mean(x1); mean(x2); mean(x3) pred <- predict(fit1, newdata=data.frame(x1=9, x2=10, x3=7), ci.fit = T, pi.fit = T, se.fit = T, conf.level = 0.95) pred qt(.975, 16) # 95% CI exp(c(10.53521, 10.78016)) # 37616.95 48057.81 # 95% PI exp(c(10.29952, 11.01584)) # 29718.35 60830.10 pred <- predict(fit1, newdata=data.frame(x1=log(60000), x2=log(8000), x3=log(1800)), ci.fit = T, pi.fit = T, se.fit = T, conf.level = 0.95) pred exp(c(9.953758, 11.41808)) exp(c(9.880104, 11.49174))