########First Example##### dat = read.table('cell.txt') X1 = dat[,2] X2 = dat[,3] Y = dat[,1] x1 = (X1-mean(X1))/0.4 ####0.4 is the absolute difference between adjacent levels of the variable x2 = (X2-mean(X2))/10 ####10 is the absolute difference between adjacent levels of the variable cor(X1, X1^2) cor(x1, x1^2) cor(X2, X2^2) cor(x2, x2^2) x1sq = x1^2 x2sq = x2^2 x1x2 = x1*x2 fit = lm(Y ~ x1 + x2 + x1sq + x2sq + x1x2) resi = residuals(fit) yhat = fitted(fit) par(mfrow=c(2,2)) plot( yhat, resi) plot(x1 , resi) plot(x2, resi) qqnorm(resi) summary(fit) ####Partial F-Test ###test whether a first-order model would be sufficient fit1 = lm(Y~x1+x2) ###one way of testing anotab = anova(fit) anotab[3:5,2] Fstar = sum(anotab[3:5,2])/3/1048 qf(0.95, 3,5) ####Means first oder model is adequate ###an easier way to do it anova(fit1,fit) ########################### ###transfer back fito = lm(Y~X1+X2) summary(fito) qt(.975, 8) ########Second Example##### dat = read.table('fat.txt') X1 = dat[,1] X2 = dat[,2] X3 = dat[,3] Y = dat[,4] x1 = X1 - mean(X1) x2 = X2 - mean(X2) x3 = X3 - mean(X3) x12 = x1*x2 x13 = x1*x3 x23 = x2*x3 fit = lm(Y~x1+x2+x3+x12+x13+x23) fito = lm(Y~x1+x2+x3) anova(fito,fit) qf(.95, 3, 13) ###########Third Example (soap data) dat = read.table('soap.txt') X1 = dat[,2] X2 = dat[,3] Y = dat[,1] plot(X1[X2==1],Y[X2==1],xlim=c(100,350),ylim=c(100,500),xlab='Line Speed', ylab='Amount of Scrap') points(X1[X2==0],Y[X2==0],pch=19) legend("bottomright",c('Production Line 1','Production Line 2'),pch=c(1,19)) X12 = X1*X2 fit = lm(Y ~ X1+X2+X12) resi1 = fit$residuals[X2==1] resi2 = fit$residuals[X2==0] yhat1 = fit$fitted[X2==1] yhat2 = fit$fitted[X2==0] par(mfrow=c(1,2)) plot(yhat1, resi1) abline(0,0) plot(yhat2, resi2) abline(0,0) ###Brown-Forsythe Test Y1 = Y[X2==1] Y2 = Y[X2==0] T1 = X1[X2==1] T2 = X1[X2==0] fit1 = lm(Y1~T1) fit2 = lm(Y2~T2) n1 = length(Y1) n2 = length(Y2) resi1 = fit1$residuals resi2 = fit2$residuals d1 = abs(resi1-median(resi1)) d2 = abs(resi2-median(resi2)) s = sqrt((sum((d1-mean(d1))^2)+sum((d2-mean(d2))^2))/(n1+n2-2)) tBF = (mean(d1)-mean(d2))/s/sqrt(1/n1+1/n2) qt(0.975,n1+n2-2) ###Cannot reject the null hypothesis, (variance do not differ) ###Inference about two regression lines fit0 <- lm(Y~X1) anova(fit0,fit) ###Reject! regression function not identical fit12 <- lm(Y~X1+X2) anova(fit12, fit) ####Cannot reject! Slopes for two production lines are the same...