/*
 * Collinearity diagnostics for the Hald cement data, in SAS.
 * Note that we get the same results as in the R code, which is a GOOD THING.
 */

options linesize=78 ovp nocenter nodate;

/*
 * Read the 13 Hald observations from in-stream data.
 * Each record holds, in order: ta ts taf ds heat.
 */
data hald;
    input ta ts taf ds heat;

    /* Row total of the four composition variables.                     */
    /* NOTE(review): presumably kept to show that the four percentages  */
    /* add up to a nearly constant total, which is what makes the full  */
    /* model collinear -- confirm intent.                               */
    sum = ta + ts + taf + ds;

    /* Variable labels, left disabled as in the original program:
    label ta   = '% tricalcium aluminate'
          ts   = '% tricalcium silicate'
          taf  = '% tetracalcium alumino ferrite'
          ds   = '% dicalcium silicate'
          heat = 'heat (calories /g cement)';
    */

    /* DATALINES must be the last statement of the step, and the data */
    /* records must begin on the line immediately after it.           */
    datalines;
7.0000000 26.000000 6.0000000 60.000000 78.500000
1.0000000 29.000000 15.000000 52.000000 74.300000
11.000000 56.000000 8.0000000 20.000000 104.30000
11.000000 31.000000 8.0000000 47.000000 87.600000
7.0000000 52.000000 6.0000000 33.000000 95.900000
11.000000 55.000000 9.0000000 22.000000 109.20000
3.0000000 71.000000 17.000000 6.0000000 102.70000
1.0000000 31.000000 22.000000 44.000000 72.500000
2.0000000 54.000000 18.000000 22.000000 93.100000
21.000000 47.000000 4.0000000 26.000000 115.90000
1.0000000 40.000000 23.000000 34.000000 83.800000
11.000000 66.000000 9.0000000 12.000000 113.30000
10.000000 68.000000 8.0000000 12.000000 109.40000
;
run;

/* List the data set, including the derived SUM column. */
proc print data=hald;
run;

/* Scatter plots of the response against each of the four predictors. */
proc gplot data=hald;
    plot heat * (ta ts taf ds);
run;
quit;   /* GPLOT uses RUN-group processing; QUIT ends the procedure. */

/* Pairwise correlations among the predictors and the response. */
proc corr data=hald;
    var ta ts taf ds heat;
run;

/*
 * Two-predictor model.
 *   r, influence        residual analysis and influence statistics
 *   xpx, i              X'X crossproducts matrix and its inverse
 *   collin, collinoint  collinearity diagnostics with and without
 *                       the intercept
 *   tol, vif            tolerances and variance inflation factors
 */
proc reg data=hald;
    model heat = ta ts
        / r influence xpx i collin collinoint tol vif;
run;
quit;   /* REG is interactive; QUIT ends it rather than leaving it running. */

/*
 * Look at multicollinearity in the full model.
 * In addition to the options used above:
 *   ss1, ss2            sequential (Type I) and partial (Type II)
 *                       sums of squares
 *   pcorr1, pcorr2      squared partial correlations based on
 *                       Type I and Type II sums of squares
 *   partial             partial regression leverage plots
 */
proc reg data=hald;
    model heat = ta ts taf ds
        / r influence xpx i ss1 ss2 pcorr1 pcorr2 partial
          collin collinoint tol vif;
run;
quit;