/* SAS Lab 7 */
/* Solutions for the two exercises */
/* Block comments are used throughout: statement-style comments (* ... ;)  */
/* must not contain unmatched quotation marks such as an apostrophe.       */
/* NOTE(review): the libref S3A3 is not assigned in this file; it is       */
/* assumed to be defined before this program runs - confirm.               */

/* ------------------------------ Exercise 1 ------------------------------ */

/* Read the delimited text file (names in row 1, data from row 2). */
PROC IMPORT out=S3A3.csdata datafile="CSData.txt" DBMS=DLM REPLACE;
    Getnames=yes;
    Datarow=2;
run;

/* Regress GPA on the three high-school scores and capture the pieces */
/* needed for the diagnostics computed below.                          */
PROC REG Data=S3A3.CSData plots=none;
    Model GPA=HSM HSS HSE;
    Title "CSData Multiple Regression Model";
    /* Output the ANOVA table from the model */
    ODS output anova=csdata_anova;
    /* Output the Parameter Estimates table */
    ODS output ParameterEstimates=csdata_parests;
    /* We also need the raw residuals and leverages */
    Output Out=CSData_Out Residual=Resid_raw H=Leverage;
run;
quit;

/* Store the error sum of squares (&sse) and the model degrees of freedom  */
/* (&p) as macro variables. SYMPUTX trims blanks and converts numerics     */
/* with more digits than SYMPUT, without a conversion note in the log.     */
Data _null_;
    set CSData_anova;
    If source='Error' then call symputx('sse', ss);
    If source='Model' then call symputx('p', df);
run;

/* Store the estimated slope of each predictor as a macro variable. */
Data _null_;
    Set CSData_parests;
    If Variable='HSM' then call symputx('Beta_HSM', Estimate);
    If Variable='HSS' then call symputx('Beta_HSS', Estimate);
    If Variable='HSE' then call symputx('Beta_HSE', Estimate);
run;

Data CSData_out;
    Set CSData_out;
    id=_N_;                                   /* observation number */
    d=resid_raw/sqrt(&SSE);                   /* residual scaled by sqrt(SSE) */
    potential=Leverage/(1-Leverage);
    residual_fun=(&p+1)*d**2/((1-Leverage)*(1-d**2));
    Hadi=potential+residual_fun;              /* sum of the two functions above */
    /* Residual plus component: each one uses ITS OWN predictor times that */
    /* predictor's estimated coefficient.                                   */
    res_comp_HSM=resid_raw+HSM*&Beta_HSM;
    res_comp_HSS=resid_raw+HSS*&Beta_HSS;
    res_comp_HSE=resid_raw+HSE*&Beta_HSE;
run;

/* PLOT requests are vertical*horizontal. */
PROC GPLOT Data=CSData_out;
    Plot Hadi*id;
    Plot Potential*Residual_fun;
    /* Residual-plus-component on the vertical axis against its predictor */
    Plot res_comp_HSM*HSM;
    Plot res_comp_HSS*HSS;
    Plot res_comp_HSE*HSE;
run;
quit;

/* ------------------------------ Exercise 2 ------------------------------ */

PROC IMPORT out=S3A3.NYRivers datafile="C:\Users\Angelo\Documents\McMaster Teaching\stat3a03\Fall 2015\Data\NYRivers.txt" DBMS=DLM REPLACE;
    Getnames=yes;
    Datarow=2;
run;

PROC REG Data=S3A3.NYRivers plots=none;
    Model Nitrogen=ComIndl;
    Title "NYRivers Simple Regression";
    /* Output the ANOVA table from the model */
    ODS output anova=NYRivers_anova;
    /* We output the raw residuals, leverages, Cook's distances and DFITS measures */
    Output Out=NYRivers_Out Residual=Resid_raw H=Leverage CookD=Cook DFFITS=DFITS;
run;
quit;

/* Refresh &sse and &p from this model's ANOVA table (they still hold the */
/* Exercise 1 values until this step runs).                                */
Data _null_;
    set NYRivers_anova;
    If source='Error' then call symputx('sse', ss);
    If source='Model' then call symputx('p', df);
run;

Data NYRivers_out;
    Set NYRivers_out;
    index=_N_;                                /* observation number */
    d=resid_raw/sqrt(&SSE);                   /* residual scaled by sqrt(SSE) */
    potential=Leverage/(1-Leverage);
    residual_fun=(&p+1)*d**2/((1-Leverage)*(1-d**2));
    Hadi=potential+residual_fun;
run;

PROC GPLOT Data=NYRivers_out;
    /* The three plots in Figure 4.7 */
    Plot Cook*Index;
    Plot DFITS*Index;
    Plot Hadi*Index;
run;
quit;