* STAT 3A03 Fall 2017 SAS Lab 11;
* Solution to the Exercises;
* NOTE(review): the libref S3A3 is not assigned in this section and is assumed to be assigned before this code runs;

* Exercise 1;
* Import the delimited text file with variable names taken from the first row and data starting at row 2;
PROC IMPORT out=S3A3.physics1 datafile="Data\Physics1.txt" DBMS=DLM REPLACE;
    Getnames=yes;
    Datarow=2;
run;

* Run the regression without weights;
* Plots requested are the scatterplot plus studentized residuals against fitted values and normal quantiles;
PROC REG Data=S3A3.Physics1 plots=none;
    Model y=x;
    Plot y*x;
    Plot Student.*Predicted.;
    Plot Student.*nqq.;
run;
quit;

* Construct the transformed variables by dividing through by SD;
* Note that the transformed intercept is now a variable!;
* The input is the dataset imported above (Physics1) so that this script does not depend on a dataset it never creates;
Data S3A3.Physics1;
    Set S3A3.Physics1;
    z=y/SD;
    x1=1/SD;
    x2=x/SD;
run;

* Fit the transformed regression with no intercept;
PROC REG Data=S3A3.Physics1 plots=none;
    * Adding / noint to the model statement fits a model with no intercept;
    Model z=x1 x2 / noint;
    Plot z*x2;
    Plot Student.*Predicted.;
    Plot Student.*nqq.;
run;
quit;

* Construct the weights as proportional to the reciprocal of the variance;
* Again read the imported Physics1 dataset (z x1 x2 from the previous step are carried along unchanged);
Data S3A3.Physics1;
    Set S3A3.Physics1;
    w=1/SD**2;
run;

* Run the weighted regression;
PROC REG Data=S3A3.Physics1 plots=none;
    Model y=x;
    Weight w;
    Plot y*x;
    Plot Student.*Predicted.;
    Plot Student.*nqq.;
run;
quit;

* Exercise 2;
* Import the delimited text file with variable names taken from the first row and data starting at row 2;
PROC IMPORT out=S3A3.ed75a datafile="Data\Education75a.txt" DBMS=DLM REPLACE;
    Getnames=yes;
    Datarow=2;
run;

* Set up the dummy variables taking Region 1 as the reference;
* Each comparison evaluates to 1 when true and 0 otherwise;
Data S3A3.Ed75a;
    Set S3A3.Ed75a;
    Reg2=(Region=2);
    Reg3=(Region=3);
    Reg4=(Region=4);
run;

* Fit the regression with region included;
* We store the fitted values and the residuals and studentized residuals in an output dataset;
PROC REG Data=S3A3.ED75a plots=none;
    Model Y=X1 X2 X3 Reg2 Reg3 Reg4;
    * Region is not in the model so it is listed on a Var statement to make it available to the Plot statement;
    Var Region;
    Plot Y*Predicted.;
    Plot Student.*Predicted.;
    Plot Student.*nqq.;
    Plot Student.*Region;
    Output Out=S3A3.ED75_out1 Predicted=Fitted Student=Res_stud Residual=Res;
run;
quit;

* Now get the mean and variance of the studentized residuals by region;
* The results go to the work dataset temp2 with one row per Region (NWAY) and nothing is printed;
PROC MEANS Data=S3A3.ED75_out1 NWAY noprint;
    CLASS Region;
    VAR Res_stud;
    OUTPUT Out=temp2 Mean=Mean Var=Var;
run;

* Extract the variances by region and store in variables;
* Copy each regional variance from temp2 into a macro variable named var followed by the region number (var1 to var4);
* The name is built from the Region value itself rather than the row number so a missing region cannot shift the mapping;
* CALL SYMPUTX is used rather than CALL SYMPUT so the numeric value is converted with a wider field and without padding blanks;
* A _null_ step is used because only the macro variables are needed and temp2 itself does not change;
Data _null_;
    Set temp2;
    Call symputx(cats('var', Region), Var);
run;

* Now store the appropriate variance for each observation;
* Observations whose Region is not 1 to 4 are left with a missing value of var;
Data S3A3.ED75b;
    Set S3A3.ED75a;
    IF (Region=1) THEN var=&var1;
    ELSE IF (Region=2) THEN var=&var2;
    ELSE IF (Region=3) THEN var=&var3;
    ELSE IF (Region=4) THEN var=&var4;
run;

* Get the overall variance of the studentized residuals and save;
PROC MEANS Data=S3A3.Ed75_out1;
    Var Res_stud;
    Output Out=temp3 Mean=Mean Var=Var;
run;

* temp3 holds a single row so this stores the overall variance in the macro variable vare;
Data _null_;
    Set temp3;
    Call symputx('vare', Var);
run;

* Get the weights as the overall variance divided by the region-specific variance;
Data S3A3.ED75b;
    Set S3A3.ED75b;
    w=&vare/var;
run;

* Run the weighted regression;
PROC REG Data=S3A3.ED75b plots=none;
    Model Y=X1 X2 X3 Reg2 Reg3 Reg4;
    Weight w;
    * Region is not in the model so it is listed on a Var statement to make it available to the Plot statement;
    Var Region;
    Plot Y*Predicted.;
    Plot Student.*Predicted.;
    Plot Student.*nqq.;
    PLOT Student.*Region;
run;
quit;