/*---------------------------------------------------------------------------
  Interaction models for the salary data.

  Overview of the steps below:
    1. Build dummy-variable products and fit the Educ x Manage interaction
       with PROC REG; the same model is then fitted with PROC GLM using
       CLASS variables.
    2. Add the continuous covariate Exp (parallel lines model), then the
       saturated model with separate intercepts and slopes.
    3. Partial F test: parallel lines model versus the saturated model.
    4. Partial F test: collapsing the upper two education levels.

  Datasets read: S3A3.Salary and S3A3.Salary1.
  Side effects:  S3A3.Salary1 is overwritten in place (new product columns);
                 WORK datasets Salary1_out, full_anova, red_anova, red1_anova,
                 Salary1, Ftest and Ftest1 are created; macro variables
                 full_sse, full_df, red_sse, red_df, red1_sse and red1_df
                 are set.
---------------------------------------------------------------------------*/

* Examples of Interactions done in class on Nov 13 & 14, 2017;
* Interactions between categorical variables require products of;
* dummy variables;

/* Adds the Educ-dummy x Manage products. Note that this rewrites
   S3A3.Salary1 in place. */
data S3A3.Salary1;
    set S3A3.Salary1;
    E1M = E1 * Manage;
    E2M = E2 * Manage;
    E3M = E3 * Manage;
run;

proc reg data=S3A3.Salary1 plots=none;
    model Salary = E2 E3 Manage E2M E3M;
    output out=Salary1_out
           predicted=Fitted
           residual=Resid_raw
           student=Resid_stud;
run;
quit;

* Or in PROC GLM we use the * operator on two CLASS variables;
proc glm data=S3A3.Salary;
    class Educ (ref='1') Manage (ref='0');
    model Salary = Educ Manage Educ*Manage / solution;
run;
quit;

* We can include continuous covariates also;
* The following gives 6 parallel lines but without additive structure;
* in the intercepts;
proc glm data=S3A3.Salary;
    class Educ (ref='1') Manage (ref='0');
    model Salary = Educ Manage Educ*Manage Exp / solution;
run;
quit;

* The saturated model would have 6 different intercepts and 6 different slopes;
* We do this using interactions between the continuous and categorical covariates;
proc glm data=S3A3.Salary;
    class Educ (ref='1') Manage (ref='0');
    model Salary = Educ Manage Educ*Manage
                   Exp Educ*Exp Manage*Exp Educ*Manage*Exp / solution;
run;
quit;

* With PROC REG we need to construct the new interaction variables;
/* Adds the products with Exp. Again rewrites S3A3.Salary1 in place. */
data S3A3.Salary1;
    set S3A3.Salary1;
    E1Exp  = E1 * Exp;
    E2Exp  = E2 * Exp;
    E3Exp  = E3 * Exp;
    MExp   = Manage * Exp;
    E1MExp = E1 * Manage * Exp;
    E2MExp = E2 * Manage * Exp;
    E3MExp = E3 * Manage * Exp;
run;

proc reg data=S3A3.Salary1 plots=none;
    model Salary = E2 E3 Manage E2M E3M
                   Exp E2Exp E3Exp MExp E2MExp E3MExp;
run;
quit;

* We can use an F test to compare the parallel lines model and the full model;

* Full Model;
proc glm data=S3A3.Salary;
    class Educ (ref='1') Manage (ref='0');
    model Salary = Educ Manage Educ*Manage
                   Exp Educ*Exp Manage*Exp Educ*Manage*Exp;
    ods output OverallAnova=full_anova;
run;
quit;

/* Store the error sum of squares and error df of the full model in macro
   variables. CALL SYMPUTX converts numerics with up to 32 characters and
   strips blanks, whereas CALL SYMPUT uses a 12-character field that can
   truncate the sum of squares. DATA _NULL_ avoids rewriting full_anova. */
data _null_;
    set full_anova;
    if source = 'Error' then do;
        call symputx('full_sse', ss);
        call symputx('full_df', df);
    end;
run;

* Reduced Model;
proc glm data=S3A3.Salary;
    class Educ (ref='1') Manage (ref='0');
    model Salary = Educ Manage Educ*Manage Exp;
    ods output OverallAnova=red_anova;
run;
quit;

/* Error sum of squares and error df of the parallel lines model. */
data _null_;
    set red_anova;
    if source = 'Error' then do;
        call symputx('red_sse', ss);
        call symputx('red_df', df);
    end;
run;

* Now calculate the F statistic and p-value;
/* F = [(SSE_reduced - SSE_full) / (df_reduced - df_full)] / [SSE_full / df_full].
   SDF returns the upper-tail probability directly, which avoids the
   cancellation in 1 - CDF when the p-value is very small. */
data Ftest;
    num    = (&red_sse - &full_sse) / (&red_df - &full_df);
    num_df = &red_df - &full_df;
    den    = &full_sse / &full_df;
    den_df = &full_df;
    F      = num / den;
    pvalue = sdf('F', F, num_df, den_df);
run;

proc print data=Ftest;
run;

* Can we combine the upper two education levels;
/* Educ1 is the sum of the E2 and E3 dummies, so it flags membership in
   either of the two upper education levels. Written to WORK, not S3A3. */
data Salary1;
    set S3A3.Salary1;
    Educ1 = E2 + E3;
run;

proc glm data=Salary1;
    class Educ1 (ref='0') Manage (ref='0');
    model Salary = Educ1 Manage Educ1*Manage Exp / solution;
    ods output OverallAnova=red1_anova;
run;
quit;

/* Error sum of squares and error df of the collapsed-education model. */
data _null_;
    set red1_anova;
    if source = 'Error' then do;
        call symputx('red1_sse', ss);
        call symputx('red1_df', df);
    end;
run;

* Now calculate the F statistic and p-value;
/* Here the parallel lines model with all education levels (red_*) plays the
   role of the full model and the collapsed model (red1_*) is the reduced one. */
data Ftest1;
    num    = (&red1_sse - &red_sse) / (&red1_df - &red_df);
    num_df = &red1_df - &red_df;
    den    = &red_sse / &red_df;
    den_df = &red_df;
    F      = num / den;
    pvalue = sdf('F', F, num_df, den_df);
run;

proc print data=Ftest1;
run;