* Examples for transformations done in class on November 20, 2017;
* Each example fits an untransformed regression, inspects the fit with three
  diagnostic plots (response vs covariate, studentized residual vs predicted,
  studentized residual vs normal quantiles) and then refits after transforming
  the response and/or the covariate;
* NOTE(review): the S3A3 libref is not assigned in this section - it has to be
  defined elsewhere (LIBNAME statement or autoexec) before these steps run;

* ---------------------------------------------------------------------------;
* Example 1;
* Simulated Poisson Count Data;
* ---------------------------------------------------------------------------;
PROC IMPORT Out=S3A3.Poisson Datafile="Data\PoisData.txt" DBMS=DLM REPLACE;
  Getnames=YES;
  Datarow=2;
run;

* Untransformed model - Problem with heteroscedasticity;
PROC REG Data=S3A3.Poisson plots=none;
  Model Y=X;
  Plot Y*X;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
run;
quit;

* Create the square root and log of Y so their means and variances can be compared;
* NOTE(review): log(Y) is set to missing wherever Y is 0, which can happen with
  count data - such rows drop out of the LogY models further down;
Data S3A3.Poisson;
  Set S3A3.Poisson;
  SqrtY=sqrt(Y);
  LogY=log(Y);
run;

* BY-group processing requires the data to be ordered by X, so sort first.
  Row order has no effect on the regressions that follow;
PROC SORT Data=S3A3.Poisson;
  By X;
run;

* Mean and variance of each version of the response at every value of X;
PROC MEANS Data=S3A3.Poisson Mean Var;
  Var Y SqrtY LogY;
  By X;
run;

* Run the new model with the square root of Y as the response;
PROC REG Data=S3A3.Poisson plots=none;
  Model SqrtY=X;
  Plot SqrtY*X;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
run;
quit;

* In this model the heteroscedasticity has been solved but there
  is now evidence of non-linearity;
* Try taking the square root of the covariate too;
Data S3A3.Poisson;
  Set S3A3.Poisson;
  SqrtX=sqrt(X);
run;

PROC REG Data=S3A3.Poisson plots=none;
  Model SqrtY=SqrtX;
  Plot SqrtY*SqrtX;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
run;
quit;

* This seems to solve the problems;
* What if we used a different transformation, such as log?;
* LogY already exists from the earlier DATA step, so only LogX is added here;
Data S3A3.Poisson;
  Set S3A3.Poisson;
  LogX=log(X);
run;

PROC REG Data=S3A3.Poisson plots=none;
  Model LogY=X;
  Plot LogY*X;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
run;
quit;

PROC REG Data=S3A3.Poisson plots=none;
  Model LogY=LogX;
  Plot LogY*LogX;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
run;
quit;

* ---------------------------------------------------------------------------;
* Example 2;
* Simulated data with the standard deviation proportional to the covariate;
* ---------------------------------------------------------------------------;
PROC IMPORT Out=S3A3.Hetero Datafile="Data\HeteroData.txt" DBMS=DLM REPLACE;
  Getnames=YES;
  Datarow=2;
run;

* Untransformed model - clear heteroscedasticity;
PROC REG Data=S3A3.Hetero plots=none;
  Model Y=X;
  Plot Y*X;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
run;
quit;

* Transformed model mentioned in class notes;
* Both sides of the model are divided by X, giving Yprime=Y/X and Xprime=1/X;
DATA S3A3.Hetero;
  Set S3A3.Hetero;
  Xprime=1/X;
  Yprime=Y/X;
run;

* The extra H. plot shows the leverage of each observation against Xprime;
PROC REG Data=S3A3.Hetero plots=none;
  Model Yprime=Xprime;
  Plot Yprime*Xprime;
  Plot Student.*Pred.;
  Plot Student.*nqq.;
  Plot H.*Xprime;
run;
quit;

* Note that this does result in some observations having high leverage!;

* ---------------------------------------------------------------------------;
* A Real Data Example;
* ---------------------------------------------------------------------------;
PROC IMPORT Out=S3A3.Cedar Datafile="Data\Cedar.txt" DBMS=DLM REPLACE;
  Getnames=YES;
  Datarow=2;
run;

* Untransformed model of Height on Dbh;
PROC REG Data=S3A3.Cedar plots=none;
  Model Height=Dbh;
  Plot Height*Dbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

* Candidate transformations of the covariate: log, square root and
  reciprocal square root;
Data S3A3.Cedar;
  Set S3A3.Cedar;
  LogDbh=log(Dbh);
  SqrtDbh=sqrt(Dbh);
  RecSqrtDbh=1/SqrtDbh;
run;

PROC REG Data=S3A3.Cedar plots=none;
  Model Height=SqrtDbh;
  Plot Height*SqrtDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

PROC REG Data=S3A3.Cedar plots=none;
  Model Height=LogDbh;
  Plot Height*LogDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

PROC REG Data=S3A3.Cedar plots=none;
  Model Height=RecSqrtDbh;
  Plot Height*RecSqrtDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

* The same three transformations applied to the response;
* RecSqrtHeight is created here but is not used by any model in this section;
Data S3A3.Cedar;
  Set S3A3.Cedar;
  LogHeight=log(Height);
  SqrtHeight=sqrt(Height);
  RecSqrtHeight=1/SqrtHeight;
run;

* Combinations of transformed response and transformed covariate;
PROC REG Data=S3A3.Cedar plots=none;
  Model SqrtHeight=SqrtDbh;
  Plot SqrtHeight*SqrtDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

PROC REG Data=S3A3.Cedar plots=none;
  Model LogHeight=SqrtDbh;
  Plot LogHeight*SqrtDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

PROC REG Data=S3A3.Cedar plots=none;
  Model LogHeight=LogDbh;
  Plot LogHeight*LogDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;

PROC REG Data=S3A3.Cedar plots=none;
  Model SqrtHeight=LogDbh;
  Plot SqrtHeight*LogDbh;
  Plot Student.*Predicted.;
  Plot Student.*nqq.;
run;
quit;