* Assign the libref S3A3 to the course data folder - later steps read and write S3A3 data sets there;
LIBNAME S3A3 'D:\STAT 3A03\Fall 2017\Data';

* Exercise;
* Part (a);
* Load the delimited text file Heights2.txt into the data set S3A3.Heights2;
* REPLACE overwrites any existing copy of the data set;
* No DELIMITER statement is given so the PROC IMPORT default delimiter applies;
proc import
      datafile="D:\STAT 3A03\Fall 2017\Data\Heights2.txt"
      out=S3A3.Heights2
      dbms=dlm
      replace;
   * Data values begin on line 2 of the file;
   datarow=2;
   * Variable names are taken from the first line of the file;
   getnames=yes;
run;

* Part (b);
* Scatter plot of Husband (vertical axis) against Wife (horizontal axis);
PROC GPLOT Data=S3A3.Heights2;
     * Plot symbol for the points - green circles;
     Symbol Value=CIRCLE Color=GREEN;
     Title "Heights of Married Couples";
     Label Husband="Husband's Height";
     Label Wife="Wife's Height";
     Plot Husband*Wife;
run;
* GPLOT uses RUN-group processing so QUIT is needed to end the procedure explicitly;
* This matches the other GPLOT and REG steps in this file;
quit;

* Part (c);
* Correlation of Wife (WITH variable) against Husband (VAR variable);
proc corr data=S3A3.Heights2;
    with Wife;
    var Husband;
run;

* Part (d);
* Simple linear regression with Wife as the response and Husband as the predictor;
* Plots=none suppresses the ODS graphics so only the tables are produced;
proc reg plots=none data=S3A3.Heights2;
   model Wife = Husband;
run;
quit;

* Part (e);
* From the output of (d) the p-value for the slope is <0.0001, so the slope is highly significant;

*Part (f);
* The easiest way to do this is to take the estimate and standard error from (d);
* You can get the required t-value from Table A.2;
* With n=96 the degrees of freedom are n-2=94, so use the tabulated row df=60 with alpha/2=0.025, which gives t=2;
* Hence the confidence interval limits are;
* Lower: 0.69965-2*0.06106=0.57753;
* Upper: 0.69965+2*0.06106=0.82177;

* An alternative way to do parts (e) and (f) is as follows;
* Refit the same model and save the estimates to the data set Heights2_est;
* Tableout adds rows of inferential statistics to the Outest data set alongside the parameter estimates;
proc reg data=S3A3.Heights2
         outest=Heights2_est tableout
         plots=none;
   model Wife = Husband;
run;
quit;

* List the row type and the Husband column of the saved estimates;
* The E. format shows the values in scientific notation so very small p-values remain readable;
proc print data=Heights2_est;
   format Husband e.;
   var _TYPE_ Husband;
run;
* From this we see that the p-value for part (e) is 1.536E-19;
* And the exact 95% confidence interval is (0.578414, 0.820893);
* The interval is slightly narrower because the correct t-value to use is 1.9855 rather than 2;

* Part (g);
* A slope of 1 would imply that husbands tended to choose wives who are their own height on average;

* Additional Example;
* Regress daughter height (Dheight) on mother height (Mheight);
PROC REG data=S3A3.heights Plots=none;
     Model Dheight=Mheight;
     * Save the input data plus the predicted values as the variable Fitted in heights_out;
     * The SORT and GPLOT steps that follow read this data set and this variable;
     Output out=heights_out predicted=Fitted;
run;
quit;

* Order the observations by Mheight so that the joined lines in the later plot are drawn left to right;
* The BY statement takes a variable list with no equals sign;
PROC SORT data=heights_out;
      BY MHeight;
run;

* Symbol definitions are assigned to the three plot requests in order;
* symbol1 - black dots with no connecting line;
SYMBOL1 color=black value=dot interpol=none;
* symbol2 - red joined line with no markers;
SYMBOL2 color=red value=none interpol=join;
* symbol3 - blue joined line with no markers;
SYMBOL3 color=blue value=none interpol=join;

* Overlay three plots on one set of axes - the data, Mheight against itself and Fitted against Mheight;
proc gplot data=heights_out;
     title "Mother and Daughter Heights";
     label Dheight="Daughter's Height";
     label Mheight="Mother's Height";
     plot Dheight*Mheight
          Mheight*Mheight
          Fitted*Mheight / overlay;
run;
quit;