# Spatial analysis of GBR data # # December 5, 2007 # # Mike Antolin # Department of Biology # Colorado State University # Fort Collins, CO 80523-1878 # # michael.antolin@colostate.edu #### Clean up before you start # clear the memory deck rm(list=ls(all=TRUE)) # clear out graphing frames for (z in 1:20) {dev.off()} ### ### import data coral<-read.table("GBR_WS_data.csv",sep=",",header=T) attach(coral) names(coral) plot(LAT_DD,LON_DD) # Make some fancy plots for exploratory data analysis library(geoR) library(scatterplot3d) # make data columsn used by points.geodata coral$coords = cbind(LAT_DD,LON_DD) coral$data = CASES X11() plot(LAT_DD, LON_DD, type= "n",main="Number of Cases", ylab="Latitude",xlab="Longitude") text(jitter(LAT_DD,factor=1000), jitter(LON_DD,factor=1000),CASES,cex=0.75) X11() points.geodata(coral,cex.var=CORAL_COVER,cex.min=.1,cex.max=3,main="Coral Cover", ylab="Latitude",xlab="Longitude") X11() points.geodata(coral,cex.var=WSSTA,cex.min=.1,cex.max=3, main="Ocean Temperature", ylab="Latitude",xlab="Longitude") X11() scatterplot3d(LAT_DD, y=LON_DD, CASES, type="h", main="Number of Cases", ylab="Latitude",xlab="Longitude") ### ### Check the distribution of the data against the negative binomial distribution # Make some definitions of the data varcases<-var(CASES) # variance of flea load defined varcases sd(CASES) summary(CASES) sample_size=length(CASES) sample_size # Create data tables for analysis frequencies<-table(CASES) frequencies avecases<-mean(CASES) # average cases defined ucases<-as.vector(names(frequencies)) # get levels from the table into a vector ycases<-as.numeric(ucases) # change them from a vector to a number maxlev<-nlevels(as.factor(ycases)) # number of levels in frequency table maxlev # this value will be used for plotting and binning # plot the fit to to a poisson distribution X11() # create a graphics window for each plot par(mfrow=c(1,2)) barplot(frequencies,ylab="Frequency",xlab="Obs. Cases") barplot(dpois(0:(maxlev-1),avecases)*sample_size,names=as.character(0:(maxlev-1)), ylab="Frequency",xlab="Exp Cases") varmean=varcases/avecases varmean par(new=T) par(mfrow=c(1,1)) if (varmean>1) text(12,25,"Looks like this is NOT Poisson distributed!", cex=1,col="RED",font=4) # plot the fit to to a negative binomial X11() par(mfrow=c(1,2)) barplot(frequencies,ylab="Frequency",xlab="Obs. Cases") barplot(dnbinom(0:(maxlev-1),1,mu=avecases)*sample_size,names=as.character(0:(maxlev-1)),ylab="Frequency",xlab="Exp. Cases") k<-avecases^2/(varcases-avecases) par(new=T) par(mfrow=c(1,1)) if (k<1) text(12,20,"Looks like this could be negative binomial!", cex=1,col="RED",font=4) # plot double histogram with both observed and expected on it X11() exp<-dnbinom(0:(maxlev-1),1,mu=avecases)*sample_size both<-numeric(2*maxlev) both[1:(2*maxlev) %% 2!=0]<-frequencies both[1:(2*maxlev) %% 2==0]<-exp lab<-character(2*maxlev) lab[1:(2*maxlev) %% 2==0]<-as.character(0:(maxlev-1)) par(mfrow=c(1,1)) barplot(both,col=rep(c("white","grey"),maxlev),names=lab,ylab="Frequency",xlab="Cases") legend(90,70,c("obs","exp"),fill=c("white","grey")) par(new=T) # make fancy X-axis axis(1,at=c(1:(2*maxlev)), label=labels[1:(2*maxlev) %% 2==0]<-as.character(0:((2*maxlev)-1)), lwd=2,pos=-1.5,cex.axis=0.5) ## ## Do an approximate test of goodness of fit to the negative binomial distribution # make categories cs<-factor(0:(maxlev-1)) #levels(cs) # this will check how many "bins" you've made # make your expected and observed categories, and calculate chi-squared ef<-as.vector(tapply(exp,cs,sum)) of<-as.vector(tapply(frequencies,cs,sum)) chisq<-sum((of-ef)^2/ef) ef of Prob<-1-pchisq(chisq,(nlevels(cs)-3)) Prob # Write the result of your test on your plot par(new=T) par(mfrow=c(1,1)) mtext(paste("Chi-squared = ",chisq," with probability P =",Prob), side=3,cex=1,col="RED",font=4) ### Install Spatial Stats library S_HOME="RSpatial/" source(paste(S_HOME,"Spatial_start.R",sep="")) Spatial_start() .First<-function(){Spatial_start() } # Create spatial weights matrix spt.wt<-spwtdist(LAT_DD,LON_DD,rescale=T) spt.wt[1:4,] # Use glm to test effects of temperature on whiting, between regions # original model library(MASS) fit1 = glm(CASES ~ WSSTA + CORAL_COVER + WSSTA*CORAL_COVER + factor(SECTOR), family=quasipoisson) coef(fit1) summary(fit1) # Estimate the "k" parameter for the negative binomial distribution ybar=predict(fit1,type="response") theta.ml(CASES, ybar) theta1=0.3052719 # Now fit a glm with negative binomial errors fit2=glm(CASES ~ WSSTA + CORAL_COVER + WSSTA*CORAL_COVER +factor(SECTOR), family=negative.binomial(theta=theta1)) coef(fit2) summary(fit2) # chek your residuals from the model x11() plot(WSSTA,fit2$resid,xlab="WSSTA") # Test Moran's I for spatial autocorrelation in the model morani(fit2$resid,spt.wt) # Plots to test for spatial autocorrelation x11() par(mfrow=c(2,2)) plot(WSSTA,fit2$resid,xlab="WSSTA") plot(fit2$resid, CASES -fit2$resid,xlab="Residual",ylab="Predicted value") title("Predicted values vs residuals",cex=.7) plot(fit2$resid,spt.wt %*% fit2$resid, xlab="Residual",ylab="Weighted residuals") title("W * RESIDUALS VS. RESIDUALS",cex=.7) cor(fit2$resid,spt.wt %*% fit2$resid) ####### # Using just the reef characterisitcs for a spatial analysis library(MASS) coral_R<-read.table("GBR_WS_data_reefs.csv",sep=",",header=T) attach(coral_R) names(coral_R) x11() plot(LAT_DD_R,LON_DD_R) # Make some fancy plots for exploratory data analysis library(geoR) library(scatterplot3d) # make data columsn used by points.geodata coral_R$coords = cbind(LAT_DD_R,LON_DD_R) coral_R$data = CASES_R X11() plot(LAT_DD_R, LON_DD_R, type= "n",main="Number of Cases", ylab="Latitude",xlab="Longitude") text(jitter(LAT_DD_R,factor=1000), jitter(LON_DD_R,factor=1000),CASES_R,cex=0.75) X11() points.geodata(coral_R,cex.var=CORAL_COVER_R,cex.min=.1,cex.max=3,main="Coral Cover", ylab="Latitude",xlab="Longitude") X11() points.geodata(coral_R,cex.var=WSSTA_R,cex.min=.1,cex.max=3, main="Ocean Temperature", ylab="Latitude",xlab="Longitude") X11() scatterplot3d(LAT_DD_R, y=LON_DD_R, CASES_R, type="h", main="Number of Cases", ylab="Latitude",xlab="Longitude") # Create spatial weights matrix spt.wt.R<-spwtdist(LAT_DD_R,LON_DD_R,rescale=T) spt.wt.R[1:4,] fit1.R = glm(CASES_R ~ WSSTA_R + CORAL_COVER_R + WSSTA_R*CORAL_COVER_R + factor(SECTOR_R), family=quasipoisson) ybar=predict(fit1.R,type="response") theta.ml(CASES_R, ybar) theta1.R= 1.257505 fit2.R = glm(CASES_R ~ WSSTA_R + CORAL_COVER_R + WSSTA_R*CORAL_COVER_R + factor(SECTOR_R), family=negative.binomial(theta=theta1.R)) coef(fit2.R) summary(fit2.R) x11() plot(WSSTA_R,fit2.R$resid,xlab="WSSTA") morani(fit2.R$resid,spt.wt.R) #Plots to test for spatial autocorrelation X11() par(mfrow=c(2,2)) plot(WSSTA_R,fit2.R$resid,xlab="WSSTA") plot(fit2.R$resid, CASES_R -fit2.R$resid,xlab="Residual",ylab="Predicted value") title("Predicted values vs residuals",cex=.7) plot(fit2.R$resid,spt.wt.R %*% fit2.R$resid, xlab="Residual",ylab="Weighted residuals") title("W * RESIDUALS VS. RESIDUALS",cex=.7) cor(fit2.R$resid,spt.wt.R %*% fit2.R$resid)