#    Spatial analysis of GBR data
#
#    December 5, 2007
#
#    Mike Antolin
#    Department of Biology
#    Colorado State University
#    Fort Collins, CO 80523-1878
#
#    michael.antolin@colostate.edu

####  Clean up before you start
#     clear the memory deck
rm(list=ls(all=TRUE))
#     clear out graphing frames
for (z in 1:20) {dev.off()}


###
### import data

coral<-read.table("GBR_WS_data.csv",sep=",",header=T)
attach(coral)
names(coral)
plot(LAT_DD,LON_DD)

# Make some fancy plots for exploratory data analysis
library(geoR)
library(scatterplot3d)

# make data columsn used by points.geodata 
coral$coords = cbind(LAT_DD,LON_DD)
coral$data = CASES

X11()
plot(LAT_DD, LON_DD, type= "n",main="Number of Cases",
ylab="Latitude",xlab="Longitude")
text(jitter(LAT_DD,factor=1000), jitter(LON_DD,factor=1000),CASES,cex=0.75)
X11()
points.geodata(coral,cex.var=CORAL_COVER,cex.min=.1,cex.max=3,main="Coral Cover",
ylab="Latitude",xlab="Longitude")
X11()
points.geodata(coral,cex.var=WSSTA,cex.min=.1,cex.max=3, main="Ocean Temperature",
ylab="Latitude",xlab="Longitude")
X11()
scatterplot3d(LAT_DD, y=LON_DD, CASES, type="h", main="Number of Cases",
ylab="Latitude",xlab="Longitude")

###
### Check the distribution of the data against the negative binomial distribution

# Make some definitions of the data

varcases<-var(CASES)                  # variance of flea load defined
varcases
sd(CASES)
summary(CASES)
sample_size=length(CASES)
sample_size

#  Create data tables for analysis
frequencies<-table(CASES)
frequencies

avecases<-mean(CASES)                 # average cases defined
ucases<-as.vector(names(frequencies)) # get levels from the table into a vector
ycases<-as.numeric(ucases)            # change them from a vector to a number
maxlev<-nlevels(as.factor(ycases))    # number of levels in frequency table 
maxlev                                # this value will be used for plotting and binning

#  plot the fit to to a poisson distribution
X11()                                 # create a graphics window for each plot 
par(mfrow=c(1,2))
barplot(frequencies,ylab="Frequency",xlab="Obs. Cases")
barplot(dpois(0:(maxlev-1),avecases)*sample_size,names=as.character(0:(maxlev-1)),
ylab="Frequency",xlab="Exp Cases")
varmean=varcases/avecases
varmean
par(new=T)
par(mfrow=c(1,1))                               
if (varmean>1) text(12,25,"Looks like this is NOT Poisson distributed!", cex=1,col="RED",font=4)


#  plot the fit to to a negative binomial
X11()                                  
par(mfrow=c(1,2))
barplot(frequencies,ylab="Frequency",xlab="Obs. Cases")
barplot(dnbinom(0:(maxlev-1),1,mu=avecases)*sample_size,names=as.character(0:(maxlev-1)),ylab="Frequency",xlab="Exp. Cases")
k<-avecases^2/(varcases-avecases)
par(new=T)
par(mfrow=c(1,1))                               
if (k<1) text(12,20,"Looks like this could be negative binomial!", cex=1,col="RED",font=4)

#  plot double histogram with both observed and expected on it
X11()                                  
exp<-dnbinom(0:(maxlev-1),1,mu=avecases)*sample_size
both<-numeric(2*maxlev)
both[1:(2*maxlev) %% 2!=0]<-frequencies
both[1:(2*maxlev) %% 2==0]<-exp
lab<-character(2*maxlev)
lab[1:(2*maxlev) %% 2==0]<-as.character(0:(maxlev-1))
par(mfrow=c(1,1))
barplot(both,col=rep(c("white","grey"),maxlev),names=lab,ylab="Frequency",xlab="Cases")
legend(90,70,c("obs","exp"),fill=c("white","grey"))
par(new=T)
# make fancy X-axis
axis(1,at=c(1:(2*maxlev)),
     label=labels[1:(2*maxlev) %% 2==0]<-as.character(0:((2*maxlev)-1)),
     lwd=2,pos=-1.5,cex.axis=0.5)

##
##  Do an approximate test of goodness of fit to the negative binomial distribution

# make categories
cs<-factor(0:(maxlev-1))

#levels(cs)   # this will check how many "bins" you've made

#  make your expected and observed categories, and calculate chi-squared
ef<-as.vector(tapply(exp,cs,sum))
of<-as.vector(tapply(frequencies,cs,sum))
chisq<-sum((of-ef)^2/ef)
ef
of

Prob<-1-pchisq(chisq,(nlevels(cs)-3))
Prob

# Write the result of your test on your plot
par(new=T)
par(mfrow=c(1,1))                               
mtext(paste("Chi-squared  = ",chisq," with probability P =",Prob),
side=3,cex=1,col="RED",font=4)


### Install Spatial Stats library
S_HOME="RSpatial/"
source(paste(S_HOME,"Spatial_start.R",sep=""))
Spatial_start()
.First<-function(){Spatial_start()
                  }

# Create spatial weights matrix
spt.wt<-spwtdist(LAT_DD,LON_DD,rescale=T)
spt.wt[1:4,]

#  Use glm to test effects of temperature on whiting, between regions
#  original model
library(MASS)
fit1 = glm(CASES ~ WSSTA + CORAL_COVER + WSSTA*CORAL_COVER + factor(SECTOR), family=quasipoisson)
coef(fit1)
summary(fit1)

#  Estimate the "k" parameter for the negative binomial distribution
ybar=predict(fit1,type="response")
theta.ml(CASES, ybar)

theta1=0.3052719
# Now fit a glm with negative binomial errors 
fit2=glm(CASES ~ WSSTA + CORAL_COVER + WSSTA*CORAL_COVER +factor(SECTOR), family=negative.binomial(theta=theta1))
coef(fit2)
summary(fit2)

#  chek your residuals from the model
x11()
plot(WSSTA,fit2$resid,xlab="WSSTA")

# Test Moran's I for spatial autocorrelation in the model
morani(fit2$resid,spt.wt)

#  Plots to test for spatial autocorrelation
x11()
par(mfrow=c(2,2))
plot(WSSTA,fit2$resid,xlab="WSSTA")
plot(fit2$resid, CASES -fit2$resid,xlab="Residual",ylab="Predicted value")
title("Predicted values vs residuals",cex=.7)
plot(fit2$resid,spt.wt %*% fit2$resid, xlab="Residual",ylab="Weighted residuals")
title("W * RESIDUALS VS. RESIDUALS",cex=.7)
cor(fit2$resid,spt.wt %*% fit2$resid)


#######
# Using just the reef characterisitcs for a spatial analysis

library(MASS)
coral_R<-read.table("GBR_WS_data_reefs.csv",sep=",",header=T)
attach(coral_R)
names(coral_R)
x11()
plot(LAT_DD_R,LON_DD_R)

# Make some fancy plots for exploratory data analysis
library(geoR)
library(scatterplot3d)

# make data columsn used by points.geodata 
coral_R$coords = cbind(LAT_DD_R,LON_DD_R)
coral_R$data = CASES_R

X11()
plot(LAT_DD_R, LON_DD_R, type= "n",main="Number of Cases",
ylab="Latitude",xlab="Longitude")
text(jitter(LAT_DD_R,factor=1000), jitter(LON_DD_R,factor=1000),CASES_R,cex=0.75)
X11()
points.geodata(coral_R,cex.var=CORAL_COVER_R,cex.min=.1,cex.max=3,main="Coral Cover",
ylab="Latitude",xlab="Longitude")
X11()
points.geodata(coral_R,cex.var=WSSTA_R,cex.min=.1,cex.max=3, main="Ocean Temperature",
ylab="Latitude",xlab="Longitude")
X11()
scatterplot3d(LAT_DD_R, y=LON_DD_R, CASES_R, type="h", main="Number of Cases",
ylab="Latitude",xlab="Longitude")


# Create spatial weights matrix

spt.wt.R<-spwtdist(LAT_DD_R,LON_DD_R,rescale=T)
spt.wt.R[1:4,]


fit1.R = glm(CASES_R ~ WSSTA_R + CORAL_COVER_R + WSSTA_R*CORAL_COVER_R + factor(SECTOR_R), family=quasipoisson)
ybar=predict(fit1.R,type="response")
theta.ml(CASES_R, ybar)


theta1.R= 1.257505
fit2.R = glm(CASES_R ~ WSSTA_R + CORAL_COVER_R + WSSTA_R*CORAL_COVER_R + factor(SECTOR_R), family=negative.binomial(theta=theta1.R))

coef(fit2.R)
summary(fit2.R)
x11()
plot(WSSTA_R,fit2.R$resid,xlab="WSSTA")

morani(fit2.R$resid,spt.wt.R)


#Plots to test for spatial autocorrelation
X11()
par(mfrow=c(2,2))
plot(WSSTA_R,fit2.R$resid,xlab="WSSTA")
plot(fit2.R$resid, CASES_R -fit2.R$resid,xlab="Residual",ylab="Predicted value")
title("Predicted values vs residuals",cex=.7)
plot(fit2.R$resid,spt.wt.R %*% fit2.R$resid, xlab="Residual",ylab="Weighted residuals")
title("W * RESIDUALS VS. RESIDUALS",cex=.7)
cor(fit2.R$resid,spt.wt.R %*% fit2.R$resid)