# S4CI3 Winter 2019 Assignment 4 

# Question 1

gibbs.pumps <- function(N, x, t, alpha, beta) {
  # A function to run a Gibbs Sampler for the
  # Pumps problem.
  #
  # The arguments are the total chain length,
  # the data x and t and the hyperparameter alpha
  # I will also allow inputting an initial value
  # for beta if desired.
  
  # Check to ensure valid data x and t
  if (any((x<0)|(x%%1!=0)))
    stop("Invalid data in x")
  if (any (t<0)) stop("Invalid Observation Times")
  n <- length(x)
  if (length(t)!=n) 
    stop("x and t must be the same length")
  
  # Initial value for beta using option 3 in 
  # the solution if no value is given
  if (missing(beta))
    beta <- alpha/mean(x/t)
  chain <- matrix(NA, ncol=n+1, nrow=N)
  for (i in 1:N){
    lambda <- rgamma(n, x+alpha, scale=1/(t+1/beta))
    beta <- 1/rgamma(1, n*alpha, scale=1/sum(lambda))
    chain[i,] <- c(lambda, beta)
  }
  chain
}

pumps <- data.frame(Fails=c(5,1,5,14,3,19,1,1,4,22),
                    Times=c(94.32, 15.72, 62.88, 
                            125.76, 5.24, 31.44,
                            1.05, 1.05, 2.1, 10.48))
set.seed(20190408)
pumps.out <- gibbs.pumps(15000, pumps$Fails,
                         pumps$Times, 1.8)

mcmc.interval <- function(x, prob=0.95)
  quantile(x, c((1-prob)/2, 1-(1-prob)/2))
t(round(apply(pumps.out[-(1:5000),], 2, mcmc.interval),4))


# Question 2
q2.x <- c(0.580, 0.730, 0.986, 0.691, 0.532, 0.819, 0.382, 0.706,
          0.310, 0.791, 0.462, 0.653, 0.589, 0.664, 0.750, 0.786,
          0.590, 0.892, 0.712, 0.713)
q2.n <- length(q2.x)
q2.xbar <- mean(q2.x)
q2.theta.hat <- q2.xbar/(1-q2.xbar)

# Q2(a) (i)
R <- 100000
set.seed(20190408)
q2.xstar.1 <- matrix(rbeta(R*q2.n, q2.theta.hat, 1), ncol=q2.n)
q2.xbar.star.1 <- rowMeans(q2.xstar.1)
q2.theta.star.1 <- q2.xbar.star.1/(1-q2.xbar.star.1)
q2.bias.1 <- mean(q2.theta.star.1)-q2.theta.hat
q2.se.1 <- sd(q2.theta.star.1)

# Q2(a) (ii)
set.seed(20190408)
q2.xstar.2 <- matrix(sample(q2.x, R*q2.n, replace=T), ncol=q2.n)
q2.xbar.star.2 <- rowMeans(q2.xstar.2)
q2.theta.star.2 <- q2.xbar.star.2/(1-q2.xbar.star.2)
q2.bias.2 <- mean(q2.theta.star.2)-q2.theta.hat
q2.se.2 <- sd(q2.theta.star.2)

# Q2 (b)
q2.CI.norm.1 <- q2.theta.hat-q2.bias.1-qnorm(c(0.975,0.025))*q2.se.1
q2.CI.norm.2 <- q2.theta.hat-q2.bias.2-qnorm(c(0.975,0.025))*q2.se.2

# Q2 (c)
q2.ahat.1 <- sort(q2.theta.star.1)[c(0.025*R, 0.975*R)]
q2.CI.basic.1 <- 2*q2.theta.hat-rev(q2.ahat.1)
q2.CI.perc.1 <- q2.ahat.1

q2.ahat.2 <- sort(q2.theta.star.2)[c(0.025*R, 0.975*R)]
q2.CI.basic.2 <- 2*q2.theta.hat-rev(q2.ahat.2)
q2.CI.perc.2 <- q2.ahat.2

# Histograms of the bootstrap distributions (not required)
par(mfrow=c(1,2))
hist(q2.theta.star.1, breaks=100,
     prob=T, main="Parametric", 
     xlab=expression(widehat(theta^"*")))
abline(v=q2.theta.hat, col="red", lwd=2)
hist(q2.theta.star.2, breaks=100,
     prob=T, main="Non-parametric", 
     xlab=expression(widehat(theta^"*")))
abline(v=q2.theta.hat, col="red", lwd=2)
dev.off()

# Question 4
library(boot)
q4.n <- nrow(cd4)
q4.r <- cor(cd4$baseline, cd4$oneyear)

# 3(a) 
# Parametric bootstrap
mu.hat <- colMeans(cd4)
Sigma.hat <- var(cd4) # cov(cd4) gives the same result!

library(MASS)
set.seed(20190408)
R <- 100000
q4.r.star.norm <- rep(NA, R)
for (j in 1:R) {
   cd4.star <- mvrnorm(q4.n, mu.hat, Sigma.hat)
   q4.r.star.norm[j] <- cor(cd4.star[,1], cd4.star[,2])
}

q4.b.boot.norm <- mean(q4.r.star.norm)-q4.r
q4.se.boot.norm <- sd(q4.r.star.norm)
q4.CI.boot.norm <- q4.r-q4.b.boot.norm-qnorm(c(0.975,0.025))*q4.se.boot.norm


# 4(b)
# Parametric bootstrap on transformed scale
fisher <- function(r) 0.5*log((1+r)/(1-r))
fisher.inv <- function(p) (exp(2*p)-1)/(exp(2*p)+1)

q4.psi.hat <- fisher(q4.r)
q4.psi.star.norm <- fisher(q4.r.star.norm)
q4.b.boot.psi.norm <- mean(q4.psi.star.norm)-q4.psi.hat
q4.se.boot.psi.norm <- sd(q4.psi.star.norm)
q4.CI.boot.psi.norm <- q4.psi.hat-q4.b.boot.psi.norm-
                          qnorm(c(0.975,0.025))*q4.se.boot.psi.norm
q4.CI.boot.rho.norm <- fisher.inv(q4.CI.boot.psi.norm)

# 4(c)
# Nonparametric bootstrap
set.seed(20190408)
inds <- matrix(sample(1:q4.n, R*q4.n, replace=TRUE), ncol=q4.n)
q4.r.star.np <- rep(NA, R)
for (j in 1:R) {
   i <- inds[j,]
   q4.r.star.np[j] <- cor(cd4$baseline[i], cd4$oneyear[i])
}
q4.b.boot.np <- mean(q4.r.star.np)-q4.r
q4.se.boot.np <- sd(q4.r.star.np)
q4.CI.boot.np <- q4.r-q4.b.boot.np-qnorm(c(0.975,0.025))*q4.se.boot.np

q4.psi.star.np <- fisher(q4.r.star.np)
q4.b.boot.psi.np <- mean(q4.psi.star.np)-q4.psi.hat
q4.se.boot.psi.np <- sd(q4.psi.star.np)
q4.CI.boot.psi.np <- q4.psi.hat-q4.b.boot.psi.np-
                        qnorm(c(0.975,0.025))*q4.se.boot.psi.np
q4.CI.boot.rho.np <- fisher.inv(q4.CI.boot.psi.np)