# S4CI3 Winter 2019 Assignment 4 # Question 1 gibbs.pumps <- function(N, x, t, alpha, beta) { # A function to run a Gibbs Sampler for the # Pumps problem. # # The arguments are the total chain length, # the data x and t and the hyperparameter alpha # I will also allow inputting an initial value # for beta if desired. # Check to ensure valid data x and t if (any((x<0)|(x%%1!=0))) stop("Invalid data in x") if (any (t<0)) stop("Invalid Observation Times") n <- length(x) if (length(t)!=n) stop("x and t must be the same length") # Initial value for beta using option 3 in # the solution if no value is given if (missing(beta)) beta <- alpha/mean(x/t) chain <- matrix(NA, ncol=n+1, nrow=N) for (i in 1:N){ lambda <- rgamma(n, x+alpha, scale=1/(t+1/beta)) beta <- 1/rgamma(1, n*alpha, scale=1/sum(lambda)) chain[i,] <- c(lambda, beta) } chain } pumps <- data.frame(Fails=c(5,1,5,14,3,19,1,1,4,22), Times=c(94.32, 15.72, 62.88, 125.76, 5.24, 31.44, 1.05, 1.05, 2.1, 10.48)) set.seed(20190408) pumps.out <- gibbs.pumps(15000, pumps$Fails, pumps$Times, 1.8) mcmc.interval <- function(x, prob=0.95) quantile(x, c((1-prob)/2, 1-(1-prob)/2)) t(round(apply(pumps.out[-(1:5000),], 2, mcmc.interval),4)) # Question 2 q2.x <- c(0.580, 0.730, 0.986, 0.691, 0.532, 0.819, 0.382, 0.706, 0.310, 0.791, 0.462, 0.653, 0.589, 0.664, 0.750, 0.786, 0.590, 0.892, 0.712, 0.713) q2.n <- length(q2.x) q2.xbar <- mean(q2.x) q2.theta.hat <- q2.xbar/(1-q2.xbar) # Q2(a) (i) R <- 100000 set.seed(20190408) q2.xstar.1 <- matrix(rbeta(R*q2.n, q2.theta.hat, 1), ncol=q2.n) q2.xbar.star.1 <- rowMeans(q2.xstar.1) q2.theta.star.1 <- q2.xbar.star.1/(1-q2.xbar.star.1) q2.bias.1 <- mean(q2.theta.star.1)-q2.theta.hat q2.se.1 <- sd(q2.theta.star.1) # Q2(a) (ii) set.seed(20190408) q2.xstar.2 <- matrix(sample(q2.x, R*q2.n, replace=T), ncol=q2.n) q2.xbar.star.2 <- rowMeans(q2.xstar.2) q2.theta.star.2 <- q2.xbar.star.2/(1-q2.xbar.star.2) q2.bias.2 <- mean(q2.theta.star.2)-q2.theta.hat q2.se.2 <- sd(q2.theta.star.2) # Q2 (b) q2.CI.norm.1 <- q2.theta.hat-q2.bias.1-qnorm(c(0.975,0.025))*q2.se.1 q2.CI.norm.2 <- q2.theta.hat-q2.bias.2-qnorm(c(0.975,0.025))*q2.se.2 # Q2 (c) q2.ahat.1 <- sort(q2.theta.star.1)[c(0.025*R, 0.975*R)] q2.CI.basic.1 <- 2*q2.theta.hat-rev(q2.ahat.1) q2.CI.perc.1 <- q2.ahat.1 q2.ahat.2 <- sort(q2.theta.star.2)[c(0.025*R, 0.975*R)] q2.CI.basic.2 <- 2*q2.theta.hat-rev(q2.ahat.2) q2.CI.perc.2 <- q2.ahat.2 # Histograms of the bootstrap distributions (not required) par(mfrow=c(1,2)) hist(q2.theta.star.1, breaks=100, prob=T, main="Parametric", xlab=expression(widehat(theta^"*"))) abline(v=q2.theta.hat, col="red", lwd=2) hist(q2.theta.star.2, breaks=100, prob=T, main="Non-parametric", xlab=expression(widehat(theta^"*"))) abline(v=q2.theta.hat, col="red", lwd=2) dev.off() # Question 4 library(boot) q4.n <- nrow(cd4) q4.r <- cor(cd4$baseline, cd4$oneyear) # 3(a) # Parametric bootstrap mu.hat <- colMeans(cd4) Sigma.hat <- var(cd4) # cov(cd4) gives the same result! library(MASS) set.seed(20190408) R <- 100000 q4.r.star.norm <- rep(NA, R) for (j in 1:R) { cd4.star <- mvrnorm(q4.n, mu.hat, Sigma.hat) q4.r.star.norm[j] <- cor(cd4.star[,1], cd4.star[,2]) } q4.b.boot.norm <- mean(q4.r.star.norm)-q4.r q4.se.boot.norm <- sd(q4.r.star.norm) q4.CI.boot.norm <- q4.r-q4.b.boot.norm-qnorm(c(0.975,0.025))*q4.se.boot.norm # 4(b) # Parametric bootstrap on transformed scale fisher <- function(r) 0.5*log((1+r)/(1-r)) fisher.inv <- function(p) (exp(2*p)-1)/(exp(2*p)+1) q4.psi.hat <- fisher(q4.r) q4.psi.star.norm <- fisher(q4.r.star.norm) q4.b.boot.psi.norm <- mean(q4.psi.star.norm)-q4.psi.hat q4.se.boot.psi.norm <- sd(q4.psi.star.norm) q4.CI.boot.psi.norm <- q4.psi.hat-q4.b.boot.psi.norm- qnorm(c(0.975,0.025))*q4.se.boot.psi.norm q4.CI.boot.rho.norm <- fisher.inv(q4.CI.boot.psi.norm) # 4(c) # Nonparametric bootstrap set.seed(20190408) inds <- matrix(sample(1:q4.n, R*q4.n, replace=TRUE), ncol=q4.n) q4.r.star.np <- rep(NA, R) for (j in 1:R) { i <- inds[j,] q4.r.star.np[j] <- cor(cd4$baseline[i], cd4$oneyear[i]) } q4.b.boot.np <- mean(q4.r.star.np)-q4.r q4.se.boot.np <- sd(q4.r.star.np) q4.CI.boot.np <- q4.r-q4.b.boot.np-qnorm(c(0.975,0.025))*q4.se.boot.np q4.psi.star.np <- fisher(q4.r.star.np) q4.b.boot.psi.np <- mean(q4.psi.star.np)-q4.psi.hat q4.se.boot.psi.np <- sd(q4.psi.star.np) q4.CI.boot.psi.np <- q4.psi.hat-q4.b.boot.psi.np- qnorm(c(0.975,0.025))*q4.se.boot.psi.np q4.CI.boot.rho.np <- fisher.inv(q4.CI.boot.psi.np)