# STATS 4CI3/6CI3 Winter 2019

# Examples done in class on March 28, and April 1 2019

# Code to plot the true distribution function and two estimates
# the parametric mle and the empirical distribution function
# for the exponential and normal cases.

plot.exp.edf <- function(x, lambda=1, eps=1e-3) {
# Function to plot the true cdf of the exponential
# distribution with rate lambda and the empirical 
# distribution function based on a sample x from
# the exponential(lambda) distribution as well as
# the parametric mle of the cdf. 
    x <- sort(x)
    n <- length(x)
    upper <- max(ceiling(max(x)+1), qexp(eps,lambda,lower=F))
    curve(pexp(x, lambda), 0,upper, col="blue")
    lambda.hat <- 1/mean(x)
    curve(pexp(x, lambda.hat), 0, upper, add=T, col="orange")
    segments(x0=c(0,x),x1=c(x,upper), y0=(0:n)/n, col="red")
    curve(pexp(x, lambda), 0,upper, col="blue", add=T, lty=2)
}

plot.exp.edf(rexp(10))
plot.exp.edf(rexp(20))
plot.exp.edf(rexp(50))
plot.exp.edf(rexp(100))

plot.norm.edf <- function(x, mu=0, sigma=1, eps=1e-3) {
# Function to plot the true cdf of the normal(mu, sigma^2)
# distribution and the empirical distribution function based
# on a sample x from the normal(mu,sigma^2) distribution as
# well as the parametric mle of the cdf. 
    x <- sort(x)
    n <- length(x)
    lower <- min(floor(min(x)-1), qnorm(eps, mu, sigma))
    upper <- max(ceiling(max(x)+1), qnorm(eps,mu, sigma,lower=F))
    curve(pnorm(x, mu, sigma), lower,upper, col="blue")
    xbar <- mean(x)
    s <- sqrt((n-1)/n)*sd(x)
    curve(pnorm(x, xbar, s), lower,upper, col="orange", add=T)
    segments(x0=c(lower,x),x1=c(x,upper), y0=(0:n)/n, col="red")
    curve(pnorm(x, mu, sigma), lower,upper, col="blue", add=T, lty=2)
}
plot.norm.edf(rnorm(10))
plot.norm.edf(rnorm(20))
plot.norm.edf(rnorm(50))
plot.norm.edf(rnorm(100))

# Example of the non-parametric bootstarp
#
# For illustration let us use an old dataset recording
# time between failures of air conditioning units on an
# airplane.

library(boot)
times <- aircondit$hours

# We shall define the statistic to be the failure rate
# We estimate this by

lambda.hat <- 1/mean(times)

# Now we draw R bootstrap samples

n <- length(times)
R <- 10000
xstar <- matrix(sample(times, R*n, replace=TRUE), ncol=n)


# Now calculate the statistic on these bootstrap samples
lambda.hat.star <- 1/rowMeans(xstar)
hist(lambda.hat.star, breaks=100, prob=TRUE)
abline(v=lambda.hat, col="red", lwd=2)
qqnorm(lambda.hat.star-lambda.hat)
qqline(lambda.hat.star-lambda.hat, col="red", lwd=2)

mean(lambda.hat.star-lambda.hat)
sd(lambda.hat.star-lambda.hat)

# Simulation Study.
# Previously we did a simulation for the parametric bootstrap
# (see code file Mar27.R)
# Let us use the same samples and statistic but now we will
# assume we do not know that the data actually come from an
# exponential distribution.

R <- 1000
bias.boot.np <- rep(NA, N)
se.boot.np <- rep(NA,N)
for (i in 1:N) {
   x.boot.np <- matrix(sample(samples[i,],R*n,replace=TRUE), ncol=n)
   lambda.star <- 1/rowMeans(x.boot.np)
   bias.boot.np[i] <- mean(lambda.star)-estimates[i]
   se.boot.np[i] <- sd(lambda.star)
}
bias.true
mean(bias.boot.np)
sd.true
mean(se.boot.np)