# STATS 4CI3/6CI3 Winter 2019 # Examples done in class on March 28, and April 1 2019 # Code to plot the true distribution function and two estimates # the parametric mle and the empirical distribution function # for the exponential and normal cases. plot.exp.edf <- function(x, lambda=1, eps=1e-3) { # Function to plot the true cdf of the exponential # distribution with rate lambda and the empirical # distribution function based on a sample x from # the exponential(lambda) distribution as well as # the parametric mle of the cdf. x <- sort(x) n <- length(x) upper <- max(ceiling(max(x)+1), qexp(eps,lambda,lower=F)) curve(pexp(x, lambda), 0,upper, col="blue") lambda.hat <- 1/mean(x) curve(pexp(x, lambda.hat), 0, upper, add=T, col="orange") segments(x0=c(0,x),x1=c(x,upper), y0=(0:n)/n, col="red") curve(pexp(x, lambda), 0,upper, col="blue", add=T, lty=2) } plot.exp.edf(rexp(10)) plot.exp.edf(rexp(20)) plot.exp.edf(rexp(50)) plot.exp.edf(rexp(100)) plot.norm.edf <- function(x, mu=0, sigma=1, eps=1e-3) { # Function to plot the true cdf of the normal(mu, sigma^2) # distribution and the empirical distribution function based # on a sample x from the normal(mu,sigma^2) distribution as # well as the parametric mle of the cdf. x <- sort(x) n <- length(x) lower <- min(floor(min(x)-1), qnorm(eps, mu, sigma)) upper <- max(ceiling(max(x)+1), qnorm(eps,mu, sigma,lower=F)) curve(pnorm(x, mu, sigma), lower,upper, col="blue") xbar <- mean(x) s <- sqrt((n-1)/n)*sd(x) curve(pnorm(x, xbar, s), lower,upper, col="orange", add=T) segments(x0=c(lower,x),x1=c(x,upper), y0=(0:n)/n, col="red") curve(pnorm(x, mu, sigma), lower,upper, col="blue", add=T, lty=2) } plot.norm.edf(rnorm(10)) plot.norm.edf(rnorm(20)) plot.norm.edf(rnorm(50)) plot.norm.edf(rnorm(100)) # Example of the non-parametric bootstarp # # For illustration let us use an old dataset recording # time between failures of air conditioning units on an # airplane. library(boot) times <- aircondit$hours # We shall define the statistic to be the failure rate # We estimate this by lambda.hat <- 1/mean(times) # Now we draw R bootstrap samples n <- length(times) R <- 10000 xstar <- matrix(sample(times, R*n, replace=TRUE), ncol=n) # Now calculate the statistic on these bootstrap samples lambda.hat.star <- 1/rowMeans(xstar) hist(lambda.hat.star, breaks=100, prob=TRUE) abline(v=lambda.hat, col="red", lwd=2) qqnorm(lambda.hat.star-lambda.hat) qqline(lambda.hat.star-lambda.hat, col="red", lwd=2) mean(lambda.hat.star-lambda.hat) sd(lambda.hat.star-lambda.hat) # Simulation Study. # Previously we did a simulation for the parametric bootstrap # (see code file Mar27.R) # Let us use the same samples and statistic but now we will # assume we do not know that the data actually come from an # exponential distribution. R <- 1000 bias.boot.np <- rep(NA, N) se.boot.np <- rep(NA,N) for (i in 1:N) { x.boot.np <- matrix(sample(samples[i,],R*n,replace=TRUE), ncol=n) lambda.star <- 1/rowMeans(x.boot.np) bias.boot.np[i] <- mean(lambda.star)-estimates[i] se.boot.np[i] <- sd(lambda.star) } bias.true mean(bias.boot.np) sd.true mean(se.boot.np)