# STATS 4CI3/6CI3 Winter 2019 # R code for solutions to Assignment 1 # Question 1(a) sum.geom <- function(x,r,n) { # # Function to give finite or infinite sums of # a geometric series. # Arguments: # x: the first term in the series # r: the ratio between successive terms # n: the number of terms can be a positive integer for # finite sums or Inf for infinite sums. # if (n<=0) stop("n must be postive") if (x==0) return(0) if (r==0) return(x) if (is.finite(n)) { if (n!=floor(n)) { n <- floor(n) warning(paste("Non-integer n rounded down to",n)) } if (r != 1) return(x*(1-r^n)/(1-r)) else return(n*x) } else { if (r >= 1) return(Inf) else if (r <= -1) return(NaN) else return(x/(1-r)) } } # Question 1(b) choose.rec <- function(n,r) { # # Recursive function to calculate the number of ways to # choose r objects from n # if (n != floor(n)) { warning("non-integer n rounded down") n <- floor(n) } if (r != floor(r)) { warning("non-integer r rounded down") r <- floor(r) } if (n==0) stop("n must be greater than 0") if (n=alpha) return(model=lm(y~1), Results=Results$Stage1) # Now the first is the added variable provided we did # not exit the function at the last line. v.add <- Results$Stage1[1,1] k <- 1 continue <- T # Now we use a while loop for the remaining stages while(continue){ # The current data for the model is set up # Also we update the potential variables to be added names.mod <- c(names(Data.model), names.all[v.add]) Data.model <- cbind(Data.model,X[,v.add]) names(Data.model) <- names.mod names <- names[Vars!=v.add] Vars <- Vars[Vars!=v.add] k <- k+1 # The Stage Number j <- 0 # A counter for the variables tried at this stage Stage <- paste("Stage",k,sep="") Results[[Stage]] <- matrix(NA, ncol=5, nrow=length(Vars)) for (i in Vars){ # Loop over the potential variables j <- j+1 Data.fit <- cbind(Data.model,X[,i]) fit <- coef(summary(lm(y~., data=Data.fit))) Results[[Stage]][j,] <- c(i,fit[k+1,]) } # Order the results for this stage and select # the variable with the smallest p-value Results[[Stage]] <- Results[[Stage]][order(Results[[Stage]][,5]),] v.add <- Results[[Stage]][1,1] # Decide if any variable will be added continue <- Results[[Stage]][1,5] < alpha } # Now summarize all of the results Res <- t(sapply(Results, function(x) x[1,])[,-k]) v.add <- names.all[Res[,1]] Summary <- data.frame(v.add, Res) names(Summary) <- c("Variable", "Index", "Estimate", "Std Error", "t stat", "p-value") row.names(Summary) <- paste("Stage",1:nrow(Res)) # Get the final fitted model and print it out. fit <- lm(y~., data=Data.model) cat("Final Fitted Model is:\n") print(fit) # Return the model summary of stages and full results. invisible(list(model=fit, Summary=Summary, Results=Results)) } # Question 3(b) dice.gen <- function(n) { # Function to generate n random observations of the total in a # roll of two fair dice. u <- runif(n) x <- rep(7,n) x[u<=15/36] <- 6 x[u<=10/36] <- 5 x[u<=6/36] <- 4 x[u<=3/36] <- 3 x[u<=1/36] <- 2 x[u>21/36] <- 8 x[u>26/36] <- 9 x[u>30/36] <- 10 x[u>33/36] <- 11 x[u>35/36] <- 12 x } set.seed(1022019) dice.out <- dice.gen(360000) rbind(table(dice.out),10000*c(1:6,5:1)) # Question 4(a) box.muller <- function(n, mu=0, sigma=1) { # # Implementation of the Box-Muller algorithm to # generate normal random variates. # # Arguments: # n - required sample size # mu - mean (default 0) # sigma - the standard deviation (default 1) # if ((n>0) && (n%%2==0)) m <- n/2 else if ((n>0) && (n%%2==1)) m <- (n+1)/2 else stop("n must be a postive integer") U1 <- runif(m) U2 <- runif(m) temp <- sqrt(-2*log(U1)) Y1 <- temp*sin(2*pi*U2) Y2 <- temp*cos(2*pi*U2) mu + sigma*c(Y1,Y2)[1:n] } # Exercise 4(b) rand.geom <- function(n, p) { # # Function to generate a random sample from the geometric distribution. # Arguments: # n: the required sample size # p: the success probability of the geometric distribution # if ((p<=0) | (p>=1)) stop("Success probability must be in the interval (0,1)") if (n<1) stop("n must be a positive integer") U <- runif(n) floor(log(U)/log(1-p)) } # Exercise 5(a) rchi.1 <- function(n, r) { # Function to generate chi-squared random variables with # even degrees of freedom. # The arguments are the required sample size and the # degrees of freedom. if (r%%2 != 0) stop("Only even degrees of freedom allowed") U <- matrix(runif(r/2*n), nrow=n) X <- -2*log(U) rowSums(X) } # Question 5(b) rchi.2 <- function(n, r) { #Function to generate chi-squared random variables with # any degrees of freedom. # The arguments are the required sample size and the # degrees of freedom. Z <- matrix(box.muller(n*r), nrow=n) rowSums(Z^2) } # Question 5(c) rchisq.nc <- function(n, k, lambda) { # # Function to generate from a non-central chi-squared # distribution with degrees of freedom k and non-centrality # parameter lambda. # # Arguments # n: number of random variates to generate # k: degrees of freedom # lambda: non-centrality parameter # # Ensure that k is positive if (k<=0) error("Degrees of Freedom must be positive") # Make sure the non-centrality parameter is non-negative. if (lambda < 0) error("Non-centrality parameter cannot be negative") # Generate the Poisson random variates Y <- rpois(n, lambda/2) # Now generate from the appropriate central chi-squared # distributions. Note that here we are using the fact that # rchisq is vectorized in its parameters. rchisq(n, k+2*Y) }