require(MASS)


covmat <- function(vars,cors,tol=1e-7) {
  n <- length(vars)
  cors <- rep(cors,length.out=n*(n-1)/2)
  m <- outer(sqrt(vars),sqrt(vars))
  ## is there a more efficient way to create a symmetric matrix?
  m[lower.tri(m)] <- m[lower.tri(m)]*cors
  m <- t(m)
  m[lower.tri(m)] <- m[lower.tri(m)]*cors
  if (any(eigen(m)$values<(-tol)))
    warning("non-positive definite matrix")
  m
}

strip.blanks <- function(str) {
 sub(' +$', '', sub('^ +', '', str))
}

skip.blank.lines <- function(con) {
  junk <- readLines(con,1)
  n <- 0
  while(nchar(junk)==0 || length(grep("[^ ]",junk))==0) {
    junk <- readLines(con,1)
    n <- n+1
  }
  pushBack(junk,con)
  n
}

phillips.getpmat <- function(con,n,hdrlines=1) {
  readLines(con,hdrlines) ## strip header
  lines <- readLines(con,n)
  s1 <- strsplit(lines,":")
  labs <- sapply(s1, function(z)strip.blanks(z[1]))
  vals <- t(sapply(sapply(s1,"[",2),
                   function(z)
                   as.numeric(strsplit(z,"[:(),=]")[[1]][c(1,3,5)])))
  dimnames(vals) <- list(labs,c("chi-square","df","p"))
  vals
}

phillips.getvec <- function(con,n=3,hdrlines=1) {
  readLines(con,hdrlines)
  vec = numeric()
  while(length(vec)<n) {
    readLines(con,1)
    vec = c(vec,scan(con,n=n, nlines=1,quiet = TRUE))
  }
  vec
}

phillips.getsubmat <- function(con,n=3) {
  readLines(con,1)
  mstr <- readLines(con,n=n)
  t(sapply(strsplit(mstr," +"),
         function(z)as.numeric(z[-(1:2)])))
}

phillips.getmat <- function(con,n=3,hdrlines=1) {
  readLines(con,hdrlines)
  m = matrix(nrow=n,ncol=0)
  while(ncol(m)<n) {
    m = cbind(m,phillips.getsubmat(con,n))
    skip.blank.lines(con)
  }
  m
}

phillips.getcolonval <- function(con) {
 str <- readLines(con,1)
 as.numeric(strsplit(str,":")[[1]][2])
}

run.cpc <- function(covs,npts,
                    progdir=NULL,
                    progname=NULL,
                    ansfn=NULL,datfn=NULL,
                    outfn=NULL) {
  if (is.null(progdir))
    progdir <- file.path(.path.package("cpcbp"),"exec")
  if (is.null(progname))
      progname=paste("phillips-cpc-",.Platform$OS.type,".exe",sep="")
  ngrp = length(covs)
  nvar = ncol(covs[[1]])
  npts = rep(npts,length.out=ngrp)
  outf <- file(description=datfn,open="w") ## open for  writing
  writeLines(c(paste("group",1:ngrp,sep="",collapse=" "),
               paste("var",1:nvar,sep="",collapse=" ")),con=outf)
  for (i in 1:ngrp) {
    writeLines(as.character(npts[i]),outf)
    write.table(covs[[i]],file=outf,
              row.names=FALSE,col.names=FALSE,quote=FALSE)
  }
  close(outf)
  progpath = file.path(progdir,progname)
  if (.Platform$OS.type=="unix") {
    ans <- file(description=ansfn,open="w") ## open for writing
    writeLines(c(datfn,outfn,"y","n","n",""),con=ans)
    close(ans)
    system(paste(progpath,"<",ansfn,"1> /dev/null 2>&1"))
  } else {  ## assume Windows!
    system(progpath,input=c(basename(datfn),basename(outfn),
                      "y","n","n",""))
  }
}

phillips.cpcvec <- function(x=NULL,f,covs=NULL,
                     npts=NULL,
                     progdir=NULL,
                     progname=NULL,
                     ansfn=NULL,
                     datfn=NULL,
                     outfn=NULL,
                     unlink.temp=TRUE,
                     use="complete.obs") {
  phillips.cpc(x=x,f=f,covs=covs,
               npts=npts,progdir=progdir,progname=progname,
               ansfn=ansfn,
               datfn=datfn,
               outfn=outfn,unlink.temp=unlink.temp,
               use=use)$evecs.CPC
}

phillips.cpc <- function(x=NULL,f,covs=NULL,
                     npts=NULL,
                     progdir=NULL,
                     progname=NULL,
                     ansfn=NULL,
                     datfn=NULL,
                     outfn=NULL,
                     unlink.temp=TRUE,
                     use="complete.obs") {
  if (is.null(progdir))
    progdir = file.path(.path.package("cpcbp"),"exec")
  if (is.null(progname))
      progname=paste("phillips-cpc-",.Platform$OS.type,".exe",sep="")
  tmpdirf = function(x,base) {
    if(is.null(x)) switch(.Platform$OS.type,
                          unix=tempfile(base),
                          windows=tempfile(base,tmpdir=getwd())) else x
  }
  ansfn = tmpdirf(ansfn,"cpcans")
  datfn = tmpdirf(datfn,"cpcdat")
  outfn = tmpdirf(outfn,"cpcout")
  if (is.null(covs)) {
    if (is.null(x) || missing(f))
      stop("must specify either covariances or data and grouping factor")
    datalist = split.data.frame(x,f)
    covs <- lapply(datalist,cov,use=use)
    npts <- sapply(datalist,nrow)
  } else {
    if (is.null(npts)) {
      if (is.null(x) || missing(f)) {
        stop("must specify either number of points per group or data and grouping factor")
        datalist = split.data.frame(x,f)
        npts <- sapply(datalist,nrow)
      }
    }
  }
  run.cpc(covs,npts,progdir,progname,ansfn,datfn,outfn)
  ngrp = length(covs)
  nvar = ncol(covs[[1]])
  r = read.cpc(outfn,ngrp,nvar)
  if (unlink.temp) {
    unlink(ansfn)
    unlink(outfn)
    unlink(datfn)
  }
  return(r)
}

read.cpc <- function(outfn,ngrp,nvar) {
  con <- file(outfn,open="r")
  readLines(con,4)
  ## Extract data for equality
  eqcrit <- phillips.getcolonval(con)
  eqpar <- phillips.getcolonval(con)
  skip.blank.lines(con)
  eqtestmat <- phillips.getpmat(con,n=nvar+1)
  skip.blank.lines(con)
  evals.pool <- phillips.getvec(con,n=nvar)
  skip.blank.lines(con)
  evecs.pool <- phillips.getmat(con,n=nvar)
  skip.blank.lines(con)
  cov.pool <- phillips.getmat(con,n=nvar)
  skip.blank.lines(con)
  ## Extract data for proportionality
  readLines(con,1)
  propcrit <- phillips.getcolonval(con)
  proppar <- phillips.getcolonval(con)
  skip.blank.lines(con)
  proptestmat <- phillips.getpmat(con,n=nvar)
  skip.blank.lines(con)
  evals.prop <- phillips.getvec(con,n=nvar)
  skip.blank.lines(con)
  evecs.prop <- phillips.getmat(con,n=nvar)
  skip.blank.lines(con)
  cov.prop.grps <- lapply(1:ngrp,
                          function(x) {
                            readLines(con,1)  ## skip prop const
                            m <- phillips.getmat(con,n=nvar)
                            skip.blank.lines(con)
                            m
                          })
  readLines(con,1)
  ## Extract data for common principal components
  CPCcrit <- phillips.getcolonval(con)
  CPCpar <- phillips.getcolonval(con)
  skip.blank.lines(con)
  CPCtestmat <- phillips.getpmat(con,nvar-1)
  skip.blank.lines(con)
  readLines(con,1)
  evals.cpc.grps <- lapply(1:ngrp,
                           function(x) {
                             m <- phillips.getvec(con,n=nvar)
                             skip.blank.lines(con)
                             m
                           })
  skip.blank.lines(con)
  evecs.CPC <- phillips.getmat(con,n=nvar)
  rest <- readLines(con)
  lastline <- rest[length(rest)]
  s = strsplit(lastline," ")[[1]]
  cpc1.pval <- as.numeric(s[length(s)])
  close(con)
  return(list(evecs.CPC=evecs.CPC,cpc1.pval=cpc1.pval))
}

bpmat <- function(evec) {
  return(diag(length(evec)) - evec %*% t(evec))
}

bpfun <- function(x,evec=NULL,f) {
  if (is.null(evec)) {
    if (missing(f)) stop("must specify either CPC1 or a grouping factor")
    evec =   cpc.options()$cpcvecfun(x,f)[,1]
  }
  t(bpmat(evec) %*% t(x))
}

bp.means <- function(x,f,center=FALSE,
                     byvar=FALSE) {
  x <- x[complete.cases(x),]
  cpcvecfun = cpc.options()$cpcvecfun
  cpc.evecs = cpcvecfun(x,f)
  cpc1 = cpc.evecs[,1]
  if (length(levels(f))>2 && !byvar)
    stop("can't do differences between vars with more than two groups")
  if (center) {
      ctr = colMeans(x)
      x = meancorrect(x)
    } else {
      ctr = rep(0,ncol(x))
    }
  bpx = bpfun(x,cpc1)
  datalist = split.data.frame(bpx,f)
  m.bp = t(sapply(datalist,colMeans))
  bpe <- bp.error(x,f,cpcmat=cpc.evecs,byvar=byvar)
  varm <- t(sapply(datalist,apply,2,function(x){var(x)/length(x)}))
  if (!byvar) {
    varm=colSums(varm)
    m.bp = m.bp[2,]-m.bp[1,]
    label1="meandiffs"
  } else {
    m.bp = t(apply(m.bp,1,"+",ctr))
    label1="groupmeans"
  }
  tote = sqrt(varm+bpe)
  L = list(m.bp,sqrt(varm),tote)
  names(L) = c(label1,"sd.raw","sd.corr")
  L
}

pooled.cpc <- function (x, f, use="complete.obs") 
{
  datalist = split.data.frame(as.data.frame(x),f)
  Xsc <- do.call("rbind", lapply(datalist, scale, scale = FALSE))
  eigen(cov(Xsc,use=use))$vectors
}

calc.cpcerr <- function(x,f,
                        cpcmat=NULL,
                        calc.cov=TRUE,
                        use="complete.obs") {
  cpcvecfun = cpc.options()$cpcvecfun
  ngrp = length(levels(f))
  ngrpn = table(f)
  nvar = ncol(x)
  N = nrow(x)
  datalist = split.data.frame(x,f)
  e <- lapply(datalist,function(z) {eigen(cov(z,use=use))$values})
  theta.hat <- matrix(0,nrow=nvar,ncol=nvar)
  for (i in 1:ngrp) { ## fancy: uses "outer"
    theta.i = outer(e[[i]],e[[i]],
      function(x,y) { (x*y)/(x-y)^2})*(N/ngrpn[i])
    theta.hat = theta.hat + 1/theta.i
  }
  theta.hat = 1/theta.hat ## calc. harmonic mean
  if (is.null(cpcmat))
    cpcmat <- cpcvecfun(x,f) ## get common principal components
  ## fancy: sum with j!=h is equivalent to matrix mult. with
  ## diag(theta)==0
  diag(theta.hat) <- 0
  if (!calc.cov) {
    ## VARIANCES of all elements with themselves
    return(1/N*(cpcmat^2 %*% theta.hat))
  } else {
    ## VAR-COV matrix of eig. 1 only
    covarr <- matrix(0,nrow=nvar,ncol=nvar)
    for (j in 1:nvar) {
      covarr <- covarr + theta.hat[1,j]*(cpcmat[,j] %*% t(cpcmat[,j]))
    }
    return(covarr/N)
  }
}

bp.error <- function(x,f,
                     cpcmat=NULL,m=NULL,eigvar=NULL,
                     use="complete.obs",
                     debug=FALSE,byvar=FALSE) {
  crossterms=TRUE
  cpcvecfun = cpc.options()$cpcvecfun
  negtol=cpc.options()$neg.bpvar.tol
  datalist=split.data.frame(x,f)
  nvar = ncol(x)
  if (is.null(cpcmat)) cpcmat <- cpcvecfun(x,f)
  if (is.null(m)) m <- t(sapply(datalist,apply,2,mean))
  if (!byvar) {
    ## find *total* deviation in each trait (ignore NAs)
    totdev <- matrix(apply(abs(m),2,sum,na.rm=TRUE,drop=FALSE))
  } else {
    totdev <- t(m)
  }
  if (is.null(eigvar)) 
    eigvar <- calc.cpcerr(x,f,calc.cov=TRUE,
                          cpcmat=cpcmat)
  bpvar <- matrix(nrow=nvar,ncol=nvar)
  cpcv1 <- cpcmat[,1] ## principal eigenvector
  for (i in 1:nvar) {
    for (j in 1:nvar) {
      ## fixed bug: square product
      ## added cov. term
      ## fixed bug: transposed cpcmat correctly
      bpvar[i,j] <- (cpcv1[i]*cpcv1[j])^2*
        (eigvar[i,i]/cpcv1[i]^2+eigvar[j,j]/cpcv1[j]^2)+
          2*cpcv1[i]*cpcv1[j]*eigvar[i,j]
    }
  }
  ##  if (return.bpvar) return(bpvar)
  bpcov = array(dim=c(nvar,nvar,nvar))
  ## (i,j,k) = covariance of b_ij with b_ik
  for (i in 1:nvar) {
    for (j in 1:(nvar-1)) {
      for (k in (j+1):nvar) {
        bpcov[i,j,k] = 2*cpcv1[i]*cpcv1[j]*eigvar[i,k]+
          2*cpcv1[i]*cpcv1[k]*eigvar[i,j]+
            cpcv1[i]^2*eigvar[j,k]+
              cpcv1[j]*cpcv1[k]*eigvar[i,i]
        if (crossterms)
           bpcov[i,j,k] = bpcov[i,j,k] -
             cpcv1[i]*cpcv1[j]*eigvar[i,k] -
               cpcv1[i]*cpcv1[k]*eigvar[i,j] -
                 eigvar[i,j]*eigvar[i,k]
        ##        if (debug) cat(i,j,k,bpcov[i,j,k],"\n")
      }
    }
  }
  if (debug) print(bpcov)
  bperr <- bpvar %*% totdev^2
  for (i in 1:nvar) {
    for (j in 1:(nvar-1))
      for (k in (j+1):nvar) {
        covterm <- 2*totdev[j,]*totdev[k,]*bpcov[i,j,k]
        if (debug) cat(i,j,k,bperr,covterm,"\n")
        bperr[i,] <- bperr[i,]+covterm
      }
  }
  if (any(bperr<(-negtol))) warning("estimated BP variance <0:",min(bperr))
  if (debug) cat(dim(bperr),"\n",names(dat),"\n",colnames(x),"\n")
  indiv.errs <- FALSE
  if (indiv.errs) {
    rownames(bperr) <- names(dat)
    colnames(bperr) <- colnames(x)
  } else {
    rownames(bperr) <- colnames(x)
  }
  drop(t(bperr))
}

bp.anova <- function(x,f,verbose=FALSE,
                     use="complete.obs",
                     debug=FALSE) {
  cpcvecfun = cpc.options()$cpcvecfun
  nvar <- ncol(x)
  datalist = split.data.frame(x,f)
  ngrpn <- as.numeric(table(f))
  N <- sum(ngrpn)
  if (sd(ngrpn)>0) warning("haven't yet figured out bp.anova with unbalanced groups")
  ngrpn = ngrpn[1]
  cpc.evecs = cpcvecfun(x,f)
  bpssq <- bp.error(x,f,cpcmat=cpc.evecs,
                    use=use,debug=debug)*ngrpn^2
  e11 = cpc.evecs[,1]
  bpx = lapply(datalist,bpfun,e11)
  X <- matrix(ncol=nvar,nrow=N)
  alist <- list()
  for (i in 1:nvar) {
    ## flatten back-projected variables from different groups
    X[,i] <- do.call("c",lapply(bpx,function(z)z[,i]))
    a <- anova(lm(X[,i] ~ f))
    ## add bp error to sum of squares
    a$"Sum Sq"[2] <- a$"Sum Sq"[2] + bpssq[i]
    a$"Mean Sq"[2] <- a$"Sum Sq"[2]/a$"Df"[2]
    a$"F value"[1] <- a$"Mean Sq"[1]/a$"Mean Sq"[2]
    a$"Pr(>F)"[1] <- pf(a$"F value"[1],
                        a$"Df"[1],
                        a$"Df"[2],lower.tail=FALSE)
    alist[[i]] <- a
  }
  names(alist) <- colnames(x)
  if (!verbose) alist else
  list(alist=alist,bp=X,cpc.evecs=cpc.evecs)
}

plot.multigrp <- function(x,f,vars=1:ncol(x),
  cols=1:ncol(x),eqsc=FALSE,xlim=NULL,ylim=NULL,...) {
  ngrp=length(levels(f))
  xlist = split.data.frame(as.data.frame(x),f)
  npts = as.numeric(table(f))
  x = x[,vars]
  if (length(vars)==2) {
    plot(x,col=cols[f],xlim=xlim,ylim=ylim,...)
  } else {
    pairs(x,col=cols[f],xlim=xlim,ylim=ylim,...)
  }
}

plot.dat.theor <- function(x,f,vars=1:2,cols=1:length(levels(f)),theor,
                           lines=TRUE,ellipses=TRUE,...) {
  if (ellipses) require(ellipse)
  plot(x[,vars[1]],x[,vars[2]],
       xlab=colnames(x)[vars[1]],
       ylab=colnames(x)[vars[2]],col=cols[f],...)
  points(theor$mean[,vars[1]],theor$mean[,vars[2]],pch=16,col=cols)
  slope=theor$eigs[vars[2],1]/theor$eigs[vars[1],1]
  int = theor$mean[,vars[2]]-slope*theor$mean[,vars[1]]
  if (lines) mapply(function(int,col) abline(a=int,b=slope,col=col), int,cols)
  if (ellipses) invisible(mapply(function(ctr,col) {
    lines(ellipse(theor$varcov,
                  centre=as.numeric(ctr)),
                  col=col)},
                                 split.data.frame(theor$mean[,vars],1:2),
                                 as.list(cols)))
}

sim.theor <- function(vars=c(10,10,10),cors=.8,
                    m1a=rep(0,length(vars)),
                    offset=1, offset2=0) {
  ndim <- length(vars)
  VC0 <- covmat(vars,cors)
  e0 <- eigen(VC0)
  offset = c(0,offset)
  offset2 = c(0,offset2)
  ngrp = length(offset)
  meanvals = t(mapply(function(O1,O2) {
    m1a+O1*e0$vectors[,1]+O2*e0$vectors[,2]
  },offset,offset2))
  list(mean=meanvals,varcov=VC0,eigs=e0$vec)
}
  
simdata <- function(vars=c(10,10,10),cors=.8,npts=200,
                    seed=NULL,m1a=rep(0,length(vars)),
                    offset=1, offset2=0) {
  ndim <- length(vars)
  VC0 <- covmat(vars,cors)
  e0 <- eigen(VC0)
  offset = c(0,offset)
  offset2 = c(0,offset2)
  ngrp = length(offset)
  npts = rep(npts,ngrp)
  if (!is.null(seed)) set.seed(seed)
  x = do.call("rbind",mapply(function(n,O1,O2) {
    mvrnorm(n,mu=m1a+O1*e0$vectors[,1]+O2*e0$vectors[,2],Sigma=VC0)
  },npts,offset,offset2,SIMPLIFY=FALSE))
  data.frame(x,f=factor(rep(1:length(npts),npts)))
}

meancorrect <- function(x) {
  scale(x,scale=FALSE,center=colMeans(x,na.rm=TRUE))
}

coverfun <- function(p,alpha=0.05,one.tailed=FALSE,na.rm=TRUE) {
  if (na.rm) p <- p[!is.na(p)]
  if (!one.tailed) {
    return(sum(p>alpha/2 & p<(1-alpha/2))/length(p))
  } else {
    return(sum(p>alpha)/length(p))
  }
}

.cpc.options = list(cpcvecfun=phillips.cpcvec,
  neg.bpvar.tol = 5e-3)

cpc.options <- function(cpcvecfun,neg.bpvar.tol) {
  if (!missing(cpcvecfun)) { .cpc.options$cpcvecfun = cpcvecfun }
  if (!missing(neg.bpvar.tol)) { .cpc.options$neg.bpvar.tol = neg.bpvar.tol }
  invisible(.cpc.options)
}

