R/projectF.R

Defines functions .make.sym.matrix .n.to.pois .getCum .cor.NN.N .cor.NN.NN .cor.O.N .cor.P.N .cor.P.P .numless .rord .n.to.ord .getMaxMin .rnonn .n.to.nonn .testC4 .getBCD .BBsolveWrapper .fleishman .checkMoments .getMaxMinCorr find.cor.mat.star validate.cor.mat lower.upper.cors .check.cor.mat check.params .addMeanVar

Documented in check.params find.cor.mat.star lower.upper.cors validate.cor.mat

genPBONN <- function (n, cmat.star, no.pois=0, no.bin=0, no.ord=0, no.nonn=0, 
                      pois.list=list(), bin.list=list(), ord.list=list(), is.ord.list.cum=FALSE, nonn.list=list()) {
  no.total = no.pois+no.bin+no.ord+no.nonn
  if(no.total <= 0) stop("There must be at least one variable!")
  nonn.list = lapply(nonn.list, .addMeanVar)
  .check.cor.mat(no.total, cmat.star)
  if(no.ord > 0 && !is.ord.list.cum) {
    ordbin.cum.list = lapply(c(bin.list, ord.list), .getCum)
  } else {
    ordbin.cum.list =  c(lapply(c(bin.list), .getCum), ord.list)
  }
  nonn.bcd.list = lapply(nonn.list, .getBCD)
  all.list = c(pois.list,ordbin.cum.list,nonn.bcd.list)
  z = mvrnorm(n, rep(0, no.total), cmat.star)
  x = z
  for(k in 1:no.total) {
    arg = all.list[[k]]
    gen = .n.to.nonn
    if(k <= no.pois) gen = .n.to.pois
    else if(k <= no.pois+no.bin+no.ord) gen = .n.to.ord
    x[,k] = sapply(z[,k], gen, arg) 
  }
  return(x)  
}

.addMeanVar <- function(mvector) {
  if(length(mvector) > 4) stop("Only the first four moments considered for continuous variables!")
  if(length(mvector) < 2) stop("At least two moments must be specified!")
  if(length(mvector)==4) return(mvector)
  if(length(mvector)==3) return(c(0,mvector))
  return(c(0,1,mvector))
}

check.params <- function(no.pois=0, no.ordbin=0, no.nonn=0, pois.list=list(), ordbin.list=list(), nonn.list=list()) {
  check.pois <- function(no.pois, pois.list) {
    if (length(pois.list) != no.pois) {
      stop("Dimension of the Poisson list does not match the number of Poisson variables!\n")
    }
    if (!all(pois.list > 0) ) {
      stop("Poisson rate parameters must be greater than zero!\n")
    }
    return(TRUE)
  }
  check.nonn <- function(no.nonn, nonn.list) {
    if (length(nonn.list) != no.nonn) {
      stop("Dimension of the continuous list does not match the number of variables!\n")
    }
    nonn.list = lapply(nonn.list,.addMeanVar)
    if(!all(unlist(lapply(nonn.list, .checkMoments)))) {
      stop("Non-normal skewness and excess kurtosis values must follow inequality: \nexcess kurtosis >= skew^2-2, and variance must be greater than 0!\n")
    }
  }
  check.ord <- function(no.ord, ord.list) {
    if (length(ord.list) != no.ord) {
      stop("Dimension of the ordinal and/or binary list does not match \nthe number of ordinal and/or binary variables!\n")
    }
    if(no.ord > 0) {
      #Does not apply when list is cumulative
      #      if(!all(lapply(ord.list, sum)==1)) {
      #        stop("Ordinal rates must sum to 1!\n")
      #      }
      if(!all(unlist(lapply(ord.list, min) >= 0)) || !all(unlist(lapply(ord.list,max)<=1))) {
        stop("Ordinal probabilities must be between 0 and 1!\n")
      }
      if(any(unlist(lapply(ord.list, is.unsorted)))) {
        stop("Each entry of list must be cumulative! \ncumulative ordinal probabilities must be nondecreasing!")
      }
    }
  }
  check.pois(no.pois, pois.list)
  check.ord(no.ordbin, ordbin.list)
  check.nonn(no.nonn, nonn.list)
}

.check.cor.mat <- function(no.total, cor.mat) {
  if(nrow(cor.mat)!= no.total) {
    stop("Dimension of cmat.star and number of variables do not match!\n")
  }
  if(!isSymmetric(cor.mat)) {
    stop("Correlation matrix is not square symmetric!\n")
  }
  if (!is.positive.definite(cor.mat)) {
    stop("Correlation matrix is not positive definite!\n")
  }
  if (!all(cor.mat <= 1)) stop("Correlation values cannot be greater than 1! \n")
  if (!all(cor.mat >= -1)) stop("Correlation values cannot be less than -1! \n")
  if (!all(diag(cor.mat) == 1)) stop("All diagonal elements of the correlation matrix must be 1! \n")  
}

lower.upper.cors <- function(no.pois=0, no.bin=0, no.ord=0, no.nonn=0, pois.list=list(), 
                             bin.list=list(), ord.list=list(), is.ord.list.cum=FALSE, nonn.list=list()) {
  no.total = no.pois+no.bin+no.ord+no.nonn
  if(no.total <= 0) stop("There must be at least one variable!")
  if(no.ord > 0 && !is.ord.list.cum) {
    ordbin.cum.list = lapply(c(bin.list, ord.list), .getCum)
  } else {
    ordbin.cum.list =  c(lapply(bin.list, .getCum), ord.list)
  }
  nonn.list = lapply(nonn.list, .addMeanVar)
  check.params(no.pois, no.bin+no.ord, no.nonn, pois.list, ordbin.cum.list, nonn.list)
  nonn.bcd.list = lapply(nonn.list, .getBCD)
  all.list = c(pois.list,ordbin.cum.list,nonn.bcd.list)
  
  allcombos = combn(no.total, 2)
  maxMins = apply(allcombos, 2, .getMaxMin, no.pois, no.bin, no.ord, no.nonn, all.list)
  mins = .make.sym.matrix(no.total, maxMins[2,])
  maxes = .make.sym.matrix(no.total, maxMins[1,])
  return(list(min=mins, max=maxes))
}

validate.cor.mat <- function(cor.mat, no.pois=0, no.bin=0, no.ord=0, no.nonn=0, pois.list=list(), 
                             bin.list=list(), ord.list=list(), is.ord.list.cum=FALSE, nonn.list=list()) {
  no.total = no.pois+no.bin+no.ord+no.nonn
  if(no.total <= 0) stop("There must be at least one variable!")
  .check.cor.mat(no.total, cor.mat)
  if(no.ord > 0 && !is.ord.list.cum) {
    ordbin.cum.list = lapply(c(bin.list, ord.list), .getCum)
  } else {
    ordbin.cum.list =  c(lapply(c(bin.list), .getCum), ord.list)
  }
  nonn.list = lapply(nonn.list, .addMeanVar)
  check.params(no.pois, no.bin+no.ord, no.nonn, pois.list, ordbin.cum.list, nonn.list)
  .check.cor.mat(no.total, cor.mat)
  nonn.bcd.list = lapply(nonn.list, .getBCD)
  all.list = c(pois.list,ordbin.cum.list,nonn.bcd.list)
  min.max.list = lower.upper.cors(no.pois, no.bin, no.ord, no.nonn, pois.list, bin.list, ord.list, is.ord.list.cum, nonn.list)
  tf = min.max.list[[1]] <= cor.mat & cor.mat <= min.max.list[[2]]
  if(all(tf)) return(TRUE)
  entries = which(!tf)-1
  cols = entries %/% nrow(tf)+1
  rows = entries %% ncol(tf)+1
  min = min.max.list[[1]][entries+1]
  max = min.max.list[[2]][entries+1]
  for(k in 1:sum(!tf)) {
    cat(paste(paste0("Cell [", rows[k], ",", cols[k], "] is invalid, must be between"), round(min[k],2), "and", round(max[k],2),"\n"))
  }
  return(FALSE)
}

find.cor.mat.star <- function(cor.mat, no.pois=0, no.bin=0, no.ord=0, no.nonn=0, pois.list=list(), 
                              bin.list=list(), ord.list=list(), is.ord.list.cum=FALSE, nonn.list=list()) {
  no.total = no.pois+no.bin+no.ord+no.nonn
  if(no.total <= 0) stop("There must be at least one variable!")
  if(validate.cor.mat(cor.mat, no.pois, no.bin, no.ord, no.nonn, pois.list, bin.list, ord.list, is.ord.list.cum, nonn.list) != TRUE) {
    stop("Cor.mat is not valid!")
  }
  if(no.ord > 0 && !is.ord.list.cum) {
    ordbin.cum.list = lapply(c(bin.list, ord.list), .getCum)
  } else {
    ordbin.cum.list =  c(lapply(c(bin.list), .getCum), ord.list)
  }
  nonn.list = lapply(nonn.list, .addMeanVar)
  nonn.bcd.list = lapply(nonn.list, .getBCD)
  all.list = c(pois.list,ordbin.cum.list,nonn.bcd.list)
  cor.mat.star = cor.mat
  if(no.pois > 1) {
    pp.combo = combn(no.pois, 2)
    for(k in 1:ncol(pp.combo)) {
      i=pp.combo[1,k]
      j=pp.combo[2,k]
      cor.mat.star[i,j] = .cor.P.P(all.list[[i]], all.list[[j]], cor.mat[i,j])
      cor.mat.star[j,i] = cor.mat.star[i,j]
    }
  }
  if(no.bin+no.ord > 1) {
    start = no.pois+1
    end = no.bin+no.ord+no.pois
    cor.mat.star[start:end,start:end] = ordcont(all.list[start:end], cor.mat[start:end,start:end])$SigmaC
  }
  if(no.nonn > 1) {
    nonn.combo = combn(no.nonn, 2)+no.pois+no.bin+no.ord
    for(k in 1:ncol(nonn.combo)) {
      i=nonn.combo[1,k]
      j=nonn.combo[2,k]
      cor.mat.star[i,j] = .cor.NN.NN(all.list[[i]], all.list[[j]], cor.mat[i,j])
      cor.mat.star[j,i] = cor.mat.star[i,j]
    }
  }
  
  if(no.pois > 0 && no.bin+no.ord > 0) {
    po.combo = expand.grid(1:no.pois, 1:(no.bin+no.ord)+no.pois)
    for(k in 1:nrow(po.combo)) {
      i=po.combo[k,1]
      j=po.combo[k,2]
      temp = .cor.P.N(all.list[[i]]) * .cor.O.N(all.list[[j]])
      cor.mat.star[i,j] = cor.mat[i,j] / temp
      cor.mat.star[j,i] = cor.mat.star[i,j]
    }
  }
  if(no.pois > 0 && no.nonn > 0) {
    pnn.combo = expand.grid(1:no.pois, 1:no.nonn+no.bin+no.ord+no.pois)
    for(k in 1:nrow(pnn.combo)) {
      i=pnn.combo[k,1]
      j=pnn.combo[k,2]
      temp = .cor.P.N(all.list[[i]]) * .cor.NN.N(all.list[[j]])
      cor.mat.star[i,j] = cor.mat[i,j]/temp
      cor.mat.star[j,i] = cor.mat.star[i,j]
    }
  }
  if(no.nonn > 0 && no.bin+no.ord > 0) {
    onn.combo = expand.grid(1:(no.bin+no.ord)+no.pois,1:no.nonn+no.bin+no.ord+no.pois)
    for(k in 1:nrow(onn.combo)) {
      i=onn.combo[k,1]
      j=onn.combo[k,2]
      temp = .cor.O.N(all.list[[i]]) * .cor.NN.N(all.list[[j]])
      cor.mat.star[i,j] = cor.mat[i,j] / temp
      cor.mat.star[j,i] = cor.mat.star[i,j]
    }
  }
  if(any(is.na(cor.mat.star))) {
    stop("Oops! Something went wrong!")
  }
  if(!is.positive.definite(cor.mat.star)) {
    cat("Warning! \nFinal correlation matrix had to be coerced to be positive definite!")
    cor.mat.star = nearPD(cor.mat.star, corr=TRUE)$mat
    cor.mat.star = as.matrix(cor.mat.star)
  }
  return(cor.mat.star)
}

#Follows the method of Demirtas and Hedeker, 2011.
.getMaxMinCorr <- function(gen1,arg1,gen2,arg2,n=100000) {
  x=gen1(n,arg1)
  y=gen2(n,arg2)
  xs=sort(x)
  ys=sort(y)
  yd=sort(y,decreasing=TRUE)
  maxCor=cor(xs,ys,use="all.obs")
  minCor=cor(xs,yd,use="all.obs")
  return(c(maxCor,minCor))
}

.checkMoments <- function(mvector) {
  return(mvector[2] > 0 && (mvector[4] >= mvector[3]^2-2))
}

.fleishman <- function(x, r1, r2) {
  b <- x[1]
  c <- x[2]
  d <- x[3]
  f <- rep(NA, 3)
  f[1] <- b^2 + 6 * b * d + 2 * c^2 + 15 * d^2 - 1
  f[2] <- 2*c * (b^2 + 24*b*d + 105*d^2 + 2) - r1
  f[3] <- b*d + c^2 * (1 + b^2 + 28 * b * d) + d^2 * (12 + 48 * b* d + 141 * c^2 + 225 * d^2) - r2/24
  f
}

.BBsolveWrapper <- function(arg1,arg2) {
  ans=BBsolve(par=runif(3),fn=.fleishman,r1=arg1,r2=arg2,quiet=TRUE)
  if(arg1*ans$par[2] < 0) ans$par = -1*ans$par
  return(ans)
}

#Solves system of equations to find (b,c,d) using BB package and tacks on mean and variance
.getBCD <- function(rs, maxIters=100) {
  rmean = rs[1]
  rvar = rs[2]
  r1 = rs[3]
  r2 = rs[4]
  ans=.BBsolveWrapper(r1, r2)
  iters = 0
  while(iters < maxIters && (ans$convergence != 0 || !.testC4(ans$par))) {
    iters = iters + 1
    ans = .BBsolveWrapper(r1,r2)
  }
  return(c(ans$par,rmean,rvar))
}

.testC4 <- function(bcd1) {
  return(bcd1[3] > sqrt((5+7*bcd1[1]^2)/75)-2*bcd1[1]/5)
}

.n.to.nonn <- function(z, bcd1) {
  bcd4 = c(-bcd1[2],bcd1[1:3])
  y = bcd4 %*% rbind(rep(1,length(z)),z,z^2,z^3)
  return(bcd1[4]+sqrt(bcd1[5])*as.vector(y))
}

.rnonn <- function(n, bcd1) {
  z = rnorm(n)
  bcd4 = c(-bcd1[2],bcd1[1:3])
  y = bcd4 %*% rbind(rep(1,n),z,z^2,z^3)
  return(bcd1[4]+sqrt(bcd1[5])*as.vector(y))
}

.getMaxMin <- function(x, no.pois, no.bin, no.ord, no.nonn, all.list) {
  n1 = x[1]
  n2 = x[2]
  arg1 = all.list[[n1]]
  arg2 = all.list[[n2]]
  gen1 = .rnonn
  gen2 = .rnonn
  if(n1 <= no.pois) gen1 = rpois
  else if(n1 <= no.pois+no.bin+no.ord) gen1 = .rord
  if(n2 <= no.pois) gen2 = rpois
  else if(n2 <= no.pois+no.bin+no.ord) gen2 = .rord
  return(.getMaxMinCorr(gen1,arg1,gen2,arg2))
}

.n.to.ord <- function(z, cum.vec) {
  u = pnorm(z)
  return(sapply(u, .numless, cum.vec))
}

.rord <- function(n, cum.vec) {
  u = runif(n)
  return(sapply(u, .numless, cum.vec))
}

.numless <- function(u, cum.vec) {
  return(sum(cum.vec<=u))
}

.cor.P.P <- function(lambda1, lambda2, cor12, n=10000) {
  u = runif(n)
  maxcor = cor(qpois(u, lambda1), qpois(u, lambda2))
  mincor = cor(qpois(u, lambda1), qpois(1 - u, lambda2))
  a = -maxcor * mincor/(maxcor + mincor)
  b = log((maxcor + a)/a, exp(1))
  c = -a
  corrected = log((cor12 + a)/a, exp(1))/b
  corrected = ifelse((corrected > 1 | corrected < (-1)), 
                     NA, corrected)
  return(corrected)
}

.cor.P.N <- function(lambda, n=10000) {
  y = rnorm(n)
  x = pnorm(y)
  p = qpois(x, lambda)
  return(cor(p,y))
}

.cor.O.N <- function(cum.vec, n=10000) {
  x = rnorm(n)
  u = pnorm(x)
  o = sapply(u, .numless, cum.vec)-1
  return(cor(x,o))
}

.cor.NN.NN <- function(bcd1, bcd2, rho) {
  coeff=c(-rho,bcd1[1]*bcd2[1]+3*bcd1[1]*bcd2[3]+3*bcd1[3]*bcd2[1]+9*bcd1[3]*bcd2[3],2*bcd1[2]*bcd2[2],6*bcd1[3]*bcd2[3])
  y=Re(polyroot(coeff))
  tf = y<=1 & y>=-1
  if(sum(tf) > 1) {
    b = coeff%*%rbind(rep(1,3),y,y^2,y^3)
    tf = tf & abs(b) < 10^-6
  }
  if(sum(tf) > 1) {
    tf = tf & abs(y-rho)==min(abs(y-rho)) & !duplicated(y)
  }
  rhoN = y[tf]
  return(rhoN)
}

.cor.NN.N <- function(bcd) {
  return(1/(bcd[1]+3*bcd[3]))
}

.getCum <- function(p.vec) {
  cum.vec = vector(length(p.vec)-1,mode="numeric")
  for (k in 1:length(cum.vec) )
    cum.vec[k] = sum(p.vec[1:k])
  return(cum.vec)
}

.n.to.pois <- function(z, lambda) {
  u = pnorm(z)
  return(qpois(u, lambda))
}

.make.sym.matrix  = function(numVars, rhos) {
  sigma = matrix(0, numVars, numVars)
  sigma[upper.tri(sigma)] = rhos
  sigma = t(sigma) + sigma + diag(1, numVars)
  return(sigma)
}

Try the PoisBinOrdNonNor package in your browser

Any scripts or data that you put into this service are public.

PoisBinOrdNonNor documentation built on March 22, 2021, 9:06 a.m.