R/mvdistributions.R

Defines functions cor2cov rmvnorm .fixInf rmvzinegbin .zinegbin_getK .zinegbin_getP .zinegbin_getLam rmvnegbin .negbin_getK rmvpois rmvzipois .zipois_getP .zipois_getLam rzipois

Documented in cor2cov rmvnegbin rmvnorm rmvpois rmvzinegbin rmvzipois rzipois

#' Draw samples from a zero-inflated poisson distribution
#'
#' @param n the number of samples to draw
#' @param lambda The poisson rate parameter
#' @param pstr0 probability of drawing a zero
#' @return Poisson counts of length \eqn{n}
#' @importFrom stats qpois dpois runif
#' @export
rzipois <- function(n, lambda, pstr0 = 0) {
    ans <- rpois(n, lambda)
    ans <- ifelse(runif(n) < pstr0, 0, ans)
    prob0 <- exp(-lambda)
    deflat.limit <- -1/expm1(lambda)
    ind0 <- (deflat.limit <= pstr0) & (pstr0 < 0)
    if (any(ind0)) {
        pobs0 <- pstr0[ind0] + (1 - pstr0[ind0]) * pstr0[ind0]
        ans[ind0] <- qpois(p = runif(sum(ind0),
                    min = dpois(0, lambda[ind0])), lambda[ind0])
        ans[ind0] <- ifelse(runif(sum(ind0)) < pobs0, 0, ans[ind0])
    }
    ans[pstr0 < deflat.limit] <- NaN
    ans[pstr0 > 1] <- NaN
    ans
}


#' @noRd
#' @keywords internal
.zipois_getLam <- function(mu, S) {
    S <- max(sqrt(mu), S)
    (S^2/mu) + mu - 1
}

#' @noRd
#' @keywords internal
.zipois_getP <- function(mu, S) {
    S <- max(sqrt(mu), S)
    (S^2 - mu) / (mu^2 - mu + S^2)
}

#' Generate multivariate, Zero-inflated poisson data,
#' with counts approximately correlated according to Sigma
#'
#' @param n number of samples to draw
#' @param mu mean vector for variables (of length \eqn{D})
#' @param Sigma \eqn{DxD} covariance or correlation matrix
#' @param lambdas supply rate parameter (instead of mu)
#' @param ps probability of zeros (instead of mu)
#' @param ... arguments passed to \code{VGAM::qzipois}
#' @return \eqn{Dxn} matrix with zi-poisson data
#' @importFrom VGAM qzipois
#' @export
rmvzipois <- function(n, mu, Sigma=diag(length(mu)), lambdas, ps, ...) {
    d   <- ncol(Sigma)
    Cor <- cov2cor(Sigma)
    SDs <- sqrt(diag(Sigma))

    if (missing(lambdas) || missing(ps)) {
        if (missing(mu)) stop("Need to supply mu")
        if (length(mu) != length(SDs)) stop("Sigma and mu dimensions don't match")
        lambdas <- unlist(lapply(1:length(SDs), function(i) .zipois_getLam(mu[i], SDs[i])))
        ps   <- unlist(lapply(1:length(SDs), function(i) .zipois_getP(mu[i], SDs[i])))
    }
    if (length(lambdas) != length(SDs)) stop("Sigma and mu/lambdas dimensions don't match")
    if (length(lambdas) == 1) stop("Need more than 1 variable")

    normd  <- rmvnorm(n, rep(0, d), Cor)
    unif   <- pnorm(normd)
    data <- t(matrix(VGAM::qzipois(t(unif), lambdas, pstr0=ps, ...), d, n))
    data <- .fixInf(data)
    return(data)
}


#' Generate multivariate poisson data,
#' with counts approximately correlated according to Sigma
#'
#' @param n number of samples to draw
#' @param mu mean vector for variables (of length \eqn{D})
#' @param Sigma \eqn{DxD} covariance or correlation matrix
#' @param ... Arguments passed to \code{qpois}
#' @return \eqn{Dxn} matrix with zi-poisson data
#' @importFrom stats qpois
#' @export
rmvpois <- function(n, mu, Sigma, ...) {
    Cor <- cov2cor(Sigma)
    SDs <- sqrt(diag(Sigma))
    d   <- length(SDs)
    if (length(mu) != length(SDs)) stop("Sigma and mu/lambdas dimensions don't match")
    if (length(mu) == 1) stop("Need more than 1 variable")
    normd  <- rmvnorm(n, rep(0, d), Cor)
    unif   <- pnorm(normd)
    data <- t(matrix(qpois(t(unif), mu, ...), d, n))
    data <- .fixInf(data)
    return(data)
}

#' @noRd
#' @keywords internal
.negbin_getK <- function(mu, S) {
    S <- max(sqrt(mu), S)
    mu^2/((S^2+1e-3)-mu)
}

#' Generate multivariate, Zero-inflated negative binomial data,
#' with counts approximately correlated according to Sigma
#'
#' @param n number of samples to draw
#' @param mu mean vector for variables (of length \eqn{D})
#' @param Sigma \eqn{DxD} covariance or correlation matrix
#' @param ks shape parameter
#' @param ... other arguments to the negative binomial distribution
#' @return \eqn{Dxn} matrix with zi-poisson data
#' @importFrom stats qnbinom
#' @export
rmvnegbin <- function(n, mu, Sigma, ks, ...) {
# Generate an NxD matrix of Zero-inflated poisson data,
# with counts approximately correlated according to Sigma
    Cor <- cov2cor(Sigma)
    SDs <- sqrt(diag(Sigma))
    if (missing(mu)) stop('mu is required')
    if (length(mu) != length(SDs)) stop("Sigma and mu dimensions don't match")
    if (missing(ks)) {
        ks   <- unlist(lapply(1:length(SDs), function(i) .negbin_getK(mu[i], SDs[i])))
    }
    d   <- length(mu)
    normd  <- rmvnorm(n, rep(0, d), Sigma=Cor)
    unif   <- pnorm(normd)
    data <- t(qnbinom(t(unif), mu=mu, size=ks, ...))
    data <- .fixInf(data)
    return(data)
}


#' @noRd
#' @keywords internal
.zinegbin_getLam <- function(mu, S) {
    S   <- max(sqrt(mu)+1e-3, S)
    (mu + (mu^2 - mu + S^2) / mu) / 2
}

#' @noRd
#' @keywords internal
.zinegbin_getP <- function(mu, lam) {
    1 - (mu / lam)
}

#' @noRd
#' @keywords internal
.zinegbin_getK <- function(mu, S, lam) {
    S   <- max(sqrt(mu)+1e-3, S)
    (mu * lam) / (mu^2 - (mu * (lam + 1)) + S^2)
}


#' Generate multivariate, negative binomial data,
#' with counts approximately correlated according to Sigma
#'
#' @param n number of samples to draw
#' @param mu mean vector for variables (of length \eqn{D})
#' @param Sigma \eqn{DxD} covariance or correlation matrix
#' @param ps probability of zero inflation
#' @param munbs Rate/mean parameter (instead of mu)
#' @param ks shape parameter
#' @param ... other arguments to the negative binomial distribution
#' @return \eqn{Dxn} matrix with zi-poisson data
#' @importFrom VGAM qzinegbin
#' @export
rmvzinegbin <- function(n, mu, Sigma, munbs, ks, ps, ...) {
# Generate an NxD matrix of Zero-inflated poisson data,
# with counts approximately correlated according to Sigma
    Cor <- cov2cor(Sigma)
    SDs <- sqrt(diag(Sigma))
    if (missing(munbs) || missing(ps) || missing(ks)) {
        if (length(mu) != length(SDs)) stop("Sigma and mu dimensions don't match")
        munbs <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getLam(mu[i], SDs[i])))
        ps   <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getP(mu[i], munbs[i])))
        ks   <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getK(mu[i], SDs[i], munbs[i])))
    }
    if (length(munbs) != length(SDs)) stop("Sigma and mu dimensions don't match")
    d   <- length(munbs)
    normd  <- rmvnorm(n, rep(0, d), Sigma=Cor)
    unif   <- pnorm(normd)
    data <- t(matrix(VGAM::qzinegbin(t(unif), munb=munbs, size=ks, pstr0=ps, ...), d, n))
    data <- .fixInf(data)
    return(data)
}



#' @keywords internal
.fixInf <- function(data) {
    # hacky way of replacing infinite values with the col max + 1
    if (any(is.infinite(data))) {
       data <-  apply(data, 2, function(x) {
              if (any(is.infinite(x))) {
                   x[ind<-which(is.infinite(x))] <- NA
                   x[ind] <- max(x, na.rm=TRUE)+1
                 }
                x
                })
    }
    data
}


#' Draw samples from multivariate, correlated normal distribution
#' with counts correlated according to Sigma
#'
#' @param n number of samples to draw
#' @param mu mean vector for variables (of length \eqn{D})
#' @param Sigma \eqn{DxD} covariance or correlation matrix
#' @param tol numerical tolerance for a zero eigenvalue (check for PD of Sigma)
#' @param empirical is Sigma the empirical correlation?
#' @return \eqn{Dxn} matrix with Gaussian data
#' @export
rmvnorm <- function(n=100, mu=rep(0,10), Sigma=diag(10), tol=1e-6, empirical=TRUE) {
    p <- length(mu)
    if (!all(dim(Sigma) == c(p, p)))
        stop("incompatible arguments")
    eS <- eigen(Sigma, symmetric = TRUE)
    ev <- eS$values
    if (!all(ev >= -tol * abs(ev[1L])))
        stop("'Sigma' is not positive definite")
    X <- matrix(rnorm(p * n), n, p)
    if (empirical) {
        X <- scale(X, TRUE, FALSE)
        X <- X %*% svd(X, nu = 0, nv = length(mu))$v
        X <- scale(X, FALSE, TRUE)
    }
    X <- drop(mu) + eS$vectors %*% diag(sqrt(pmax(ev, 0)), p) %*% t(X)
    return(t(X))
}

#' Convert a symmetric correlation matrix to a covariance matrix
#' given the standard deviation
#'
#' @param cor a symmetric correlation matrix
#' @param sds standard deviations of the resulting covariance.
#' @return Covariance matrix of sample dimension as cor
#' @export
cor2cov <- function(cor, sds) {
    if (length(sds) != length(diag(cor))) stop("inputs are of mismatched dimension")
    cor * sds * rep(sds, each=nrow(cor))
}
zdk123/SpiecEasi documentation built on Oct. 20, 2023, 6:49 a.m.