R/simbinPROBIT.R

Defines functions simbinPROBIT

Documented in simbinPROBIT

#' Generating Correlated Binary Data using the Multivariate Probit Method.
#' @param mu a mean vector when \code{n = 1} or is \code{NULL}, otherwise a list of mean vectors for the \code{n} clusters
#' @param Sigma a correlation matrix when \code{n = 1} or is \code{NULL}, otherwise a list of correlation matrices for the \code{n} clusters
#' @param n number of clusters. The default is \code{1}
#' @keywords cluster-randomized-trials correlated-binary-data conditional-linear-family
#' @author Hengshi Yu <hengshi@umich.edu>, Fan Li <fan.f.li@yale.edu>, Paul Rathouz <paul.rathouz@austin.utexas.edu>, Elizabeth L. Turner <liz.turner@duke.edu>, John Preisser <jpreisse@bios.unc.edu>
#' @description simbinPROBIT generates correlated binary data using the multivariate Probit method
#' (Emrich and Piedmonte, 1991). It simulates a vector of binary outcomes according the specified marginal
#' mean vector and correlation structure. Constraints and compatibility between the marginal mean and
#' correlation matrix are checked.
#'
#' @references
#'
#' Emrich, L. J., & Piedmonte, M. R. (1991). A method for generating high-dimensional multivariate binary variates.
#' The American Statistician, 45(4), 302-304.
#'
#' Preisser, J. S., Qaqish, B. F. (2014). A comparison of methods for simulating correlated binary variables with specified marginal means
#' and correlations. Journal of Statistical Computation and Simulation, 84(11), 2441-2452.
#'
#' @export
#' @examples
#'
#' #####################################################################
#' # Simulate 2 clusters, 3 periods and cluster-period size of 5 #######
#' #####################################################################
#'
#' t <- 3
#' n <- 2
#' m <- 5
#'
#' # means of cluster 1
#' u_c1 <- c(0.4, 0.3, 0.2)
#' u1 <- rep(u_c1, c(rep(m, t)))
#' # means of cluster 2
#' u_c2 <- c(0.35, 0.25, 0.2)
#' u2 <- rep(u_c2, c(rep(m, t)))
#'
#' # List of mean vectors
#' mu <- list()
#' mu[[1]] <- u1
#' mu[[2]] <- u2
#' # List of correlation matrices
#'
#' ## correlation parameters
#' alpha0 <- 0.03
#' alpha1 <- 0.015
#' rho <- 0.8
#'
#' ## (1) exchangeable
#' Sigma <- list()
#' Sigma[[1]] <- diag(m * t) * (1 - alpha1) + matrix(alpha1, m * t, m * t)
#' Sigma[[2]] <- diag(m * t) * (1 - alpha1) + matrix(alpha1, m * t, m * t)
#' y_exc <- simbinPROBIT(mu = mu, Sigma = Sigma, n = n)
#'
#' ## (2) nested exchangeable
#' Sigma <- list()
#' cor_matrix <- matrix(alpha1, m * t, m * t)
#' loc1 <- 0
#' loc2 <- 0
#' for (t in 1:t) {
#'     loc1 <- loc2 + 1
#'     loc2 <- loc1 + m - 1
#'     for (i in loc1:loc2) {
#'         for (j in loc1:loc2) {
#'             if (i != j) {
#'                 cor_matrix[i, j] <- alpha0
#'             } else {
#'                 cor_matrix[i, j] <- 1
#'             }
#'         }
#'     }
#' }
#'
#' Sigma[[1]] <- cor_matrix
#' Sigma[[2]] <- cor_matrix
#' y_nex <- simbinPROBIT(mu = mu, Sigma = Sigma, n = n)
#'
#' ## (3) exponential decay
#'
#' Sigma <- list()
#'
#' ### function to find the period of the ith index
#' region_ij <- function(points, i) {
#'     diff <- i - points
#'     for (h in 1:(length(diff) - 1)) {
#'         if (diff[h] > 0 & diff[h + 1] <= 0) {
#'             find <- h
#'         }
#'     }
#'     return(find)
#' }
#'
#' cor_matrix <- matrix(0, m * t, m * t)
#' useage_m <- cumsum(m * t)
#' useage_m <- c(0, useage_m)
#'
#' for (i in 1:(m * t)) {
#'     i_reg <- region_ij(useage_m, i)
#'     for (j in 1:(m * t)) {
#'         j_reg <- region_ij(useage_m, j)
#'         if (i_reg == j_reg & i != j) {
#'             cor_matrix[i, j] <- alpha0
#'         } else if (i == j) {
#'             cor_matrix[i, j] <- 1
#'         } else if (i_reg != j_reg) {
#'             cor_matrix[i, j] <- alpha0 * (rho^(abs(i_reg - j_reg)))
#'         }
#'     }
#' }
#' Sigma[[1]] <- cor_matrix
#' Sigma[[2]] <- cor_matrix
#' y_ed <- simbinPROBIT(mu = mu, Sigma = Sigma, n = n)
#'
#' @return \code{y} a vector of simulated binary outcomes for \code{n} clusters.




simbinPROBIT <- function(mu, Sigma, n = 1) {
    # compute the orginal correlation
    tetra <- function(pj, pk, alpha) {
        rhs <- alpha * sqrt(pj * (1 - pj)) * sqrt(pk * (1 - pk)) +
            pj * pk
        f0 <- function(rho) {
            sigma <- (1 - rho) * diag(2) + rho * matrix(1, 2, 2)
            lhs <- pmvnorm(
                lower = -Inf, upper = c(qnorm(pj), qnorm(pk)),
                mean = c(0, 0), sigma = sigma
            )
            return(lhs[1] - rhs)
        }
        res <- multiroot(f0, start = alpha)
        return(res$root)
    }

    createR <- function(u, r) {
        m <- length(u)
        R <- matrix(0, m, m)
        for (j in 1:(m - 1)) {
            for (k in j:m) {
                rhojk <- tetra(u[j], u[k], r[i, j])
                R[i, j] <- rhojk
            }
        }
        R[lower.tri(R)] <- t(R)[lower.tri(R)]
        diag(R) <- 1
        return(R)
    }


    # r[1:n, 1:n] is the corr mtx u[1:n] is the mean of a binary
    # vector checks that pairwise corrs are in-range for the given
    # u[] only upper half of r[,] is checked.  return 0 if ok return
    # 1 if out of range
    chkbinc <- function(r, u) {
        n <- length(u)
        s <- sqrt(u * (1 - u))
        for (i in 1:(n - 1)) {
            for (j in (i + 1):n) {
                uij <- u[i] * u[j] + r[i, j] * s[i] * s[j]
                ok <- ((uij <= min(u[i], u[j])) & (uij >= max(
                    0,
                    u[i] + u[j] - 1
                )))
                if (!ok) {
                    return(1)
                }
            }
        }
        return(0)
    }

    if (is.null(n)) {
        n <- 1

        if (!is.list(mu)) {
            meanList <- list()
            meanList[[1]] <- mu
        } else {
            meanList <- mu
        }

        if (!is.list(Sigma)) {
            corList <- list()
            corList[[1]] <- Sigma
        } else {
            corList <- Sigma
        }
    } else if (n == 1) {
        n <- 1

        if (!is.list(mu)) {
            meanList <- list()
            meanList[[1]] <- mu
        } else {
            meanList <- mu
        }

        if (!is.list(Sigma)) {
            corList <- list()
            corList[[1]] <- Sigma
        } else {
            corList <- Sigma
        }
    } else {
        meanList <- mu
        corList <- Sigma
    }

    # Module to simulate individual outcomes
    y <- NULL
    for (i in 1:n) {
        # simulate outcome
        u <- meanList[[i]]
        r <- corList[[i]]
        oor <- chkbinc(r, u)
        if (oor) {
            stop("ERROR: Correlation out of range for given mean")
        }
        # multivarate probit
        Rstar <- createR(u, r)
        ystar <- rmvnorm(1, u, Rstar)
        ycut <- qnorm(u)
        y <- c(y, as.numeric(ystar <= ycut))
        # simulate data matrix
    }

    return(y)
}

Try the geeCRT package in your browser

Any scripts or data that you put into this service are public.

geeCRT documentation built on June 22, 2024, 10:46 a.m.