R/generate_contin_table_with_clustered_AE.R

Defines functions generate_contin_table_with_clustered_AE

Documented in generate_contin_table_with_clustered_AE

#' Generate simulated contingency tables with the option of incorporating
#' adverse event correlation within clusters.
#'
#' @description Generate simulated contingency tables with the option of
#' incorporating adverse event correlation within clusters.
#'
#' @param row_marginal Marginal sums for the rows of the contingency table.
#' @param column_marginal Marginal sums for the columns of the contingency
#' table.
#' @param signal_mat A data matrix of the same dimension as the contingency
#' table with entries representing the signal strength. The values should
#' be greater or equal to 1, where 1 indicates no signal, and values
#' greater than 1 indicate signal.
#' @param contin_table A data matrix of an \eqn{I} x \eqn{J} contingency
#' table with row (adverse event) and column (drug or vaccine) names, of which
#' the row and column marginals are used to generate the simulated data.
#' Please first check the input contingency table using the function
#' \code{check_and_fix_contin_table()}. Default is NULL.
#' @param AE_idx A data frame or list.
#' In case of data frame it must contain two variables \code{idx} and \code{AE},
#' where \code{idx} indicates the cluster index (a number),
#' and \code{AE} lists the adverse event names. See the
#' \code{statin49_AE_idx} for \code{statin49} data as an example.
#' In case of a list, make sure the cluster indices are aligned with the
#' corresponding row marginal.
#' @param n_rep Number of contingency tables to be generated.
#' @param rho A numeric value, matrix, or NULL indicating the correlation
#' structure.If a numeric value (float or int) is provided, it represents the
#' correlation value \code{rho} to be used between all elements within each
#' cluster specified by \code{AE_idx}. If a matrix is provided, it must be a
#' square matrix with dimensions equal to the number of rows in
#' \code{contin_table}. In this case, \code{rho} defines the correlation
#' structure directly, and \code{AE_idx} is not used. If \code{rho} is NULL,
#' a covariance matrix is generated based on the correlation coefficients of
#' \code{contin_table}.
#' @param seed An optional integer to set the seed for reproducibility.
#' If NULL, no seed is set.
#'
#' @return A list of \code{n_rep} simulated contingency tables.
#'
#' @useDynLib MDDC
#' @importFrom stats rnorm
#' @export
#'
#' @examples
#' # using statin49 as an example
#' data(statin49)
#' data(statin49_AE_idx)
#'
#' # Prepare a matrix of signal strength with the same dimension as
#' # statin49, where 1 indicates no signal and values > 1 indicate
#' # signal
#' lambda_matrix <- matrix(1, nrow = nrow(statin49), ncol = ncol(statin49))
#'
#' # Assign the cell (45,1) with signal strength 4
#' lambda_matrix[45, 1] <- 4
#'
#' # Generate 5 simulated tables
#' set.seed(123)
#' simulated_tables <- generate_contin_table_with_clustered_AE(
#'   contin_table = statin49,
#'   n_rep = 5,
#'   AE_idx = statin49_AE_idx,
#'   rho = 0.5,
#'   signal_mat = lambda_matrix
#' )
generate_contin_table_with_clustered_AE <- function(row_marginal,
                                                    column_marginal,
                                                    signal_mat,
                                                    contin_table = NULL,
                                                    AE_idx = NULL,
                                                    n_rep = 1,
                                                    rho = NULL,
                                                    seed = NULL) {
  if (!is.null(seed)) {
    set.seed(seed)
  }

  if (is.data.frame(AE_idx)) {
    AE_idx <- as.list(AE_idx$idx)
  }

  if (!is.null(contin_table)) {
    n_row <- nrow(contin_table)
    n_col <- ncol(contin_table)
    row_names <- row.names(contin_table)
    col_names <- colnames(contin_table)

    n_i_dot <- rowSums(contin_table)
    n_dot_j <- colSums(contin_table)
    n_dot_dot <- sum(contin_table)
  } else if (is.null(contin_table) &&
    (!is.null(row_marginal) &&
      !is.null(column_marginal))) {
    if (sum(row_marginal) == sum(column_marginal)) {} else {
      stop("The sum of row and column marginals do not match.")
    }

    n_row <- length(row_marginal)
    n_col <- length(column_marginal)

    n_i_dot <- row_marginal
    n_dot_j <- column_marginal
    n_dot_dot <- sum(row_marginal)
  } else {
    if ((is.null(row_marginal) || is.null(column_marginal)) &&
      is.null(contin_table)) {
      stop("`row_marginal` or `column_marginal` cannot be
            NULL when `contin_table` is also NULL.
      Please provide either `row_marginal` and `column_marginal`
            or `contin_table`.")
    }
  }

  p_i_dot <- n_i_dot / n_dot_dot
  p_dot_j <- n_dot_j / n_dot_dot

  E_ij_mat <- n_i_dot %*% t(n_dot_j) / n_dot_dot

  if (is.numeric(rho) && length(rho) == 1) {
    if (!(0 <= rho && rho <= 1)) {
      stop("The value of `rho` must lie between [0,1].")
    } else {
      if (!is.null(contin_table)) {
        if (length(AE_idx) != dim(contin_table)[1]) {
          stop("The length of `AE_idx` should be same as
                rows of `contin_table`.")
        }
      } else {
        if (is.null(AE_idx)) {
          stop("User provided `rho` but the `AE_idx` is not provided.")
        } else {
          if (length(AE_idx) != length(row_marginal)) {
            stop("The length of `AE_idx` should be same
              as length of `row_marginal`.")
          }
        }
      }

      group_s <- unique(AE_idx)
      n_group_s <- vapply(group_s, function(g) sum(AE_idx == g), numeric(1))
      names(n_group_s) <- group_s

      cov_matrices <- list()

      for (group in group_s) {
        size <- n_group_s[group]
        matrix <- matrix(rho, nrow = size, ncol = size)
        diag(matrix) <- 1
        cov_matrices[[group]] <- matrix
      }
      cov_matrix <- create_block_diagonal_matrix(cov_matrices)
    }
  } else if (is.matrix(rho)) {
    if (!is.null(contin_table)) {
      if (!all(dim(rho) == c(dim(contin_table)[1], dim(contin_table)[1]))) {
        stop("Please check the shape of the input matrix `rho`.
           It should be an I x I matrix where I is the number of rows
           in the contingency table.")
      }
    } else {
      if (!all(dim(rho) == c(length(row_marginal), length(row_marginal)))) {
        stop("Please check the shape of the input matrix `rho`.
              It should be an I x I matrix where I is same length
              as `row_marginal`.")
      }
    }

    cov_matrix <- rho
  } else if (is.null(rho)) {
    if (!is.null(AE_idx)) {
      stop("User provided the `AE` but `rho` has not been provided.
           If user is unable to provide `rho`, then please set `AE_idx`= NULL.")
    }
    if (!is.null(contin_table)) {
      cov_matrix <- cor(t(contin_table))
    } else {
      stop("`rho` cannot be estimated if no `contin_table` is provided.")
    }
  }

  tables <- lapply(seq_len(n_rep), function(i) {
    if (!is.null(seed)) {
      set.seed(seed + i) # To ensure each replication is reproducible
    }
    get_contin_table(
      i, n_row, n_col, cov_matrix, E_ij_mat,
      signal_mat, p_i_dot, p_dot_j
    )
  })

  if (!is.null(contin_table)) {
    if (all(!is.na(colnames(contin_table))) &&
      all(!is.na(rownames(contin_table)))) {
      tables <- lapply(tables, function(sample) {
        df <- as.matrix(sample)
        colnames(df) <- colnames(contin_table)
        rownames(df) <- rownames(contin_table)
        return(df)
      })
    }
  }

  return(tables)
}

Try the MDDC package in your browser

Any scripts or data that you put into this service are public.

MDDC documentation built on April 11, 2025, 5:45 p.m.