R/SimulateCorCbmDataset.R

Defines functions SimulateCorCbmDataset

Documented in SimulateCorCbmDataset

#' Simulate paired binned data for testing FitCorCbm
#'
#' @description Simulates single modality 2-reader binned ROC dataset, simulated according to the CORCBM model,
#' for the purpose of testing the fitting program \code{\link{FitCorCbm}}.
#'
#' @param seed The seed variable, default is 123; set to NULL for truly random seed
#' @param K1 The number of non-diseased cases, default is 50
#' @param K2 The number of diseased cases, default is 50
#' @param desiredNumBins The desired number of bins; default is 5
#' @param muX The CBM mu parameter in condition X
#' @param muY The CBM mu parameter in condition Y
#' @param alphaX The CBM alpha parameter in condition X
#' @param alphaY The CBM alpha parameter in condition Y
#' @param rhoNor The correlation of non-diseased case z-samples
#' @param rhoAbn2 The correlation of diseased case z-samples, when disease is visible in both conditions
#'
#'
#' @details X and Y refer to the two arms of the pairing. \code{muX} and \code{alphaX} refer to the univariate CBM parameters
#' in condition X, \code{rhoNor} is the correlation of ratings of non-diseased cases and \code{rhoAbn2} is the correlation of ratings of
#' diseased cases when disease is visible in both conditions. The ROC data is bined to 5 bins in each condition.
#' See referenced publication.
#'
#'
#' @return The desired dataset suitable for testing FitCorCbm
#'
#'
#'
#' @examples
#'
## seed <- 1 # this gives unequal numbers of bins in X and Y directions for 50/50 dataset
#' dataset <- SimulateCorCbmDataset()
#'
#' \donttest{
#' ## this takes very long
#' ## dataset <- SimulateCorCbmDataset(K1 = 5000, K2 = 5000)
#'}
#'
#'
#' @references
#' Zhai X, Chakraborty DP (2017) A bivariate contaminated binormal model for robust fitting of proper ROC
#' curves to a pair of correlated, possibly degenerate, ROC datasets. Medical Physics. 44(6):2207--2222.
#'
#'
#' @importFrom mvtnorm rmvnorm
#' @export
#'
#'
SimulateCorCbmDataset <- function(
  seed = 123, K1 = 50, K2 = 50, desiredNumBins = 5,
  muX = 1.5, muY = 3,
  alphaX = 0.4, alphaY = 0.7,
  rhoNor = 0.3, rhoAbn2 = 0.8){

  # 50/50; 100/100; 1000/1000; 5000/5000

  set.seed(seed)

  AUCX <- 0.5 * (1 - alphaX) + alphaX * pnorm(muX/sqrt(2))
  AUCY <- 0.5 * (1 - alphaY) + alphaY * pnorm(muY/sqrt(2))

  rhoAbn1 <- rhoNor
  rhoAbn12 <- mean(c(rhoAbn1, rhoAbn2))
  sigmaNor <- rbind(c(1, rhoNor), c(rhoNor, 1))
  sigmaAbn1 <- rbind(c(1, rhoAbn1), c(rhoAbn1, 1))
  sigmaAbn2 <- rbind(c(1, rhoAbn2), c(rhoAbn2, 1))
  sigmaAbn12 <- rbind(c(1, rhoAbn12), c(rhoAbn12, 1))

  p200 <- (1 - alphaX) * (1 - alphaY)
  p2X0 <- alphaX * (1 - alphaY)
  p20Y <- (1 - alphaX) * alphaY
  p2XY <- alphaX * alphaY
  K2Sample <- sample(c("00", "X0", "0Y", "XY"), size = K2, replace = TRUE, prob = c(p200, p2X0, p20Y, p2XY))
  K200 <- sum(K2Sample == "00")
  K2X0 <- sum(K2Sample == "X0")
  K20Y <- sum(K2Sample == "0Y")
  K2XY <- sum(K2Sample == "XY")

  zk1 <- t(rmvnorm(K1, sigma = sigmaNor))

  zk200 <- t(rmvnorm(K200, mean = c(0, 0), sigma = sigmaAbn1))
  zk2X0 <- t(rmvnorm(K2X0, mean = c(muX, 0), sigma = sigmaAbn12))
  zk20Y <- t(rmvnorm(K20Y, mean = c(0, muY), sigma = sigmaAbn12))
  zk2XY <- t(rmvnorm(K2XY, mean = c(muX, muY), sigma = sigmaAbn2))
  zk2 <- cbind(zk200, zk2X0, zk20Y, zk2XY)

  simuData <- Df2RJafrocDataset(zk1, zk2)
  simuDataB <- DfBinDataset(simuData, desiredNumBins = desiredNumBins, opChType = "ROC")

  return(simuDataB)
}
dpc10ster/rjafroc documentation built on Jan. 18, 2024, 4:37 a.m.