######################################
## code for sample size simulations ##
## Nicholas Reich
## August 2010
######################################
## this is the central user function
## N = overall sample size
## med = median of log normal distribution
## disp = dispersion of log normal distribution
## percentile = what percentile(s) should be simulated
## nsim = how many simulations to run
## exact data = T/F should simulation be done with exact data
## pct.type.A = percent type A data, will be rounded up to nearest integer
## exp.win.dat = a vector of exposure window lengths to sample from
## verb = whether to print 10 iteration counts during the course of the sim
##' @name precision.simulation
##' @aliases precision.simulation.exact
##' @aliases precision.simulation.coarse
##' @aliases generate.coarse.data
##'
##' @title Simulate incubation period analyses with coarse data
##'
##' @description These functions simulate coarse incubation period data sets and
##' analyze them. The goal is for these simulations to provide evidence for
##' how much information a given dataset contains about a characteristic of
##' the incubation period distribution.
##'
##' @param N Overall sample size for the datasets to be simulated.
##' @param med Median for the assumed log normal distribution of the incubation
##' periods.
##' @param disp Dispersion for the assumed log normal distribution of the
##' incubation periods.
##' @param percentile Percentile of the incubation period distribution which we
##' want to estimate.
##' @param nsim Number of datasets to analyze in the simulation.
##' @param exact.data Either TRUE/FALSE. Incidates whether the data generated
##' should be coarsened at all. If TRUE, pct.type.A and exp.win.dat are
##' ignored.
##' @param pct.type.A Percent of the N observations that are assumed to be type
##' A data. If N*pct.type.A is not an integer, it will be rounded to the
##' nearest integer.
##' @param exp.win.dat A vector of exposure window lengths. Defaults to the
##' observed window lengths from Lessler et al. (see below).
##' @param verb If TRUE, a message with the system time and iteration number
##' will be printed ten times during the simulation run.
##'
##'
##' @rdname precision.simulation
##' @return The \code{precision.simulation} functions return a matrix with four
##' columns and nsim rows. The "ests" column gives the estimated percentiles
##' for the incubation period distribution. The "SE" column gives the
##' standard error for the estimate. The "conv" column is 1 if the doubly
##' interval-censored likelihood maximization converged. Otherwise, it is 0.
##' The "bias" column gives the estimated percentile - true percentile. The
##' \code{generate.coarse.data} function returns a matrix with data suitable
##' for analysis by the \code{dic.fit} function.
##' @export
precision.simulation <- function(N,
med = 2,
disp = 1.3,
percentile = 0.5,
nsim = 100,
exact.data = FALSE,
pct.type.A = 0.5,
exp.win.dat = NULL,
verb = FALSE) {
## logic check
if (percentile <= 0 || percentile >= 1) {
stop("percentile must be between 0 and 1.")
}
if (pct.type.A < 0 || pct.type.A > 1) {
stop("% of data that is type A must be between 0 and 1.")
}
if (is.null(exp.win.dat)) {
if (verb) message("NYC exposure window data used")
exp.win.dat <- get(data(exp.win.lengths, envir = environment()))
}
## TODO: add default exp.win.dat to package and load if NULL
if (exact.data) {
out <- precision.simulation.exact(
N = N,
med = med,
disp = disp,
percentile = percentile,
nsim = nsim,
verb = verb
)
} else {
out <- precision.simulation.coarse(
N = N,
med = med,
disp = disp,
percentile = percentile,
nsim = nsim,
pct.type.A = pct.type.A,
exp.win.dat = exp.win.dat,
verb = verb
)
}
target <- qlnorm(percentile, log(med), log(disp))
bias <- out[, "ests"] - target
out <- cbind(out, bias)
return(out)
}
##' @rdname precision.simulation
##' @export
precision.simulation.exact <- function(N,
med,
disp,
percentile,
nsim,
verb) {
storage <- matrix(NA, ncol = 3, nrow = nsim)
colnames(storage) <- c("ests", "SE", "conv")
data <- matrix(0, ncol = 6, nrow = nsim * N)
colnames(data) <- c("dataset.id", "EL", "ER", "SL", "SR", "type")
data[, "dataset.id"] <- rep(1:nsim, each = N)
data[, "SL"] <- rlnorm(N * nsim, meanlog = log(med), sdlog = log(disp))
data[, "SR"] <- data[, "SL"]
data[, "type"] <- 2
for (i in 1:nsim) {
tmp.dat <- data[which(data[, "dataset.id"] == i), ]
tmp.fit <- dic.fit(tmp.dat, ptiles = percentile)
if (tmp.fit$conv == 1) {
row.name <- paste0("p", round(percentile * 100))
which.row <-
which(rownames(tmp.fit$ests) == row.name)[1]
storage[i, c("ests", "SE")] <-
tmp.fit$ests[which.row, c("est", "StdErr")]
} else {
storage[i, c("ests", "SE")] <- NA
}
storage[i, "conv"] <- tmp.fit$conv
if (verb && i %% (round(nsim / 10)) == 0) {
print(paste("iteration", i, "complete ::", Sys.time()))
}
}
return(storage)
}
##' @rdname precision.simulation
##' @export
precision.simulation.coarse <- function(N,
med,
disp,
percentile,
nsim,
pct.type.A,
exp.win.dat,
verb) {
## create storage
storage <- matrix(NA, ncol = 3, nrow = nsim)
colnames(storage) <- c("ests", "SE", "conv")
for (i in 1:nsim) {
tmp.dat <- generate.coarse.data(
N = N,
med = med,
disp = disp,
pct.type.A = pct.type.A,
exp.win.dat = exp.win.dat
)
tmp.fit <- dic.fit(tmp.dat, ptiles = percentile)
if (tmp.fit$conv == 1) {
row.name <- paste0("p", round(percentile * 100))
which.row <-
which(rownames(tmp.fit$ests) == row.name)[1]
storage[i, c("ests", "SE")] <-
tmp.fit$ests[which.row, c("est", "StdErr")]
} else {
storage[i, c("ests", "SE")] <- NA
}
storage[i, "conv"] <- tmp.fit$conv
if (verb && i %% (round(nsim / 10)) == 0) {
print(paste("iteration", i, "complete ::", Sys.time()))
}
}
return(storage)
}
##' @rdname precision.simulation
##' @export
generate.coarse.data <- function(N, med, disp, pct.type.A, exp.win.dat) {
n.type.A <- round(N * pct.type.A)
n.type.B <- N - n.type.A
E <- runif(N, 10, 11)
S <- rlnorm(N, log(med), log(disp)) + E
SR <- ceiling(S)
SL <- floor(S)
## generate window types
## 0 = short with bounded ER = type A
## 1 = "long" with no ER = type B
win.type <- rep(0:1, times = c(n.type.A, n.type.B))
## generate window lengths,
potential.lengths <- sample(exp.win.dat, size = N, replace = TRUE)
win.length <- 1 * (win.type == 0) + potential.lengths * (win.type > 0)
## fix data
ER <- ceiling(E) * (win.type == 0) + SR * (win.type == 1)
EL <- pmin(ER - win.length, floor(E))
## return the data
cbind(EL, E, ER, SL, S, SR, win.length, win.type, type = 0)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.