RDS.bootstrap.intervals.local <- function(rds.data,
outcome.variable,
weight.type,
uncertainty,
N,
subset,
number.of.bootstrap.samples,
confidence.level = .95,
control = control.rds.estimates(),
fast = FALSE,
useC = FALSE,
csubset = "",
ci.type = "t",
to.factor=FALSE,
cont.breaks = 3,
...) {
if (is(rds.data, "rds.data.frame")) {
if (!(outcome.variable %in% names(rds.data))) {
stop(sprintf(
"No variable called %s appears in the data.",
outcome.variable
))
}
network.size <- attr(rds.data, "network.size.variable")
if (!(network.size %in% names(rds.data)))
stop("invalid network size variable")
rds.data[[network.size]] <- as.numeric(rds.data[[network.size]])
}
else {
stop("rds.data must be of type rds.data.frame")
}
if (is.null(weight.type)) {
weight.type <- "Gile's SS"
}
weight.type <- match.arg(
weight.type,
c(
"Gile's SS",
"RDS-I",
"RDS-II",
"RDS-I (DS)",
"HCG",
"Arithmetic Mean"
)
)
if (is.na(weight.type)) {
# User typed an unrecognizable name
stop(
paste(
'You must specify a valid weight.type. The valid types are "Gile\'s SS","RDS-I", "RDS-II", "RDS-I (DS)", "HCG", and "Arithmetic Mean"'
),
call. = FALSE
)
}
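# If no uncertainty method is given, pick the one conventionally paired with
# the chosen estimator (e.g. Gile's SS weights with Gile's SS bootstrap).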
if (is.null(uncertainty)) {
if (weight.type == "Gile's SS") {
uncertainty <- "Gile"
} else if (weight.type %in% c("RDS-I", "RDS-I (DS)", "RDS-II")) {
uncertainty <- "Salganik"
} else if (weight.type == "Arithmetic Mean") {
uncertainty <- "SRS"
} else if (weight.type == "HCG"){
uncertainty <- "HCG"
}
}
uncertainty <-
match.arg(uncertainty, c("Treeboot", "Gile", "Salganik", "SRS", "HCG"))
if (is.null(N)) {
N <- attr(rds.data, "population.size.mid")
if (is.null(N) && uncertainty == "Gile") {
stop(
"N not specified with no default in the data. N is required for Gile's SS estimator and bootstrap"
)
}
if (!is.null(N))
cat("\nNote: Using the data's mid population size estimate: N =",
N,
"\n")
}
###########################################################################################
# Check for missing values and warn the user if any are removed. This should really be taken
# care of elsewhere. NB: It is also worth considering the semantics of the message
# "values were missing and were removed".
remvalues <-
rds.data[[network.size]] == 0 | is.na(rds.data[[network.size]])
if (any(remvalues)) {
warning(
paste(
sum(remvalues),
"of",
nrow(rds.data),
"network sizes were missing or zero. The estimator will presume these are",
max(rds.data[[network.size]], na.rm = TRUE)
),
call. = FALSE
)
rds.data[[network.size]][remvalues] <-
max(rds.data[[network.size]], na.rm = TRUE)
}
rds.data.nomiss <- rds.data
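# The subset expression is evaluated with non-standard evaluation: first in the
# context of the data, then in the caller's frame.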
se <- substitute(subset)
subset <- eval(se, rds.data, parent.frame())
if (is.null(se) || is.null(subset)) {
subset <- rep(TRUE, length = nrow(rds.data.nomiss))
} else{
subset[is.na(subset)] <- FALSE
if (!is.null(N)) {
#use VH estimator to adjust population size to sub-population
# defined by subset
tmp.wts <- vh.weights(rds.data.nomiss[[network.size]])
tmp.wts <- tmp.wts / sum(tmp.wts)
prop <- sum(tmp.wts * subset)
N <- N * prop
if (N < sum(subset))
stop(sprintf("Estimated sub-population size, %f, is smaller than the subset size, %f.", N, sum(subset)))
}
#This subsets the data, setting orphaned children to be seeds
#in order to maintain a valid recruitment tree.
rds.data.nomiss <- rds.data.nomiss[subset, , warn = FALSE]
#drop 0 count levels
if (is.factor(rds.data[[outcome.variable]])) {
outcome <- factor(rds.data.nomiss[[outcome.variable]])
# Make sure the factor labels are alphabetic!
outcome <- factor(outcome, levels = levels(outcome)[order(levels(outcome))])
rds.data.nomiss[[outcome.variable]] <- outcome
}
}
#if (mean(duplicated(rds.data.nomiss[[outcome.variable]], na.rm = TRUE)) < control$discrete.cutoff) {
# is.cts <- TRUE
#}
outcome <- rds.data.nomiss[[outcome.variable]]
is.cts <- is.numeric(outcome) & !to.factor
#only categorical estimates are supported
if(!is.cts){
outcome <- factor(rds.data.nomiss[[outcome.variable]])
#Make sure the factor labels are alphabetic!
outcome <-
factor(outcome, levels = levels(outcome)[order(levels(outcome))])
rds.data.nomiss[[outcome.variable]] <- outcome
outclasses <- levels(outcome)
nsamplesbyoutcome <- table(as.numeric(outcome))
}else{
nsamplesbyoutcome <- length(outcome)
}
g <- length(stats::na.omit(unique(rds.data.nomiss[[outcome.variable]])))
if (g == 1) {
warning(paste(outcome.variable, "has only one level. Skipping..."))
return(invisible(NULL))
}
nsamples <- sum(!is.na(as.vector(outcome)))
rds <- RDS.estimates.local(
rds.data = rds.data.nomiss,
outcome.variable = outcome.variable,
N = N,
weight.type = weight.type,
control = control,
empir.lik = TRUE,
useC = useC,
to.factor=to.factor,
cont.breaks=cont.breaks,
...
)
if (is.rds.interval.estimate(rds)) {
observed.estimate <- rds$estimate
weights.all <- rds$weights
} else{
observed.estimate <- rds@estimate
weights.all <- rds@weights
}
# outcome variance
if(!is.cts){
varoutcome <- observed.estimate * (1 - observed.estimate)
}else{
varoutcome <- wtd.var(rds.data.nomiss[[outcome.variable]], weights = weights.all)
}
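# If the number of bootstrap samples is not given (or is implausibly small),
# choose enough replicates to estimate the standard error to roughly the target
# accuracy, bounded between 20 and 500.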
if (is.null(number.of.bootstrap.samples) ||
number.of.bootstrap.samples < 10) {
if (is.rds.interval.estimate(rds)) {
EL.se <- matrix(rds$interval, ncol = 6, byrow = FALSE)[1, 5]
} else{
EL.se <-
sqrt(varoutcome / length(weights.all))
}
number.of.bootstrap.samples <-
min(500, max(20, ceiling(0.5 * (EL.se / 0.002) ^ 2 + 1)))
if (!is.numeric(number.of.bootstrap.samples)) {
number.of.bootstrap.samples <- 500
}
cat(sprintf(
"Using %d bootstrap samples.\n",
number.of.bootstrap.samples
))
}
#
# Confidence intervals
#
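# Two-sided normal critical value (about 1.96 at the default 95% level).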
crit <- stats::qnorm((1 - confidence.level) / 2, lower.tail = FALSE)
if (uncertainty == "Salganik") {
bs <- salganik.bootstrap.se(
rds.data = rds.data.nomiss,
group.variable = outcome.variable,
number.of.bootstrap.samples = number.of.bootstrap.samples,
estimator.name = weight.type,
N = N,
to.factor=to.factor,
...
)
attr(bs, "is.cts") <- is.cts
#attr(bs, "is.quantile") <- is.quantile
attr(bs, "mu") <- observed.estimate
if(!is.cts)
bso <- bs[match(names(bs), outclasses)]
else
bso <- bs
estimate <-
cbind(observed.estimate,
observed.estimate - crit * bso,
observed.estimate + crit * bso)
colnames(estimate) <- c("point", "lower", "upper")
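# Adjust the normal-theory ("t") interval according to ci.type: "pivotal" uses the
# basic bootstrap, "quantile" uses the bootstrap percentiles, and "proportion"
# falls back to "quantile" for small or out-of-range proportions and then
# truncates the bounds to [0,1].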
estimate = switch(
ci.type,
"pivotal" = {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- 2 * observed.estimate - qcrit
# estimate[, 1] <- 2*observed.estimate-apply(bsests,2,mean,na.rm=TRUE)
estimate[, 1] <- observed.estimate
estimate
},
"quantile" = {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
estimate[, 1] <- observed.estimate
estimate
},
"proportion" = {
if (all(observed.estimate >= 0) &
(any(observed.estimate < 0.15) |
any(estimate[, 2:3] < 0 | estimate[, 2:3] > 1))) {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
}
estimate[estimate[, 2] < 0, 2] <- 0
estimate[estimate[, 3] < 0, 3] <- 0
estimate[estimate[, 2] > 1, 2] <- 1
estimate[estimate[, 3] > 1, 3] <- 1
estimate[, 1] <- observed.estimate
estimate
},
{
# t (the default)
estimate[, 1] <- observed.estimate
estimate
}
)
} else if (uncertainty == "HCG") {
bs <- HCG.bootstrap.se(
rds.data = rds.data.nomiss,
group.variable = outcome.variable,
number.of.bootstrap.samples = number.of.bootstrap.samples,
estimator.name = weight.type,
N = N,
cont.breaks=cont.breaks,
to.factor=to.factor,
control = control,
...
)
attr(bs, "is.cts") <- is.cts
#attr(bs, "is.quantile") <- is.quantile
attr(bs, "mu") <- observed.estimate
if(is.cts){
bso <- bs
outclasses <- outcome.variable
}else{
bso <- bs[match(names(bs), outclasses)]
}
estimate <-
cbind(observed.estimate,
observed.estimate - crit * bso,
observed.estimate + crit * bso)
colnames(estimate) <- c("point", "lower", "upper")
estimate = switch(
ci.type,
"pivotal" = {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- 2 * observed.estimate - qcrit
# estimate[, 1] <- 2*observed.estimate-apply(bsests,2,mean,na.rm=TRUE)
estimate[, 1] <- observed.estimate
estimate
},
"quantile" = {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
estimate[, 1] <- observed.estimate
estimate
},
"proportion" = {
if (all(observed.estimate >= 0) &
(any(observed.estimate < 0.15) |
any(estimate[, 2:3] < 0 | estimate[, 2:3] > 1))) {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
}
estimate[estimate[, 2] < 0, 2] <- 0
estimate[estimate[, 3] < 0, 3] <- 0
estimate[estimate[, 2] > 1, 2] <- 1
estimate[estimate[, 3] > 1, 3] <- 1
estimate[, 1] <- observed.estimate
estimate
},
{
# t (the default)
estimate[, 1] <- observed.estimate
estimate
}
)
} else if (uncertainty == "Treeboot") {
bs <- treeboot.bootstrap.se(
rds.data = rds.data.nomiss,
group.variable = outcome.variable,
number.of.bootstrap.samples = number.of.bootstrap.samples,
estimator.name = weight.type,
N = N,
...
)
attr(bs, "is.cts") <- is.cts
#attr(bs, "is.quantile") <- is.quantile
attr(bs, "mu") <- observed.estimate
bso <- bs[match(names(bs), outclasses)]
estimate <-
cbind(observed.estimate,
observed.estimate - crit * bso,
observed.estimate + crit * bso)
colnames(estimate) <- c("point", "lower", "upper")
estimate = switch(
ci.type,
"pivotal" = {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- 2 * observed.estimate - qcrit
# estimate[, 1] <- 2*observed.estimate-apply(bsests,2,mean,na.rm=TRUE)
estimate[, 1] <- observed.estimate
estimate
},
"quantile" = {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
estimate[, 1] <- observed.estimate
estimate
},
"proportion" = {
if (all(observed.estimate >= 0) &
(any(observed.estimate < 0.15) |
any(estimate[, 2:3] < 0 | estimate[, 2:3] > 1))) {
bsests <- attr(bs, "bsresult")
bsests <- bsests[, match(names(bs), outclasses)]
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
}
estimate[estimate[, 2] < 0, 2] <- 0
estimate[estimate[, 3] < 0, 3] <- 0
estimate[estimate[, 2] > 1, 2] <- 1
estimate[estimate[, 3] > 1, 3] <- 1
estimate[, 1] <- observed.estimate
estimate
},
{
# t (the default)
estimate[, 1] <- observed.estimate
estimate
}
)
} else if (uncertainty == "SRS") {
varsrs <- observed.estimate * (1 - observed.estimate) / nsamples
if (!is.null(N))
varsrs <- ((N - nsamples) / (N - 1)) * varsrs
estimate <- cbind(
observed.estimate,
observed.estimate -
crit * sqrt(varsrs),
observed.estimate + crit * sqrt(varsrs)
)
colnames(estimate) <- c("point", "lower", "upper")
bs <- varsrs
attr(bs, "is.cts") <- is.cts
#attr(bs, "is.quantile") <- is.quantile
attr(bs, "mu") <- observed.estimate
} else if (uncertainty == "Gile") {
bs <- SSBS.estimates(
rds.data.nomiss,
outcome.variable,
confidence.level = confidence.level,
N = N,
refs.to.trait=NULL,
weight.type = weight.type,
number.of.bootstrap.samples = number.of.bootstrap.samples,
control = control,
to.factor=to.factor,
cont.breaks=cont.breaks,
fast = fast,
useC = useC,
...
)
if (attr(bs, "is.cts")) {
observed.estimate <- bs[1]
outclasses <- outcome.variable
nsamplesbyoutcome <- nrow(rds.data.nomiss)
estimate <- bs
}else
estimate <- bs[match(rownames(bs), outclasses), , drop = FALSE]
colnames(estimate) <- c("point", "lower", "upper")
bsests <- attr(bs, "bsresult")$bsests
bsests <- bsests[, match(rownames(bs), outclasses)]
estimate = switch(
ci.type,
"pivotal" = {
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- 2 * observed.estimate - qcrit
# estimate[, 1] <- 2*observed.estimate-apply(bsests,2,mean,na.rm=TRUE)
estimate[, 1] <- observed.estimate
estimate
},
"quantile" = {
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
estimate[, 1] <- observed.estimate
estimate
},
"proportion" = {
if (all(observed.estimate >= 0) &
(any(observed.estimate < 0.15) |
any(estimate[, 2:3] < 0 | estimate[, 2:3] > 1))) {
qcrit <-
t(apply(bsests, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- qcrit[, 2:1]
# estimate[, 1] <- 2*observed.estimate-apply(bsests,2,mean,na.rm=TRUE)
}
estimate[estimate[, 2] < 0, 2] <- 0
estimate[estimate[, 3] < 0, 3] <- 0
estimate[estimate[, 2] > 1, 2] <- 1
estimate[estimate[, 3] > 1, 3] <- 1
estimate[, 1] <- observed.estimate
estimate
},
"hmg" = {
nobs <- min(as.vector(nsamplesbyoutcome))
dae <-
as.vector(
differential.activity.estimates(
rds.data = rds.data.nomiss,
outcome.variable = outcome.variable,
N = N,
weight.type = weight.type
)
)[1]
if (nobs == as.vector(nsamplesbyoutcome)[1]) {
dae <- 1 / dae
}
if (nobs + control$lowprevalence[1] * dae < control$lowprevalence[2]) {
# Apply the combined Agresti-Coull and the bootstrap-t interval of Mantalos and Zografos (2008)
wbse <-
(attr(bs, "bsresult")$bsnm * bsests + 2) / (attr(bs, "bsresult")$bsnm +
4)
nm <-
sum(weights.all, na.rm = TRUE) ^ 2 / sum(weights.all ^ 2, na.rm = TRUE)
woe <- (nm * observed.estimate + 2) / (nm + 4)
sigmamb <- sqrt(wbse * (1 - wbse) / attr(bs, "bsresult")$bsnm)
tstarn <- sweep(wbse, 2, woe, "-") / sigmamb
tstarn[is.infinite(tstarn)] <- NA
qcrit <-
t(apply(tstarn, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- woe - qcrit * sqrt(woe * (1 - woe) / (nm + 4))
}
#
estimate[, 1] <- observed.estimate
estimate
},
"acbt" = {
# Apply the combined Agresti-Coull and the bootstrap-t interval of Mantalos and Zografos (2008)
wbse <-
(attr(bs, "bsresult")$bsnm * bsests + 2) / (attr(bs, "bsresult")$bsnm +
4)
nm <-
sum(weights.all, na.rm = TRUE) ^ 2 / sum(weights.all ^ 2, na.rm = TRUE)
woe <- (nm * observed.estimate + 2) / (nm + 4)
sigmamb <- sqrt(wbse * (1 - wbse) / attr(bs, "bsresult")$bsnm)
tstarn <- sweep(wbse, 2, woe, "-") / sigmamb
tstarn[is.infinite(tstarn)] <- NA
qcrit <-
t(apply(tstarn, 2, stats::quantile, c((confidence.level + 1) / 2, (1 -
confidence.level) / 2
), na.rm = TRUE))
estimate[, 2:3] <- woe - qcrit * sqrt(woe * (1 - woe) / (nm + 4))
#
estimate[, 1] <- observed.estimate
estimate
},
{
# t (normal;the default)
# estimate[,2:3] <- estimate[,2:3]-matrix(apply(estimate[,2:3],1,mean),ncol=2,nrow=nrow(estimate)) + observed.estimate
estimate[, 1] <- observed.estimate
estimate
}
)
}
#
# Design effect (for factor outcomes)
#
#if (attr(bs, "is.cts")) {
#if (attr(bs, "is.quantile")) {
# varoutcome <-
# continuous * (1 - continuous) / (stats::dnorm(
# observed.estimate,
# mean = attr(bs, "mu"),
# sd = sqrt(attr(bs, "sigma2"))
# )) ^ 2
#} else{
#varoutcome <- attr(bs, "sigma2")
#}
#} else{
#varoutcome <- estimate[, 1] * (1 - estimate[, 1])
#}
# Note the finite sample correction factor
if (!is.null(N)) {
varsrs <- (((N - nsamples) / (N - 1)) * varoutcome / nsamples)
} else{
varsrs <- varoutcome / nsamples
}
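# Design effect: the bootstrap variance (recovered from the confidence interval
# half-width) divided by the variance under simple random sampling.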
de <- ((estimate[, 3] - estimate[, 2]) / (crit * 2)) ^ 2 / varsrs
estimate <-
cbind(estimate,
de,
(estimate[, 3] - estimate[, 2]) / (crit * 2),
nsamplesbyoutcome)
colnames(estimate)[c(4, 5, 6)] <- c("Design Effect", "s.e.", "n")
if (attr(bs, "is.cts")) {
rownames(estimate) <- outcome.variable
} else{
outclasses <-
sort(unique(as.vector(rds.data.nomiss[[outcome.variable]])))
outclasses[outclasses == "NA.NA"] <- "NA"
rownames(estimate) <- outclasses[1:nrow(estimate)]
estimate <- as.numeric(estimate)
names(estimate)[1:g] <- outclasses[1:g]
}
if (exists("bs")){
attr(estimate, "bsresult") <- attr(bs, "bsresult")
attr(estimate, "is.cts") <- attr(bs, "is.cts")
}
result <- rds.interval.estimate(
estimate,
outcome.variable,
weight.type,
uncertainty,
weights.all,
N = N,
csubset = csubset,
conf.level = confidence.level
)
attr(result, "bsresult") <- attr(estimate, "bsresult")
attr(result, "is.cts") <- attr(estimate, "is.cts")
return(result)
}
#' RDS Bootstrap Interval Estimates
#'
#' This function computes an interval estimate for one or more categorical or
#' numeric variables. It optionally uses attributes of the RDS data set to determine
#' the type of estimator and type of uncertainty estimate to use.
#'
# Note that the current estimator only works on binary outcome variables. (I think this can be removed now--non-binary categorical outcome variables work.)
#'
#' @param rds.data An \code{rds.data.frame} that indicates recruitment patterns
#' by a pair of attributes named ``id'' and ``recruiter.id''.
#' @param outcome.variable A string giving the name of the variable in the
#' \code{rds.data} that contains a categorical or numeric variable to be
#' analyzed. If the variable is numeric its mean is estimated; otherwise the
#' population frequencies of each value of the \code{outcome.variable} are computed.
#' @param weight.type A string giving the type of estimator to use. The options
#' are \code{"Gile's SS"}, \code{"RDS-I"}, \code{"RDS-II"}, \code{"RDS-I (DS)"},
#' \code{"HCG"}, and \code{"Arithmetic Mean"}. If \code{NULL} it defaults to \code{"Gile's
#' SS"}.
#' @param uncertainty A string giving the type of uncertainty estimator to use.
#' The options are \code{"SRS"}, \code{"Gile"}, \code{"Salganik"}, \code{"HCG"} and
#' \code{"Treeboot"}. This is usually
#' determined by \code{weight.type} to be consistent with the estimator's
#' origins. The estimators RDS-I, RDS-I (DS), and RDS-II default to \code{"Salganik"},
#' "Arithmetic Mean" defaults to \code{"SRS"}, "HCG" defaults to \code{"HCG"},
#' and "Gile's SS" defaults to the \code{"Gile"} bootstrap.
#' @param N An estimate of the number of members of the population being
#' sampled. If \code{NULL} it is read as the \code{population.size.mid} attribute of
#' the \code{rds.data} frame. If that is missing it defaults to 1000.
#' @param subset An optional criterion to subset \code{rds.data} by. It is a
#' character string giving an R expression which, when evaluated, subsets the
#' data. In plain English, it can be something like \code{"seed > 0"} to
#' exclude seeds. It can also be the name of a logical vector of the same length as
#' the outcome variable, where TRUE means include the observation in the analysis. If
#' \code{NULL} then no subsetting is done.
#' @param confidence.level The confidence level for the confidence intervals.
#' The default is 0.95 for 95\%.
#' @param number.of.bootstrap.samples The number of bootstrap samples to take
#' in estimating the uncertainty of the estimator. If \code{NULL} it defaults
#' to the number necessary to compute the standard error to accuracy 0.001.
#' @param fast Use a fast bootstrap where the weights are reused from the
#' estimator rather than being recomputed for each bootstrap sample.
#' @param useC Use a C-level implementation of Gile's bootstrap (rather than
#' the R-level implementation). The two implementations should be computationally
#' equivalent (except for speed).
#' @param ci.type Type of confidence interval to use, if possible. If "t", use lower and upper confidence
#' interval values based on the standard deviation of the bootstrapped values and a t multiplier.
#' If "pivotal", use lower and upper confidence interval values based on the
#' basic bootstrap (also called the pivotal confidence interval).
#' If "quantile", use lower and upper confidence interval values based on the
#' quantiles of the bootstrap sample.
#' If "proportion", use the "t" unless the estimated proportion is less than 0.15 or the bounds are
#' outside [0,1 . In this case, try the "quantile" and constrain the bounds to be compatible with [0,1].
#' @param control A list of control parameters for algorithm
#' tuning. Constructed using\cr
#' \code{\link{control.rds.estimates}}.
#' @param to.factor Force the outcome variable to be treated as a factor.
#' @param cont.breaks For continuous variates, some bootstrap procedures require categorical data.
#' In these cases, in order to construct each bootstrap replicate, the outcome variable is split into
#' \code{cont.breaks} categories.
#' @param ... Additional arguments for RDS.*.estimates.
#' @return An object of class \code{rds.interval.estimate} summarizing the inference.
#' The confidence interval and standard error are based on the bootstrap procedure.
#' In addition, the object has attribute \code{bsresult} which provides details of the
#' bootstrap procedure. The contents of the \code{bsresult} attribute depends on the
#' \code{uncertainty} used. If \code{uncertainty=="Salganik"} then \code{bsresult} is a
#' vector of standard deviations of the bootstrap samples.
#' If \code{uncertainty=="Gile's SS"} then
#' \code{bsresult} is a list with components for the bootstrap point estimate,
#' the bootstrap
#' samples themselves and the standard deviations of the bootstrap samples.
#' If \code{uncertainty=="SRS"} then \code{bsresult} is NULL.
#' @references Gile, Krista J. 2011 \emph{Improved Inference for
#' Respondent-Driven Sampling Data with Application to HIV Prevalence
#' Estimation}, \emph{Journal of the American Statistical Association}, 106,
#' 135-146.
#'
#' Gile, Krista J., Handcock, Mark S., 2010. Respondent-driven Sampling:
#' An Assessment of Current Methodology, Sociological Methodology, 40,
#' 285-327. <doi:10.1111/j.1467-9531.2010.01223.x>
#'
#' Gile, Krista J., Beaudry, Isabelle S. and Handcock, Mark S., 2018
#' Methods for Inference from Respondent-Driven Sampling Data,
#' Annual Review of Statistics and Its Application
#' <doi:10.1146/annurev-statistics-031017-100704>.
#'
#' @keywords survey manip
#' @examples
#'
#' \dontrun{
#' data(fauxmadrona)
#' RDS.bootstrap.intervals(rds.data=fauxmadrona,weight.type="RDS-II",
#' uncertainty="Salganik",
#' outcome.variable="disease",N=1000,number.of.bootstrap.samples=50)
#'
#' data(fauxtime)
#' RDS.bootstrap.intervals(rds.data=fauxtime,weight.type="HCG",
#' uncertainty="HCG",
#' outcome.variable="var1",N=1000,number.of.bootstrap.samples=10)
#' }
#'
#' @export
RDS.bootstrap.intervals <- function(rds.data,
outcome.variable,
weight.type = NULL,
uncertainty = NULL,
N = NULL,
subset = NULL,
confidence.level = .95,
number.of.bootstrap.samples = NULL,
fast = TRUE,
useC = TRUE,
ci.type = "t",
control = control.rds.estimates(),
to.factor=FALSE,
cont.breaks = 3,
...) {
se <- substitute(subset)
if (is.null(se)) {
csubset <- ""
} else{
csubset <- as.character(enquote(substitute(subset)))[2]
}
subset <- eval(se, rds.data, parent.frame())
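# A single outcome variable yields one interval estimate; a vector of
# outcome variables yields a named list of estimates.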
if (length(outcome.variable) == 1) {
result <- RDS.bootstrap.intervals.local(
rds.data,
outcome.variable,
weight.type,
uncertainty,
N,
subset,
confidence.level,
number.of.bootstrap.samples = number.of.bootstrap.samples,
control = control,
fast = fast,
useC = useC,
ci.type = ci.type,
csubset = csubset,
to.factor = to.factor,
cont.breaks = cont.breaks,
...
)
}
else {
result <- lapply(
X = outcome.variable,
FUN = function(g) {
RDS.bootstrap.intervals.local(
rds.data,
g,
weight.type,
uncertainty,
N,
subset,
confidence.level,
number.of.bootstrap.samples = number.of.bootstrap.samples,
control = control,
fast = fast,
useC = useC,
ci.type = ci.type,
csubset = csubset,
to.factor = to.factor,
cont.breaks = cont.breaks,
...
)
}
)
names(result) <- outcome.variable
}
return(result)
}