Nothing
#' @import graphics
NULL
#' @import stats
NULL
#' @import utils
NULL
#' @importFrom osDesign tps
NULL
#' @import np
NULL
#' @import chngpt
NULL
#' @import MASS
NULL
globalVariables(c("Z","S","Y"))
logit <- function(p){
return(log(p/(1-p)))
}
expit <- function(x){
return(exp(x)/(1+exp(x)))
}
# 'tpsPredict' returns predicted values from a model fitted by tps
# columns of newMatrix in the same order as the coefficient vector from tps
tpsPredict <- function(fit, newMatrix){
linPred <- newMatrix %*% fit$coef
return(drop(1/(1+exp(-linPred))))
}
# 'hNum' returns function values at s0 of the integrand in the numerator of P{Y(0)=1 | S(1)=s1}
# s0 is a numeric vector, whereas s1 is a scalar
hNum <- function(s0, s1, lev, vars, data, pstype, bsmtype, tpsFit, changePoint=NULL, npcdensFit1, npcdensFit2){
ss0 <- s0
if (!is.null(changePoint)){ ss0 <- ifelse(s0>changePoint, s0-changePoint, 0) }
# if any baseline covariates are specified
if (length(vars)>=1){
phat.s0 <- tpsPredict(tpsFit, cbind(1, ss0, matrix(lev[-1], nrow=length(ss0), ncol=length(lev[-1]), byrow=TRUE)))
lev1names <- names(which(lev==1))
predictLevels <- NULL
for (vi in 1:length(vars)){
varLevels <- levels(as.factor(data[,vars[vi]]))
predictLevelVar <- NULL
for (li in 2:length(varLevels)){
if (any(grepl(vars[vi], lev1names) & grepl(varLevels[li], lev1names))){
predictLevelVar <- varLevels[li]
break
}
}
if (is.null(predictLevelVar)){ predictLevelVar <- varLevels[1] }
predictLevels <- c(predictLevels, predictLevelVar)
}
dfVars <- as.data.frame(matrix(predictLevels, nrow=length(s0), ncol=length(predictLevels), byrow=TRUE))
colnames(dfVars) <- vars
if (pstype=="ordered"){ s1 <- ordered(s1) }
if (bsmtype=="ordered"){ s0 <- ordered(s0) }
fhat.s0 <- predict(npcdensFit1, newdata=data.frame(Sb=s0, dfVars, S=s1))
ghat.s0 <- predict(npcdensFit2, newdata=data.frame(dfVars, S=s0))
} else {
phat.s0 <- tpsPredict(tpsFit, cbind(1, ss0))
if (pstype=="ordered"){ s1 <- ordered(s1) }
if (bsmtype=="ordered"){ s0 <- ordered(s0) }
fhat.s0 <- predict(npcdensFit1, newdata=data.frame(Sb=s0, S=s1))
ghat.s0 <- predict(npcdensFit2, newdata=data.frame(S=s0))
}
return(phat.s0*fhat.s0*ghat.s0)
}
# 'hDen' returns function values at s0 of the integrand in the denominator of risk_{(0)}(s_1)
# s0 is a numeric vector, whereas s1 is a scalar
hDen <- function(s0, s1, lev, vars, data, pstype, bsmtype, npcdensFit1, npcdensFit2){
# if any baseline covariates are specified
if (length(vars)>=1){
lev1names <- names(which(lev==1))
predictLevels <- NULL
for (vi in 1:length(vars)){
varLevels <- levels(as.factor(data[,vars[vi]]))
predictLevelVar <- NULL
for (li in 2:length(varLevels)){
if (any(grepl(vars[vi], lev1names) & grepl(varLevels[li], lev1names))){
predictLevelVar <- varLevels[li]
break
}
}
if (is.null(predictLevelVar)){ predictLevelVar <- varLevels[1] }
predictLevels <- c(predictLevels, predictLevelVar)
}
dfVars <- as.data.frame(matrix(predictLevels, nrow=length(s0), ncol=length(predictLevels), byrow=TRUE))
colnames(dfVars) <- vars
fhat.s0 <- predict(npcdensFit1, newdata=data.frame(Sb=s0, dfVars, S=s1))
ghat.s0 <- predict(npcdensFit2, newdata=data.frame(dfVars, S=s0))
} else {
fhat.s0 <- predict(npcdensFit1, newdata=data.frame(Sb=s0, S=s1))
ghat.s0 <- predict(npcdensFit2, newdata=data.frame(S=s0))
}
return(fhat.s0*ghat.s0)
}
rowMatch <- function(x, y){
return(sum(abs(x-y))==0)
}
propX <- function(X, lev){
Xmatch <- apply(as.matrix(X), 1, rowMatch, y=lev)
return(mean(Xmatch))
}
# 'riskP' returns the value of P{Y(0)=1 | S(1)=s1}
# s1 is a scalar
# all baseline covariates are assumed to be discrete variables with a finite number of categories
# 'formula' is a one-sided formula of baseline covariates
riskP <- function(s1, formula, data, pstype, bsmtype, tpsFit, npcdensFit1, npcdensFit2, changePoint=NULL){
den <- num <- 0
UL <- 1.05 * max(data$S, na.rm=TRUE) # if integration over (0,Inf) fails, use (0,UL)
mf <- model.frame(formula, data)
X <- model.matrix(terms(formula), mf)
Xu <- unique(X)
vars <- all.vars(formula)
for (i in 1:NROW(Xu)){
lev <- drop(Xu[i,])
names(lev) <- colnames(Xu)
pX <- propX(X, lev)
# if both 'pstype' and 'bsmtype' are ordered categorical, replace numerical integration with simple summation because 'predict' on objects from 'npudens' and 'npcdens' assigns probability mass to values
# other than the discrete values taken on by the categorical variables (consequently, the sum of the probability mass would exceed 1)
if (pstype=="ordered" & bsmtype=="ordered"){
num <- num + pX*sum(hNum(s0=as.numeric(as.vector(sort(unique(data$S)))), s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, tpsFit=tpsFit, changePoint=changePoint, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2))
den <- den + pX*sum(hDen(s0=as.numeric(as.vector(sort(unique(data$S)))), s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2))
} else {
hNumInt <- try(integrate(hNum, 0, Inf, s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, tpsFit=tpsFit, changePoint=changePoint, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2, subdivisions=2000)$value, silent=TRUE)
if (inherits(hNumInt, 'try-error')){
hNumInt <- try(integrate(hNum, 0, UL, s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, tpsFit=tpsFit, changePoint=changePoint, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2, subdivisions=2000)$value, silent=TRUE)
if (inherits(hNumInt, 'try-error')){
hNumInt <- try(integrate(hNum, 0, UL, s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, tpsFit=tpsFit, changePoint=changePoint, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2, subdivisions=2000, rel.tol=30*.Machine$double.eps^0.25)$value, silent=TRUE)
}
}
if (inherits(hNumInt, 'try-error')){
return(NA)
} else {
num <- num + pX*hNumInt
}
hDenInt <- try(integrate(hDen, 0, UL, s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2, subdivisions=2000, rel.tol=30*.Machine$double.eps^0.25)$value, silent=TRUE)
if (inherits(hDenInt, 'try-error')){
hDenInt <- try(integrate(hDen, 0, UL, s1=s1, lev=lev, vars=vars, data=data, pstype=pstype, bsmtype=bsmtype, npcdensFit1=npcdensFit1, npcdensFit2=npcdensFit2, subdivisions=2000, rel.tol=.Machine$double.eps^0.05)$value, silent=TRUE)
}
if (inherits(hDenInt, 'try-error')){
return(NA)
} else {
den <- den + pX*hDenInt
}
}
}
return(num/den)
}
# 'riskV' returns the value of P{Y(1)=1 | S(1)=s1}
# s1 is a scalar
riskV <- function(s1, data, data2, changePoint=NULL){
nTControls <- NROW(subset(data, Z==1 & Y==0))
nTCases <- NROW(subset(data, Z==1 & Y==1))
tpsFormula <- paste0("Y~",ifelse(is.null(changePoint),"S","Sc"))
fit <- tps(tpsFormula, data=subset(data2, Z==1 & !is.na(Y)), nn0=nTControls, nn1=nTCases, group=rep(1, NROW(subset(data2, Z==1 & !is.na(Y)))), method="PL", cohort=TRUE)
if (!is.null(changePoint)){ s1 <- ifelse(s1>changePoint,s1-changePoint,0) }
return(tpsPredict(fit, cbind(1, s1)))
}
# 'risk' returns the estimates of risk in each study group for a given s1
# s1 is a scalar
risk <- function(s1, formula, data, data2, pstype, bsmtype, tpsFit, npcdensFit1, npcdensFit2, changePoint=NULL){
risk1 <- riskV(s1, data, data2, changePoint)
risk0 <- riskP(s1, formula, data, pstype, bsmtype, tpsFit, npcdensFit1, npcdensFit2, changePoint)
return(list(plaRisk=risk0, txRisk=risk1))
}
#' Bootstrap Estimation of Conditional Clinical Endpoint Risk under Placebo and Treatment Given Biomarker Response to Treatment in a Baseline Surrogate Measure
#' Three-Phase Sampling Design
#'
#' Estimates \eqn{P\{Y(z)=1|S(1)=s_1\}}, \eqn{z=0,1}, on a grid of \eqn{s_1} values in bootstrap resamples (see \code{\link{riskCurve}} for notation introduction). Cases
#' (\eqn{Y=1}) and controls (\eqn{Y=0}) are sampled separately yielding a fixed number of cases and controls in each bootstrap sample. Consequentially, the number of controls
#' with available phase 2 data varies across bootstrap samples.
#'
#' @param formula a formula object with the binary clinical endpoint on the left of the \code{~} operator. The first listed variable on the right must be the biomarker response
#' at \eqn{t0} and all variables that follow, if any, are discrete baseline covariates specified in all fitted models that condition on them. Interactions and transformations
#' of the baseline covariates are allowed. All terms in the formula must be evaluable in the data frame \code{data}.
#' @param bsm a character string specifying the variable name in \code{data} representing the baseline surrogate measure
#' @param tx a character string specifying the variable name in \code{data} representing the treatment group indicator
#' @param data a data frame with one row per randomized participant endpoint-free at \eqn{t_0} that contains at least the variables specified in \code{formula}, \code{bsm} and
#' \code{tx}. Values of \code{bsm} and the biomarker at \eqn{t_0} that are unavailable are represented as \code{NA}.
#' @param pstype a character string specifying whether the biomarker response shall be treated as a \code{continuous} (default) or \code{ordered} categorical variable in the
#' kernel density/probability estimation
#' @param bsmtype a character string specifying whether the baseline surrogate measure shall be treated as a \code{continuous} (default) or \code{ordered} categorical variable in the
#' kernel density/probability estimation
#' @param bwtype a character string specifying the bandwidth type for continuous variables in the kernel density estimation. The options are \code{fixed} (default) for fixed
#' bandwidths, \code{generalized_nn} for generalized nearest neighbors, and \code{adaptive_nn} for adaptive nearest neighbors. As noted in the documentation of the function
#' \code{npcdensbw} in the \code{np} package: "Adaptive nearest-neighbor bandwidths change with each sample realization in the set when estimating the density at the point \eqn{x}.
#' Generalized nearest-neighbor bandwidths change with the point at which the density is estimated, \eqn{x}. Fixed bandwidths are constant over the support of \eqn{x}."
#' @param hinge a logical value (\code{FALSE} by default) indicating whether a hinge model (Fong et al., 2017) shall be used for modeling the effect of \eqn{S(z)} on the
#' clinical endpoint risk. A hinge model specifies that variability in \eqn{S(z)} below the hinge point does not associate with the clinical endpoint risk. The hinge point
#' is reestimated in each bootstrap sample.
#' @param weights either a numeric vector of weights or a character string specifying the variable name in \code{data} representing weights applied to observations
#' in the phase 2 subset in order to make inference about the target population of all randomized participants endpoint-free at \eqn{t_0}. The weights reflect that
#' the case:control ratio in the phase 2 subset is different from that in the target population and are passed on to GLMs in the estimation of the hinge point.
#' If \code{NULL} (default and recommended), weights for cases and controls are recalculated separately in each study group \emph{within each bootstrap sample}; otherwise the
#' same specified vector of weights is used in each bootstrap sample.
#' @param psGrid a numeric vector of \eqn{S(1)} values at which the conditional clinical endpoint risk in each study group is estimated. If \code{NULL} (default),
#' a grid of values spanning the range of observed values of the biomarker will be used.
#' @param iter the number of bootstrap iterations
#' @param seed a seed of the random number generator supplied to \code{set.seed} for reproducibility
#' @param saveFile a character string specifying the name of an \code{.RData} file storing the output list. If \code{NULL} (default), the output list will only be returned.
#' @param saveDir a character string specifying a path for the output directory. If \code{NULL} (default), the output list will only be returned; otherwise, if
#' \code{saveFile} is specified, the output list will also be saved as an \code{.RData} file in the specified directory.
#'
#' @return If \code{saveFile} and \code{saveDir} are both specified, the output list (named \code{bList}) is saved as an \code{.RData} file; otherwise it is returned only.
#' The output object is a list with the following components:
#' \itemize{
#' \item \code{psGrid}: a numeric vector of \eqn{S(1)} values at which the conditional clinical endpoint risk is estimated in the components \code{plaRiskCurveBoot} and
#' \code{txRiskCurveBoot}
#' \item \code{plaRiskCurveBoot}: a \code{length(psGrid)}-by-\code{iter} matrix of estimates of \eqn{P\{Y(0)=1|S(1)=s_1\}} for \eqn{s_1} in \code{psGrid},
#' with columns representing bootstrap samples
#' \item \code{txRiskCurveBoot}: a \code{length(psGrid)}-by-\code{iter} matrix of estimates of \eqn{P\{Y(1)=1|S(1)=s_1\}} for \eqn{s_1} in \code{psGrid},
#' with columns representing bootstrap samples
#' \item \code{cpointPboot}: if \code{hinge=TRUE}, a numeric vector of estimates of the hinge point in the placebo group in each bootstrap sample
#' \item \code{cpointTboot}: if \code{hinge=TRUE}, a numeric vector of estimates of the hinge point in the treatment group in each bootstrap sample
#' }
#'
#' @references Fong, Y., Huang, Y., Gilbert, P. B., and Permar, S. R. (2017), chngpt: threshold regression model estimation and inference, \emph{BMC Bioinformatics}, 18.
#'
#' @examples
#' n <- 500
#' Z <- rep(0:1, each=n/2)
#' S <- MASS::mvrnorm(n, mu=c(2,2,3), Sigma=matrix(c(1,0.9,0.7,0.9,1,0.7,0.7,0.7,1), nrow=3))
#' p <- pnorm(drop(cbind(1,Z,(1-Z)*S[,2],Z*S[,3]) %*% c(-1.2,0.2,-0.02,-0.2)))
#' Y <- sapply(p, function(risk){ rbinom(1,1,risk) })
#' X <- rbinom(n,1,0.5)
#' # delete S(1) in placebo recipients
#' S[Z==0,3] <- NA
#' # delete S(0) in treatment recipients
#' S[Z==1,2] <- NA
#' # generate the indicator of being sampled into the phase 2 subset
#' phase2 <- rbinom(n,1,0.4)
#' # delete Sb, S(0) and S(1) in controls not included in the phase 2 subset
#' S[Y==0 & phase2==0,] <- c(NA,NA,NA)
#' # delete Sb in cases not included in the phase 2 subset
#' S[Y==1 & phase2==0,1] <- NA
#' data <- data.frame(X,Z,S[,1],ifelse(Z==0,S[,2],S[,3]),Y)
#' colnames(data) <- c("X","Z","Sb","S","Y")
#' qS <- quantile(data$S, probs=c(0.05,0.95), na.rm=TRUE)
#' grid <- seq(qS[1], qS[2], length.out=3)
#'
#' out <- bootRiskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data,
#' psGrid=grid, iter=1, seed=10)
#' \donttest{
#' # alternatively, to save the .RData output file (no '<-' needed):
#' bootRiskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data,
#' psGrid=grid, iter=1, seed=10, saveFile="out.RData", saveDir="./")
#' }
#'
#' @seealso \code{\link{riskCurve}}, \code{\link{summary.riskCurve}} and \code{\link{plotMCEPcurve}}
#' @export
bootRiskCurve <- function(formula, bsm, tx, data, pstype=c("continuous", "ordered"), bsmtype=c("continuous", "ordered"), bwtype=c("fixed", "generalized_nn", "adaptive_nn"),
hinge=FALSE, weights=NULL, psGrid=NULL, iter, seed=NULL, saveFile=NULL, saveDir=NULL){
if (missing(bsm)){ stop("The variable name in argument 'bsm' for the baseline surrogate measure is missing.") }
if (missing(tx)){ stop("The variable name in argument 'tx' for the treatment group indicator is missing.") }
if (missing(data)){ stop("The data frame 'data' for interpreting the variables in 'formula' is missing.") }
if (missing(iter)){ stop("The number of bootstrap iterations in argument 'iter' is missing.") }
pstype <- match.arg(pstype)
bsmtype <- match.arg(bsmtype)
bwtype <- match.arg(bwtype)
if (!is.null(weights)){
if (is.character(weights)){
colnames(data)[colnames(data)==weights] <- "weights"
} else {
# assuming 'weights' is a numeric vector
data$weights <- weights
}
}
formulaDecomp <- strsplit(strsplit(paste(deparse(formula), collapse = ""), " *[~] *")[[1]], " *[+] *")
anyBaselineCovar <- length(formulaDecomp[[2]])>1
# standardize the variable name for the treatment group indicator
colnames(data)[colnames(data)==tx] <- "Z"
# standardize the variable name for the biomarker measurement at fixed time t0 post-randomization
# this line assumes that it is the first listed variable on the RHS of 'formula'
colnames(data)[colnames(data)==formulaDecomp[[2]][1]] <- "S"
# standardize the variable name for the biomarker's baseline measurement
colnames(data)[colnames(data)==bsm] <- "Sb"
# standardize the variable name for the binary clinical endpoint measured after t0
colnames(data)[colnames(data)==formulaDecomp[[1]]] <- "Y"
# extract the subset with available phase 2 data (i.e., with measured S)
data2 <- subset(data, !is.na(S))
# case deletion method applied to 'data2' to match the case:control ratio in 'data', separately in each study group,
# in order to be able to use the standard kernel density estimation procedure
dataControls <- subset(data, Y==0)
nControls <- NROW(dataControls)
nPControls <- NROW(subset(dataControls, Z==0))
nTControls <- NROW(subset(dataControls, Z==1))
nPControls2 <- NROW(dataPControls2 <- subset(data2, Z==0 & Y==0))
nTControls2 <- NROW(dataTControls2 <- subset(data2, Z==1 & Y==0))
dataCases <- subset(data, Y==1)
nCases <- NROW(dataCases)
nPCases <- NROW(subset(dataCases, Z==0))
nTCases <- NROW(subset(dataCases, Z==1))
nPCases2 <- NROW(dataPCases2 <- subset(data2, Z==0 & Y==1))
nTCases2 <- NROW(dataTCases2 <- subset(data2, Z==1 & Y==1))
# in each study group separately, calculate the required number of cases in 'data2' to match the case:control ratio in 'data'
nPCases2new <- nPCases * nPControls2 / nPControls
nTCases2new <- nTCases * nTControls2 / nTControls
# in each study group separately, randomly sample cases to match the case:control ratio in 'data'
dataP2correctRatio <- rbind(dataPControls2, dataPCases2[sample(1:nPCases2, max(1,round(nPCases2new,0))),])
dataT2correctRatio <- rbind(dataTControls2, dataTCases2[sample(1:nTCases2, max(1,round(nTCases2new,0))),])
rm(dataPControls2); rm(dataPCases2); rm(dataTControls2); rm(dataTCases2)
# estimate optimal bandwidths for kernel estimates of the conditional (or unconditional) densities
Sterm <- "S"
if (pstype=="ordered"){ Sterm <- "ordered(S)"}
Sbterm <- "Sb"
if (bsmtype=="ordered"){ Sbterm <- "ordered(Sb)"}
if (anyBaselineCovar){
fm.fbw <- as.formula(paste0(Sterm," ~ ",paste(c(Sbterm,formulaDecomp[[2]][-1]),collapse="+")))
fm.gbw <- as.formula(paste0(Sterm," ~ ",paste(formulaDecomp[[2]][-1],collapse="+")))
gbw <- npcdensbw(fm.gbw, data=dataP2correctRatio, cxkertype="epanechnikov", cykertype="epanechnikov", bwtype=bwtype)
} else {
fm.fbw <- as.formula(paste0(Sterm," ~ ",Sbterm))
fm.gbw <- as.formula(paste0("~ ",Sterm))
# marginal density
gbw <- npudensbw(fm.gbw, data=dataP2correctRatio, ckertype="epanechnikov", bwtype=bwtype)
}
fbw <- npcdensbw(fm.fbw, data=dataT2correctRatio, cxkertype="epanechnikov", cykertype="epanechnikov", bwtype=bwtype)
if(!is.null(seed)){ set.seed(seed) }
bSampleControls <- matrix(sample(1:nControls, nControls*iter, replace=TRUE), nrow=nControls, ncol=iter)
bSampleCases <- matrix(sample(1:nCases, nCases*iter, replace=TRUE), nrow=nCases, ncol=iter)
# a grid of values of S on which the bootstrapped risk curves are returned
if (is.null(psGrid)){
psGrid <- seq(min(data2$S), max(data2$S), length.out=200)
}
rm(data2)
bRiskCurveList <- lapply(1:iter, function(i, formulaDecomp, Sterm, Sbterm){
# create a bootstrap sample
bdata <- rbind(dataControls[bSampleControls[,i],], dataCases[bSampleCases[,i],])
# extract the bootstrap subset with phase 2 data
bdata2 <- subset(bdata, !is.na(S))
# estimate the hinge point in each bootstrap iteration
if (hinge){
# recalculate weights in each bootstrap iteration for passing on to 'chngptm'
if (is.null(weights)){
bdata2$weights <- NA
bdata2$weights <- ifelse(bdata2$Z==0 & bdata2$Y==0, nPControls/nPControls2, bdata2$weights)
bdata2$weights <- ifelse(bdata2$Z==1 & bdata2$Y==0, nTControls/nTControls2, bdata2$weights)
bdata2$weights <- ifelse(bdata2$Z==0 & bdata2$Y==1, nPCases/nPCases2, bdata2$weights)
bdata2$weights <- ifelse(bdata2$Z==1 & bdata2$Y==1, nTCases/nTCases2, bdata2$weights)
}
# in each study group separately, estimate the hinge point for the association of S with Y
if (anyBaselineCovar){
fm <- as.formula(paste0("Y ~ ",paste(formulaDecomp[[2]][-1],collapse="+")))
} else {
fm <- Y ~ 1
}
cpointP <- chngptm(formula.1=fm, formula.2=~S, data=subset(bdata2, Z==0 & !is.na(Y)), family="binomial", type="hinge", weights=subset(bdata2, Z==0 & !is.na(Y))$weights)$coefficients["chngpt"]
cpointT <- chngptm(formula.1=Y ~ 1, formula.2=~S, data=subset(bdata2, Z==1 & !is.na(Y)), family="binomial", type="hinge", weights=subset(bdata2, Z==1 & !is.na(Y))$weights)$coefficients["chngpt"]
# use their minimum as the hinge point in the below specified GLMs
cpoint <- min(cpointP, cpointT)
# biomarker S left-censored at 'cpoint' for use in the below specified GLMs
bdata$Sc <- with(bdata, ifelse(S>cpoint, S-cpoint, 0))
# re-extract the subset with phase 2 data
bdata2 <- subset(bdata, !is.na(S))
# IPW logistic regression model fitted to placebo recipients in the phase 2 subset accounting for two-phase sampling of S
if (anyBaselineCovar){
fm <- as.formula(paste0("Y ~ ",paste(c("Sc",formulaDecomp[[2]][-1]),collapse="+")))
} else {
fm <- Y ~ Sc
}
fit1 <- tps(fm, data=subset(bdata2, Z==0 & !is.na(Y)), nn0=NROW(subset(bdata, Z==0 & Y==0)), nn1=NROW(subset(bdata, Z==0 & Y==1)), group=rep(1, NROW(subset(bdata2, Z==0 & !is.na(Y)))), method="PL", cohort=TRUE)
} else {
# for passing on to the function 'risk'
cpoint <- NULL
# IPW logistic regression model fitted to placebo recipients in the phase 2 subset accounting for two-phase sampling of S
if (anyBaselineCovar){
fm <- as.formula(paste0("Y ~ ",paste(c("S",formulaDecomp[[2]][-1]),collapse="+")))
} else {
fm <- Y ~ S
}
fit1 <- tps(fm, data=subset(bdata2, Z==0 & !is.na(Y)), nn0=NROW(subset(bdata, Z==0 & Y==0)), nn1=NROW(subset(bdata, Z==0 & Y==1)), group=rep(1, NROW(subset(bdata2, Z==0 & !is.na(Y)))), method="PL", cohort=TRUE)
}
bdataControls <- subset(bdata, Y==0)
nPControls2 <- NROW(bdataPControls2 <- subset(bdata2, Z==0 & Y==0))
nTControls2 <- NROW(bdataTControls2 <- subset(bdata2, Z==1 & Y==0))
bdataCases <- subset(bdata, Y==1)
nPCases2 <- NROW(bdataPCases2 <- subset(bdata2, Z==0 & Y==1))
nTCases2 <- NROW(bdataTCases2 <- subset(bdata2, Z==1 & Y==1))
nPControls <- NROW(subset(bdataControls, Z==0))
nTControls <- NROW(subset(bdataControls, Z==1))
nPCases <- NROW(subset(bdataCases, Z==0))
nTCases <- NROW(subset(bdataCases, Z==1))
# in each study group separately, recalculate the required number of cases in 'bdata2' to match the case:control ratio in 'bdata'
nPCases2new <- nPCases * nPControls2 / nPControls
nTCases2new <- nTCases * nTControls2 / nTControls
# in each study group separately, randomly sample cases to match the case:control ratio in 'bdata'
bdataP2correctRatio <- rbind(bdataPControls2, bdataPCases2[sample(1:nPCases2, max(1,round(nPCases2new,0))),])
bdataT2correctRatio <- rbind(bdataTControls2, bdataTCases2[sample(1:nTCases2, max(1,round(nTCases2new,0))),])
rm(bdataPControls2); rm(bdataPCases2); rm(bdataTControls2); rm(bdataTCases2)
# ensure that all factor levels of S and Sb in the data used for computing 'fbw' and 'gbw' are included in the data used for computing 'bfbw' and 'bgbw'
if (Sterm=="ordered(S)"){
bdataP2correctRatio$S <- factor(bdataP2correctRatio$S, levels=levels(ordered(dataP2correctRatio$S)), ordered=TRUE)
bdataT2correctRatio$S <- factor(bdataT2correctRatio$S, levels=levels(ordered(dataT2correctRatio$S)), ordered=TRUE)
}
if (Sbterm=="ordered(Sb)"){
bdataT2correctRatio$Sb <- factor(bdataT2correctRatio$Sb, levels=levels(ordered(dataT2correctRatio$Sb)), ordered=TRUE)
}
# kernel density estimator for g(s0|X=x) using the placebo group in the phase 2 subset
if (anyBaselineCovar){
# the formulas are reintroduced in the 'lapply' so that 'npcdensbw' and 'npudensbw' are able to evaluate them in the parent.frame() environment
fm.fbw <- as.formula(paste0(Sterm," ~ ",paste(c(Sbterm,formulaDecomp[[2]][-1]),collapse="+")))
fm.gbw <- as.formula(paste0(Sterm," ~ ",paste(formulaDecomp[[2]][-1],collapse="+")))
bgbw <- npcdensbw(fm.gbw, data=bdataP2correctRatio, bws=gbw, bandwidth.compute=FALSE)
ghat <- npcdens(bgbw)
} else {
fm.fbw <- as.formula(paste0(Sterm," ~ ",Sbterm))
fm.gbw <- as.formula(paste0("~ ",Sterm))
# marginal density
bgbw <- npudensbw(fm.gbw, data=bdataP2correctRatio, bws=gbw, bandwidth.compute=FALSE)
ghat <- npudens(bgbw)
}
# kernel density estimator for f(s1|Sb=s0, X=x) using the treatment group in the phase 2 subset
bfbw <- npcdensbw(fm.fbw, data=bdataT2correctRatio, bws=fbw, bandwidth.compute=FALSE)
fhat <- npcdens(bfbw)
# the first argument of 'risk' is a scalar
if (anyBaselineCovar){
fm <- as.formula(paste0("~",paste(formulaDecomp[[2]][-1],collapse="+")))
} else {
fm <- ~ 1
}
curves <- lapply(psGrid, function(s){ risk(s, fm, bdata, bdata2, pstype, bsmtype, fit1, fhat, ghat, cpoint) })
plaRiskCurve <- sapply(curves, "[[", "plaRisk")
txRiskCurve <- sapply(curves, "[[", "txRisk")
# the output list
out <- list(plaRiskCurve=plaRiskCurve, txRiskCurve=txRiskCurve)
if (hinge){
out$cpointP <- cpointP
out$cpointT <- cpointT
}
return(out)
}, formulaDecomp=formulaDecomp, Sterm=Sterm, Sbterm=Sbterm)
# cbind all bootstrap risk curves
plaRiskCurveBoot <- sapply(bRiskCurveList,"[[","plaRiskCurve")
txRiskCurveBoot <- sapply(bRiskCurveList,"[[","txRiskCurve")
# the output list
bList <- list(psGrid=psGrid, plaRiskCurveBoot=plaRiskCurveBoot, txRiskCurveBoot=txRiskCurveBoot)
if (hinge){
bList$cpointPboot <- sapply(bRiskCurveList,"[[","cpointP")
bList$cpointTboot <- sapply(bRiskCurveList,"[[","cpointT")
}
if (!is.null(saveFile)){
save(bList, file=file.path(saveDir, saveFile))
cat("Output saved in:\n",file.path(saveDir, saveFile),"\n")
}
return(invisible(bList))
}
#' Estimation of Conditional Clinical Endpoint Risk under Placebo and Treatment Given Biomarker Response to Treatment in a Baseline Surrogate Measure Three-Phase Sampling Design
#'
#' Estimates \eqn{P\{Y(z)=1|S(1)=s_1\}}, \eqn{z=0,1}, on a grid of \eqn{s_1} values following the estimation method of Juraska, Huang, and Gilbert (2018), where \eqn{Z} is the
#' treatment group indicator (\eqn{Z=1}, treatment; \eqn{Z=0}, placebo), \eqn{S(z)} is a continuous or ordered categorical univariate biomarker under assignment to \eqn{Z=z}
#' measured at fixed time \eqn{t_0} after randomization, and \eqn{Y} is a binary clinical endpoint (\eqn{Y=1}, disease; \eqn{Y=0}, no disease) measured after \eqn{t_0}. The
#' estimator employs the generalized product kernel density/probability estimation method of Hall, Racine, and Li (2004) implemented in the \code{np} package. The risks
#' \eqn{P\{Y(z)=1|S(z)=s_1,X=x\}}, \eqn{z=0,1}, where \eqn{X} is a vector of discrete baseline covariates, are estimated by fitting inverse probability-weighted logistic regression
#' models using the \code{osDesign} package.
#'
#' @param formula a formula object with the binary clinical endpoint on the left of the \code{~} operator. The first listed variable on the right must be the biomarker response
#' at \eqn{t0} and all variables that follow, if any, are discrete baseline covariates specified in all fitted models that condition on them. Interactions and transformations
#' of the baseline covariates are allowed. All terms in the formula must be evaluable in the data frame \code{data}.
#' @param bsm a character string specifying the variable name in \code{data} representing the baseline surrogate measure
#' @param tx a character string specifying the variable name in \code{data} representing the treatment group indicator
#' @param data a data frame with one row per randomized participant endpoint-free at \eqn{t_0} that contains at least the variables specified in \code{formula}, \code{bsm} and
#' \code{tx}. Values of \code{bsm} and the biomarker at \eqn{t_0} that are unavailable are represented as \code{NA}.
#' @param pstype a character string specifying whether the biomarker response shall be treated as a \code{continuous} (default) or \code{ordered} categorical variable in the
#' kernel density/probability estimation
#' @param bsmtype a character string specifying whether the baseline surrogate measure shall be treated as a \code{continuous} (default) or \code{ordered} categorical variable in the
#' kernel density/probability estimation
#' @param bwtype a character string specifying the bandwidth type for continuous variables in the kernel density estimation. The options are \code{fixed} (default) for fixed
#' bandwidths, \code{generalized_nn} for generalized nearest neighbors, and \code{adaptive_nn} for adaptive nearest neighbors. As noted in the documentation of the function
#' \code{npcdensbw} in the \code{np} package: "Adaptive nearest-neighbor bandwidths change with each sample realization in the set when estimating the density at the point \eqn{x}.
#' Generalized nearest-neighbor bandwidths change with the point at which the density is estimated, \eqn{x}. Fixed bandwidths are constant over the support of \eqn{x}."
#' @param hinge a logical value (\code{FALSE} by default) indicating whether a hinge model (Fong et al., 2017) shall be used for modeling the effect of \eqn{S(z)} on the
#' clinical endpoint risk. A hinge model specifies that variability in \eqn{S(z)} below the hinge point does not associate with the clinical endpoint risk.
#' @param weights either a numeric vector of weights or a character string specifying the variable name in \code{data} representing weights applied to observations
#' in the phase 2 subset in order to make inference about the target population of all randomized participants endpoint-free at \eqn{t_0}. The weights reflect that
#' the case:control ratio in the phase 2 subset is different from that in the target population and are passed on to GLMs in the estimation of the hinge point.
#' If \code{NULL} (default), weights for cases and controls are calculated separately in each study group.
#' @param psGrid a numeric vector of \eqn{S(1)} values at which the conditional clinical endpoint risk in each study group is estimated. If \code{NULL} (default),
#' a grid of values spanning the range of observed values of the biomarker will be used.
#' @param saveFile a character string specifying the name of an \code{.RData} file storing the output list. If \code{NULL} (default), the output list will only be returned.
#' @param saveDir a character string specifying a path for the output directory. If \code{NULL} (default), the output list will only be returned; otherwise, if
#' \code{saveFile} is specified, the output list will also be saved as an \code{.RData} file in the specified directory.
#'
#' @return If \code{saveFile} and \code{saveDir} are both specified, the output list (named \code{oList}) is saved as an \code{.RData} file; otherwise it is returned only.
#' The output object (of class \code{riskCurve}) is a list with the following components:
#' \itemize{
#' \item \code{psGrid}: a numeric vector of \eqn{S(1)} values at which the conditional clinical endpoint risk is estimated in the components \code{plaRiskCurve} and
#' \code{txRiskCurve}
#' \item \code{plaRiskCurve}: a numeric vector of estimates of \eqn{P\{Y(0)=1|S(1)=s_1\}} for \eqn{s_1} in \code{psGrid}
#' \item \code{txRiskCurve}: a numeric vector of estimates of \eqn{P\{Y(1)=1|S(1)=s_1\}} for \eqn{s_1} in \code{psGrid}
#' \item \code{fOptBandwidths}: a \code{conbandwidth} object returned by the call of the function \code{npcdensbw} containing the optimal bandwidths, selected by likelihood
#' cross-validation, in the kernel estimation of the conditional density of \eqn{S(1)} given the baseline surrogate measure and any other specified baseline covariates
#' \item \code{gOptBandwidths}: a \code{conbandwidth} object returned by the call of the function \code{npcdensbw} or \code{npudensbw} containing the optimal bandwidths,
#' selected by likelihood cross-validation, in the kernel estimation of the conditional density of \eqn{S(0)} given any specified baseline covariates or the marginal density
#' of \eqn{S(0)} if no baseline covariates are specified in \code{formula}
#' \item \code{cpointP}: if \code{hinge=TRUE}, the estimate of the hinge point in the placebo group
#' \item \code{cpointT}: if \code{hinge=TRUE}, the estimate of the hinge point in the treatment group
#' }
#'
#' @references Fong, Y., Huang, Y., Gilbert, P. B., and Permar, S. R. (2017), chngpt: threshold regression model estimation and inference, \emph{BMC Bioinformatics}, 18.
#'
#' Hall, P., Racine, J., and Li, Q. (2004), Cross-validation and the estimation of conditional probability densities, \emph{JASA} 99(468), 1015-1026.
#'
#' Juraska, M., Huang, Y., and Gilbert, P. B. (2020), Inference on treatment effect modification by biomarker response in a three-phase sampling design, Biostatistics, 21(3): 545-560, \url{https://doi.org/10.1093/biostatistics/kxy074}.
#'
#' @examples
#' n <- 500
#' Z <- rep(0:1, each=n/2)
#' S <- MASS::mvrnorm(n, mu=c(2,2,3), Sigma=matrix(c(1,0.9,0.7,0.9,1,0.7,0.7,0.7,1), nrow=3))
#' p <- pnorm(drop(cbind(1,Z,(1-Z)*S[,2],Z*S[,3]) %*% c(-1.2,0.2,-0.02,-0.2)))
#' Y <- sapply(p, function(risk){ rbinom(1,1,risk) })
#' X <- rbinom(n,1,0.5)
#' # delete S(1) in placebo recipients
#' S[Z==0,3] <- NA
#' # delete S(0) in treatment recipients
#' S[Z==1,2] <- NA
#' # generate the indicator of being sampled into the phase 2 subset
#' phase2 <- rbinom(n,1,0.4)
#' # delete Sb, S(0) and S(1) in controls not included in the phase 2 subset
#' S[Y==0 & phase2==0,] <- c(NA,NA,NA)
#' # delete Sb in cases not included in the phase 2 subset
#' S[Y==1 & phase2==0,1] <- NA
#' data <- data.frame(X,Z,S[,1],ifelse(Z==0,S[,2],S[,3]),Y)
#' colnames(data) <- c("X","Z","Sb","S","Y")
#' qS <- quantile(data$S, probs=c(0.05,0.95), na.rm=TRUE)
#' grid <- seq(qS[1], qS[2], length.out=3)
#'
#' out <- riskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data, psGrid=grid)
#' \donttest{
#' # alternatively, to save the .RData output file (no '<-' needed):
#' riskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data, saveFile="out.RData",
#' saveDir="./")
#' }
#'
#' @seealso \code{\link{bootRiskCurve}}, \code{\link{summary.riskCurve}} and \code{\link{plotMCEPcurve}}
#' @export
riskCurve <- function(formula, bsm, tx, data, pstype=c("continuous", "ordered"), bsmtype=c("continuous", "ordered"), bwtype=c("fixed", "generalized_nn", "adaptive_nn"),
hinge=FALSE, weights=NULL, psGrid=NULL, saveFile=NULL, saveDir=NULL){
if (missing(bsm)){ stop("The variable name in argument 'bsm' for the baseline surrogate measure is missing.") }
if (missing(tx)){ stop("The variable name in argument 'tx' for the treatment group indicator is missing.") }
if (missing(data)){ stop("The data frame 'data' for interpreting the variables in 'formula' is missing.") }
pstype <- match.arg(pstype)
bsmtype <- match.arg(bsmtype)
bwtype <- match.arg(bwtype)
if (!is.null(weights)){
if (is.character(weights)){
colnames(data)[colnames(data)==weights] <- "weights"
} else {
# assuming 'weights' is a numeric vector
data$weights <- weights
}
}
formulaDecomp <- strsplit(strsplit(paste(deparse(formula), collapse = ""), " *[~] *")[[1]], " *[+] *")
anyBaselineCovar <- length(formulaDecomp[[2]])>1
# convert any baseline covariates into factors
if (anyBaselineCovar){
# 'vars' is of length >= 3
vars <- all.vars(formula)[-(1:2)]
for (i in 1:length(vars)){
data[,vars[i]] <- as.factor(data[,vars[i]])
}
}
# standardize the variable name for the treatment group indicator
colnames(data)[colnames(data)==tx] <- "Z"
# standardize the variable name for the biomarker measurement at fixed time t0 post-randomization
# this line assumes that it is the first listed variable on the RHS of 'formula'
colnames(data)[colnames(data)==formulaDecomp[[2]][1]] <- "S"
# standardize the variable name for the biomarker's baseline measurement
colnames(data)[colnames(data)==bsm] <- "Sb"
# standardize the variable name for the binary clinical endpoint measured after t0
colnames(data)[colnames(data)==formulaDecomp[[1]]] <- "Y"
# extract the subset with available phase 2 data (i.e., with measured S)
data2 <- subset(data, !is.na(S))
# case deletion method applied to 'data2' to match the case:control ratio in 'data', separately in each study group,
# in order to be able to use the standard kernel density estimation procedure
dataControls <- subset(data, Y==0)
nPControls <- NROW(subset(dataControls, Z==0))
nTControls <- NROW(subset(dataControls, Z==1))
nPControls2 <- NROW(dataPControls2 <- subset(data2, Z==0 & Y==0))
nTControls2 <- NROW(dataTControls2 <- subset(data2, Z==1 & Y==0))
dataCases <- subset(data, Y==1)
nPCases <- NROW(subset(dataCases, Z==0))
nTCases <- NROW(subset(dataCases, Z==1))
nPCases2 <- NROW(dataPCases2 <- subset(data2, Z==0 & Y==1))
nTCases2 <- NROW(dataTCases2 <- subset(data2, Z==1 & Y==1))
# in each study group separately, calculate the required number of cases in 'data2' to match the case:control ratio in 'data'
nPCases2new <- nPCases * nPControls2 / nPControls
nTCases2new <- nTCases * nTControls2 / nTControls
# in each study group separately, randomly sample cases to match the case:control ratio in 'data'
dataP2correctRatio <- rbind(dataPControls2, dataPCases2[sample(1:nPCases2, max(1,round(nPCases2new,0))),])
dataT2correctRatio <- rbind(dataTControls2, dataTCases2[sample(1:nTCases2, max(1,round(nTCases2new,0))),])
rm(dataPControls2); rm(dataPCases2); rm(dataTControls2); rm(dataTCases2)
# estimate optimal bandwidths for kernel estimates of the conditional (or unconditional) densities
Sterm <- "S"
if (pstype=="ordered"){ Sterm <- "ordered(S)"}
Sbterm <- "Sb"
if (bsmtype=="ordered"){ Sbterm <- "ordered(Sb)"}
if (anyBaselineCovar){
fm.fbw <- as.formula(paste0(Sterm," ~ ",paste(c(Sbterm,formulaDecomp[[2]][-1]),collapse="+")))
fm.gbw <- as.formula(paste0(Sterm," ~ ",paste(formulaDecomp[[2]][-1],collapse="+")))
gbw <- npcdensbw(fm.gbw, data=dataP2correctRatio, cxkertype="epanechnikov", cykertype="epanechnikov", bwtype=bwtype)
} else {
fm.fbw <- as.formula(paste0(Sterm," ~ ",Sbterm))
fm.gbw <- as.formula(paste0("~ ",Sterm))
# marginal density
gbw <- npudensbw(fm.gbw, data=dataP2correctRatio, ckertype="epanechnikov", bwtype=bwtype)
}
fbw <- npcdensbw(fm.fbw, data=dataT2correctRatio, cxkertype="epanechnikov", cykertype="epanechnikov", bwtype=bwtype)
if (hinge){
# calculate weights for passing on to 'chngptm'
if (is.null(weights) & !("weights" %in% colnames(data))){
data2$weights <- NA
data2$weights <- ifelse(data2$Z==0 & data2$Y==0, nPControls/nPControls2, data2$weights)
data2$weights <- ifelse(data2$Z==1 & data2$Y==0, nTControls/nTControls2, data2$weights)
data2$weights <- ifelse(data2$Z==0 & data2$Y==1, nPCases/nPCases2, data2$weights)
data2$weights <- ifelse(data2$Z==1 & data2$Y==1, nTCases/nTCases2, data2$weights)
}
# in each study group separately, estimate the hinge point for the association of S with Y
if (anyBaselineCovar){
fm <- as.formula(paste0("Y ~ ",paste(formulaDecomp[[2]][-1],collapse="+")))
} else {
fm <- Y ~ 1
}
cpointP <- chngptm(formula.1=fm, formula.2=~S, data=subset(data2, Z==0 & !is.na(Y)), family="binomial", type="hinge", weights=subset(data2, Z==0 & !is.na(Y))$weights)$coefficients["chngpt"]
cpointT <- chngptm(formula.1=Y ~ 1, formula.2=~S, data=subset(data2, Z==1 & !is.na(Y)), family="binomial", type="hinge", weights=subset(data2, Z==1 & !is.na(Y))$weights)$coefficients["chngpt"]
# use their minimum as the hinge point in the below specified GLMs
cpoint <- min(cpointP, cpointT)
# biomarker S left-censored at 'cpoint' for use in the below specified GLMs
data$Sc <- with(data, ifelse(S>cpoint, S-cpoint, 0))
# re-extract the subset with phase 2 data
data2 <- subset(data, !is.na(S))
# IPW logistic regression model fitted to placebo recipients in the phase 2 subset accounting for two-phase sampling of S
if (anyBaselineCovar){
fm <- as.formula(paste0("Y ~ ",paste(c("Sc",formulaDecomp[[2]][-1]),collapse="+")))
} else {
fm <- Y ~ Sc
}
fit1 <- tps(fm, data=subset(data2, Z==0 & !is.na(Y)), nn0=nPControls, nn1=nPCases, group=rep(1, NROW(subset(data2, Z==0 & !is.na(Y)))), method="PL", cohort=TRUE)
} else {
# for passing on to the function 'risk'
cpoint <- NULL
# IPW logistic regression model fitted to placebo recipients in the phase 2 subset accounting for two-phase sampling of S
if (anyBaselineCovar){
fm <- as.formula(paste0("Y ~ ",paste(c("S",formulaDecomp[[2]][-1]),collapse="+")))
} else {
fm <- Y ~ S
}
fit1 <- tps(fm, data=subset(data2, Z==0 & !is.na(Y)), nn0=nPControls, nn1=nPCases, group=rep(1, NROW(subset(data2, Z==0 & !is.na(Y)))), method="PL", cohort=TRUE)
}
# kernel density estimator for f(s1|Sb=s0, X=x) using the treatment group in the phase 2 subset
fhat <- npcdens(fbw)
# kernel density estimator for g(s0|X=x) using the placebo group in the phase 2 subset
if (anyBaselineCovar){
ghat <- npcdens(gbw)
} else {
ghat <- npudens(gbw)
}
# a grid of values of S on which the estimated risk curves are returned
if (is.null(psGrid)){
psGrid <- seq(min(data2$S), max(data2$S), length.out=200)
}
# the first argument of 'risk' is a scalar
if (anyBaselineCovar){
fm <- as.formula(paste0("~",paste(formulaDecomp[[2]][-1],collapse="+")))
} else {
fm <- ~ 1
}
curves <- lapply(psGrid, function(s){ risk(s, fm, data, data2, pstype, bsmtype, fit1, fhat, ghat, cpoint) })
plaRiskCurve <- sapply(curves, "[[", "plaRisk")
txRiskCurve <- sapply(curves, "[[", "txRisk")
# the output list
oList <- list(psGrid=psGrid, plaRiskCurve=plaRiskCurve, txRiskCurve=txRiskCurve, fOptBandwidths=fbw, gOptBandwidths=gbw)
if (hinge){
oList$cpointP <- cpointP
oList$cpointT <- cpointT
}
class(oList) <- "riskCurve"
if (!is.null(saveFile) & !is.null(saveDir)){
save(oList, file=file.path(saveDir, saveFile))
cat("Output saved in:\n",file.path(saveDir, saveFile),"\n")
}
return(invisible(oList))
}
# 'object' is the output list from either 'riskCurve' or 'bootRiskCurve'
contrastRiskCurve <- function(object, contrast){
if (contrast=="te"){ return(1 - object$txRisk/object$plaRisk) }
if (contrast=="rr"){ return(object$txRisk/object$plaRisk) }
if (contrast=="logrr"){ return(log(object$txRisk/object$plaRisk)) }
if (contrast=="rd"){ return(object$plaRisk - object$txRisk) }
}
# 'object' is the output list from either 'riskCurve' or 'bootRiskCurve'
tContrastRiskCurve <- function(object, contrast){
if (contrast %in% c("te","rr","logrr")){ return(log(object$txRisk/object$plaRisk)) }
if (contrast=="rd"){ return(object$plaRisk - object$txRisk) }
}
# 'x' is a numeric vector
invtContrastRiskCurve <- function(x, contrast){
if (contrast=="te"){ return(1 - exp(x)) }
if (contrast=="rr"){ return(exp(x)) }
if (contrast=="logrr"){ return(x) }
if (contrast=="rd"){ return(x) }
}
#' Summary of Point and Interval Estimation of a Marginal Causal Effect Predictiveness Curve
#'
#' Summarizes point estimates and pointwise and simultaneous Wald-type bootstrap confidence intervals for a specified marginal causal effect predictiveness (mCEP) curve (see,
#' e.g., Juraska, Huang, and Gilbert (2018) for the definition).
#'
#' @param object an object of class \code{riskCurve}, typically returned by \code{\link{riskCurve}}
#' @param boot an object returned by \code{\link{bootRiskCurve}}. If \code{NULL} (default), only point estimates are reported.
#' @param contrast a character string specifying the mCEP curve. It must be one of \code{te} (treatment efficacy), \code{rr} (relative risk), \code{logrr} (log relative risk), and \code{rd} (risk
#' difference [placebo minus treatment]).
#' @param confLevel the confidence level of pointwise and simultaneous confidence intervals
#' @param \dots for other methods
#'
#' @return A data frame containing point and possibly interval estimates of the specified mCEP curve.
#'
#' @references Juraska, M., Huang, Y., and Gilbert, P. B. (2020), Inference on treatment effect modification by biomarker response in a three-phase sampling design, Biostatistics, 21(3): 545-560, \url{https://doi.org/10.1093/biostatistics/kxy074}.
#'
#' @examples
#' n <- 500
#' Z <- rep(0:1, each=n/2)
#' S <- MASS::mvrnorm(n, mu=c(2,2,3), Sigma=matrix(c(1,0.9,0.7,0.9,1,0.7,0.7,0.7,1), nrow=3))
#' p <- pnorm(drop(cbind(1,Z,(1-Z)*S[,2],Z*S[,3]) %*% c(-1.2,0.2,-0.02,-0.2)))
#' Y <- sapply(p, function(risk){ rbinom(1,1,risk) })
#' # delete S(1) in placebo recipients
#' S[Z==0,3] <- NA
#' # delete S(0) in treatment recipients
#' S[Z==1,2] <- NA
#' # generate the indicator of being sampled into the phase 2 subset
#' phase2 <- rbinom(n,1,0.4)
#' # delete Sb, S(0) and S(1) in controls not included in the phase 2 subset
#' S[Y==0 & phase2==0,] <- c(NA,NA,NA)
#' # delete Sb in cases not included in the phase 2 subset
#' S[Y==1 & phase2==0,1] <- NA
#' data <- data.frame(Z,S[,1],ifelse(Z==0,S[,2],S[,3]),Y)
#' colnames(data) <- c("Z","Sb","S","Y")
#' qS <- quantile(data$S, probs=c(0.05,0.95), na.rm=TRUE)
#' grid <- seq(qS[1], qS[2], length.out=2)
#'
#' out <- riskCurve(formula=Y ~ S, bsm="Sb", tx="Z", data=data, psGrid=grid)
#' boot <- bootRiskCurve(formula=Y ~ S, bsm="Sb", tx="Z", data=data,
#' psGrid=grid, iter=2, seed=10)
#' summary(out, boot, contrast="te")
#'
#' @seealso \code{\link{riskCurve}} and \code{\link{bootRiskCurve}}
#' @export
summary.riskCurve <- function(object, boot=NULL, contrast=c("te", "rr", "logrr", "rd"), confLevel=0.95,...){
contrast <- match.arg(contrast)
# point estimates of mCEP(s1)
MCEP <- contrastRiskCurve(object, contrast)
out <- data.frame(object$psGrid, MCEP)
colnames(out) <- c("psGrid", contrast)
# interval estimates of mCEP(s1)
if(!is.null(boot)){
# transformed MCEP curve
tMCEP <- tContrastRiskCurve(object, contrast)
# transformed bootstrapped MCEP curves
# assuming the matrices have the same dimensions
tbMCEP <- tContrastRiskCurve(boot, contrast)
# bootstrap SE of tMCEP estimates
bSE <- apply(tbMCEP, 1, sd, na.rm=TRUE)
# pointwise confidence bounds for MCEP(s1)
ptLB.MCEP <- invtContrastRiskCurve(tMCEP - qnorm(1-(1-confLevel)/2) * bSE, contrast=contrast)
ptUB.MCEP <- invtContrastRiskCurve(tMCEP + qnorm(1-(1-confLevel)/2) * bSE, contrast=contrast)
supAbsZ <- NULL
for (j in 1:NCOL(tbMCEP)){
Zstat <- abs((tbMCEP[,j]-tMCEP)/bSE)
supAbsZ <- c(supAbsZ, max(Zstat, na.rm=!all(is.na(Zstat))))
}
qSupAbsZ <- quantile(supAbsZ, probs=confLevel, na.rm=TRUE)
smLB.MCEP <- invtContrastRiskCurve(tMCEP - qSupAbsZ * bSE, contrast=contrast)
smUB.MCEP <- invtContrastRiskCurve(tMCEP + qSupAbsZ * bSE, contrast=contrast)
if (contrast=="te"){
tmp <- ptUB.MCEP
ptUB.MCEP <- ptLB.MCEP
ptLB.MCEP <- tmp
tmp <- smUB.MCEP
smUB.MCEP <- smLB.MCEP
smLB.MCEP <- tmp
}
out$ptLB <- ptLB.MCEP
out$ptUB <- ptUB.MCEP
out$smLB <- smLB.MCEP
out$smUB <- smUB.MCEP
}
return(out)
}
#' Testing of the Null Hypotheses of a Flat and a Constant Marginal Causal Effect Predictiveness Curve
#'
#' Computes a two-sided p-value either from the test of \{\eqn{H_0^1: mCEP(s_1)=CE} for all \eqn{s_1}\}, where \eqn{CE} is the overall causal treatment effect on the clinical
#' endpoint, or from the test of \{\eqn{H_0^2: mCEP(s_1)=c} for all \eqn{s_1} in the interval \code{limS1} and a specified constant \eqn{c}\}, each against a general alternative
#' hypothesis. The testing procedures are described in Juraska, Huang, and Gilbert (2018) and are based on the simultaneous estimation method of Roy and Bose (1953).
#'
#' @param object an object returned by \code{\link{riskCurve}}
#' @param boot an object returned by \code{\link{bootRiskCurve}}
#' @param contrast a character string specifying the mCEP curve. It must be one of \code{te} (treatment efficacy), \code{rr} (relative risk), \code{logrr}
#' (log relative risk), and \code{rd} (risk difference [placebo minus treatment]).
#' @param null a character string specifying the null hypothesis to be tested; one of \code{H01} and \code{H02} as introduced above
#' @param overallPlaRisk a numeric value of the estimated overall clinical endpoint risk in the placebo group. It is required when \code{null} equals \code{H01}.
#' @param overallTxRisk a numeric value of the estimated overall clinical endpoint risk in the treatment group. It is required when \code{null} equals \code{H01}.
#' @param MCEPconstantH02 the constant \eqn{c} in the null hypothesis \eqn{H_0^2}. It is required when \code{null} equals \code{H02}.
#' @param limS1 a numeric vector of length 2 specifying an interval that is a subset of the support of \eqn{S(1)} and that is used in the evaluation of the null hypothesis
#' \eqn{H_0^2}. If \code{NULL} (default), then \eqn{H_0^2} is evaluated for all \eqn{s_1}.
#'
#' @return A numeric value representing the two-sided p-value from the test of either \eqn{H_0^1} or \eqn{H_0^2}.
#'
#' @references Juraska, M., Huang, Y., and Gilbert, P. B. (2020), Inference on treatment effect modification by biomarker response in a three-phase sampling design, Biostatistics, 21(3): 545-560, \url{https://doi.org/10.1093/biostatistics/kxy074}.
#'
#' Roy, S. N. and Bose, R. C. (1953), Simultaneous condence interval estimation, \emph{The Annals of Mathematical Statistics}, 24, 513-536.
#'
#' @examples
#' n <- 500
#' Z <- rep(0:1, each=n/2)
#' S <- MASS::mvrnorm(n, mu=c(2,2,3), Sigma=matrix(c(1,0.9,0.7,0.9,1,0.7,0.7,0.7,1), nrow=3))
#' p <- pnorm(drop(cbind(1,Z,(1-Z)*S[,2],Z*S[,3]) %*% c(-1.2,0.2,-0.02,-0.2)))
#' Y <- sapply(p, function(risk){ rbinom(1,1,risk) })
#' X <- rbinom(n,1,0.5)
#' # delete S(1) in placebo recipients
#' S[Z==0,3] <- NA
#' # delete S(0) in treatment recipients
#' S[Z==1,2] <- NA
#' # generate the indicator of being sampled into the phase 2 subset
#' phase2 <- rbinom(n,1,0.4)
#' # delete Sb, S(0) and S(1) in controls not included in the phase 2 subset
#' S[Y==0 & phase2==0,] <- c(NA,NA,NA)
#' # delete Sb in cases not included in the phase 2 subset
#' S[Y==1 & phase2==0,1] <- NA
#' data <- data.frame(X,Z,S[,1],ifelse(Z==0,S[,2],S[,3]),Y)
#' colnames(data) <- c("X","Z","Sb","S","Y")
#' qS <- quantile(data$S, probs=c(0.05,0.95), na.rm=TRUE)
#' grid <- seq(qS[1], qS[2], length.out=3)
#' \donttest{
#' out <- riskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data, psGrid=grid)
#' boot <- bootRiskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data,
#' psGrid=grid, iter=2, seed=10)
#' fit <- glm(Y ~ Z, data=data, family=binomial)
#' prob <- predict(fit, newdata=data.frame(Z=0:1), type="response")
#'
#' testConstancy(out, boot, contrast="te", null="H01", overallPlaRisk=prob[1],
#' overallTxRisk=prob[2])
#' testConstancy(out, boot, contrast="te", null="H02", MCEPconstantH02=0, limS1=c(qS[1],1.5))
#' }
#'
#' @seealso \code{\link{riskCurve}}, \code{\link{bootRiskCurve}} and \code{\link{testEquality}}
#' @export
testConstancy <- function(object, boot, contrast=c("te", "rr", "logrr", "rd"), null=c("H01", "H02"), overallPlaRisk=NULL, overallTxRisk=NULL, MCEPconstantH02=NULL, limS1=NULL){
contrast <- match.arg(contrast)
null <- match.arg(null)
if (null=="H01"){
if (is.null(overallPlaRisk) | is.null(overallTxRisk)){ stop("'overallPlaRisk' and 'overallTxRisk' must be specified for the test of H01.") }
}
if (null=="H02"){
if (is.null(MCEPconstantH02)){ stop("'MCEPconstantH02' must be specified for the test of H02.") }
# trim the risk curves if 'limS1' is specified
if (!is.null(limS1)){
object$plaRiskCurve <- object$plaRiskCurve[object$psGrid>=limS1[1] & object$psGrid<=limS1[2]]
object$txRiskCurve <- object$txRiskCurve[object$psGrid>=limS1[1] & object$psGrid<=limS1[2]]
boot$plaRiskCurveBoot <- boot$plaRiskCurveBoot[boot$psGrid>=limS1[1] & boot$psGrid<=limS1[2],,drop=FALSE]
boot$txRiskCurveBoot <- boot$txRiskCurveBoot[boot$psGrid>=limS1[1] & boot$psGrid<=limS1[2],,drop=FALSE]
}
}
# transformed estimated MCEP curve
tMCEP <- tContrastRiskCurve(object, contrast)
# transformed bootstrapped MCEP curves
tbMCEP <- tContrastRiskCurve(boot, contrast)
# bootstrap SE based on tbMCEP estimates
bSE <- apply(tbMCEP, 1, sd, na.rm=TRUE)
# calculate the supremum statistic for each bootstrap sample
supAbsZ <- NULL
for (j in 1:NCOL(tbMCEP)){
Zstat <- abs((tbMCEP[,j]-tMCEP)/bSE)
supAbsZ <- c(supAbsZ, max(Zstat, na.rm=!all(is.na(Zstat))))
}
if (null=="H01"){
tCEest <- ifelse(contrast=="rd", overallPlaRisk - overallTxRisk, log(overallTxRisk/overallPlaRisk))
testStat <- max(abs(tMCEP-tCEest)/bSE, na.rm=TRUE)
return(mean(supAbsZ > testStat))
}
if (null=="H02"){
tMCEPconstantH02 <- switch(contrast, te=log(1-MCEPconstantH02), rr=log(MCEPconstantH02), logrr=MCEPconstantH02, rd=MCEPconstantH02)
testStat <- max(abs(tMCEP-tMCEPconstantH02)/bSE, na.rm=TRUE)
return(mean(supAbsZ > testStat))
}
}
#' Testing of the Null Hypothesis of Equal Marginal Causal Effect Predictiveness Curves for Two Biomarkers, Endpoints, or Baseline Covariate Subgroups
#'
#' Computes a two-sided p-value either from the test of \{\eqn{H_0^3: mCEP_1(s_1)=mCEP_2(s_1)} for all \eqn{s_1} in \code{limS1}\}, where \eqn{mCEP_1} and \eqn{mCEP_2} are
#' each associated with either a different biomarker (measured in the same units) or a different endpoint or both, or from the test of \{\eqn{H_0^4: mCEP(s_1|X=0)=
#' mCEP(s_1|X=1)} for all \eqn{s_1} in \code{limS1}\}, where \eqn{X} is a baseline dichotomous phase 1 covariate of interest, each against a general alternative
#' hypothesis. The testing procedures are described in Juraska, Huang, and Gilbert (2018) and are based on the simultaneous estimation method of Roy and Bose (1953).
#'
#' @param object1 an object returned by \code{\link{riskCurve}} pertaining to either \eqn{mCEP_1(s_1)} in \eqn{H_0^3} or \eqn{mCEP(s1|X=0)} in \eqn{H_0^4}
#' @param object2 an object returned by \code{\link{riskCurve}} pertaining to either \eqn{mCEP_2(s_1)} in \eqn{H_0^3} or \eqn{mCEP(s1|X=1)} in \eqn{H_0^4}
#' @param boot1 an object returned by \code{\link{bootRiskCurve}} pertaining to either \eqn{mCEP_1(s_1)} in \eqn{H_0^3} or \eqn{mCEP(s1|X=0)} in \eqn{H_0^4}
#' @param boot2 an object returned by \code{\link{bootRiskCurve}} pertaining to either \eqn{mCEP_2(s_1)} in \eqn{H_0^3} or \eqn{mCEP(s1|X=1)} in \eqn{H_0^4}
#' @param contrast a character string specifying the mCEP curve. It must be one of \code{te} (treatment efficacy), \code{rr} (relative risk), \code{logrr}
#' (log relative risk), and \code{rd} (risk difference [placebo minus treatment]).
#' @param null a character string specifying the null hypothesis to be tested; one of \code{H03} and \code{H04} as introduced above
#' @param limS1 a numeric vector of length 2 specifying an interval that is a subset of the support of \eqn{S(1)}. If \code{NULL} (default), then the specified null
#' hypothesis is evaluated for all \eqn{s_1}.
#'
#' @return A numeric value representing the two-sided p-value from the test of either \eqn{H_0^3} or \eqn{H_0^4}.
#'
#' @references Juraska, M., Huang, Y., and Gilbert, P. B. (2020), Inference on treatment effect modification by biomarker response in a three-phase sampling design, Biostatistics, 21(3): 545-560, \url{https://doi.org/10.1093/biostatistics/kxy074}.
#'
#' Roy, S. N. and Bose, R. C. (1953), Simultaneous condence interval estimation, \emph{The Annals of Mathematical Statistics}, 24, 513-536.
#'
#' @examples
#' n <- 500
#' Z <- rep(0:1, each=n/2)
#' S <- MASS::mvrnorm(n, mu=c(2,2,3), Sigma=matrix(c(1,0.9,0.7,0.9,1,0.7,0.7,0.7,1), nrow=3))
#' p <- pnorm(drop(cbind(1,Z,(1-Z)*S[,2],Z*S[,3]) %*% c(-1.2,0.2,-0.02,-0.2)))
#' Y <- sapply(p, function(risk){ rbinom(1,1,risk) })
#' X <- rbinom(n,1,0.5)
#' # delete S(1) in placebo recipients
#' S[Z==0,3] <- NA
#' # delete S(0) in treatment recipients
#' S[Z==1,2] <- NA
#' # generate the indicator of being sampled into the phase 2 subset
#' phase2 <- rbinom(n,1,0.4)
#' # delete Sb, S(0) and S(1) in controls not included in the phase 2 subset
#' S[Y==0 & phase2==0,] <- c(NA,NA,NA)
#' # delete Sb in cases not included in the phase 2 subset
#' S[Y==1 & phase2==0,1] <- NA
#' data <- data.frame(X,Z,S[,1],ifelse(Z==0,S[,2],S[,3]),Y)
#' colnames(data) <- c("X","Z","Sb","S","Y")
#' qS <- quantile(data$S, probs=c(0.05,0.95), na.rm=TRUE)
#' grid <- seq(qS[1], qS[2], length.out=3)
#' out0 <- riskCurve(formula=Y ~ S, bsm="Sb", tx="Z", data=data[data$X==0,], psGrid=grid)
#' out1 <- riskCurve(formula=Y ~ S, bsm="Sb", tx="Z", data=data[data$X==1,], psGrid=grid)
#' boot0 <- bootRiskCurve(formula=Y ~ S, bsm="Sb", tx="Z", data=data[data$X==0,],
#' psGrid=grid, iter=2, seed=10)
#' boot1 <- bootRiskCurve(formula=Y ~ S, bsm="Sb", tx="Z", data=data[data$X==1,],
#' psGrid=grid, iter=2, seed=15)
#'
#' testEquality(out0, out1, boot0, boot1, contrast="te", null="H04")
#'
#' @seealso \code{\link{riskCurve}}, \code{\link{bootRiskCurve}} and \code{\link{testConstancy}}
#' @export
testEquality <- function(object1, object2, boot1, boot2, contrast=c("te", "rr", "logrr", "rd"), null=c("H03", "H04"), limS1=NULL){
contrast <- match.arg(contrast)
null <- match.arg(null)
if (!setequal(object1$psGrid, object2$psGrid)){ stop("The estimated risk curves in 'object1' and 'object2' must be evaluated on the same grid of biomarker values.") }
if (!setequal(boot1$psGrid, boot2$psGrid)){ stop("The bootstrapped risk curves in 'boot1' and 'boot2' must be evaluated on the same grid of biomarker values.") }
# trim the risk curves if 'limS1' is specified
if (!is.null(limS1)){
object1$plaRiskCurve <- object1$plaRiskCurve[object1$psGrid>=limS1[1] & object1$psGrid<=limS1[2]]
object1$txRiskCurve <- object1$txRiskCurve[object1$psGrid>=limS1[1] & object1$psGrid<=limS1[2]]
object2$plaRiskCurve <- object2$plaRiskCurve[object2$psGrid>=limS1[1] & object2$psGrid<=limS1[2]]
object2$txRiskCurve <- object2$txRiskCurve[object2$psGrid>=limS1[1] & object2$psGrid<=limS1[2]]
boot1$plaRiskCurveBoot <- boot1$plaRiskCurveBoot[boot1$psGrid>=limS1[1] & boot1$psGrid<=limS1[2],,drop=FALSE]
boot1$txRiskCurveBoot <- boot1$txRiskCurveBoot[boot1$psGrid>=limS1[1] & boot1$psGrid<=limS1[2],,drop=FALSE]
boot2$plaRiskCurveBoot <- boot2$plaRiskCurveBoot[boot2$psGrid>=limS1[1] & boot2$psGrid<=limS1[2],,drop=FALSE]
boot2$txRiskCurveBoot <- boot2$txRiskCurveBoot[boot2$psGrid>=limS1[1] & boot2$psGrid<=limS1[2],,drop=FALSE]
}
# transformed estimated mCEP curves
tMCEP1 <- tContrastRiskCurve(object1, contrast)
tMCEP2 <- tContrastRiskCurve(object2, contrast)
# transformed bootstrapped mCEP curves
tbMCEP1 <- tContrastRiskCurve(boot1, contrast)
tbMCEP2 <- tContrastRiskCurve(boot2, contrast)
# difference in the transformed mCEP curves
difftMCEP <- tMCEP1 - tMCEP2
difftbMCEP <- tbMCEP1 - tbMCEP2
# bootstrap SE based on tbMCEP estimates
if (null=="H03"){
bSE <- apply(difftbMCEP, 1, sd, na.rm=TRUE)
} else {
# take advantage of the independence
bSE <- sqrt(apply(tbMCEP1, 1, var, na.rm=TRUE) + apply(tbMCEP2, 1, var, na.rm=TRUE))
}
# calculate the supremum statistic for each bootstrap sample
supAbsZ <- NULL
for (j in 1:NCOL(difftbMCEP)){
Z <- abs((difftbMCEP[,j]-difftMCEP)/bSE)
supAbsZ <- c(supAbsZ, max(Z, na.rm=!all(is.na(Z))))
}
testStat <- max(abs(difftMCEP)/bSE, na.rm=TRUE)
return(mean(supAbsZ > testStat))
}
#' Plotting of the Estimated Marginal Causal Effect Predictiveness Curve
#'
#' Plots point estimates and, if available, pointwise and simultaneous Wald-type bootstrap confidence intervals for the specified marginal causal effect predictiveness (mCEP)
#' curve.
#'
#' @param object an object returned by \code{\link{summary.riskCurve}}
#' @param confLevel the confidence level (0.95 by default) of pointwise and simultaneous confidence intervals
#' @param hingePoint the hinge point estimate (\code{NULL} by default)
#' @param title a character string specifying the plot title
#' @param xLab a character string specifying the x-axis label (\code{NULL} by default)
#' @param yLab a character string specifying the y-axis label (\code{NULL} by default)
#' @param yLim a numeric vector of length 2 specifying the y-axis range (\code{NULL} by default)
#' @param pType a character string specifying the type of plot. Possible options are \code{"l"} for lines (default) and \code{"p"} for points.
#'
#' @return None. The function is called solely for plot generation.
#'
#' @examples
#' n <- 500
#' Z <- rep(0:1, each=n/2)
#' S <- MASS::mvrnorm(n, mu=c(2,2,3), Sigma=matrix(c(1,0.9,0.7,0.9,1,0.7,0.7,0.7,1), nrow=3))
#' p <- pnorm(drop(cbind(1,Z,(1-Z)*S[,2],Z*S[,3]) %*% c(-1.2,0.2,-0.02,-0.2)))
#' Y <- sapply(p, function(risk){ rbinom(1,1,risk) })
#' X <- rbinom(n,1,0.5)
#' # delete S(1) in placebo recipients
#' S[Z==0,3] <- NA
#' # delete S(0) in treatment recipients
#' S[Z==1,2] <- NA
#' # generate the indicator of being sampled into the phase 2 subset
#' phase2 <- rbinom(n,1,0.3)
#' # delete Sb, S(0) and S(1) in controls not included in the phase 2 subset
#' S[Y==0 & phase2==0,] <- c(NA,NA,NA)
#' # delete Sb in cases not included in the phase 2 subset
#' S[Y==1 & phase2==0,1] <- NA
#' data <- data.frame(X,Z,S[,1],ifelse(Z==0,S[,2],S[,3]),Y)
#' colnames(data) <- c("X","Z","Sb","S","Y")
#' qS <- quantile(data$S, probs=c(0.05,0.95), na.rm=TRUE)
#' grid <- seq(qS[1], qS[2], length.out=3)
#' \donttest{
#' out <- riskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data, psGrid=grid)
#' boot <- bootRiskCurve(formula=Y ~ S + factor(X), bsm="Sb", tx="Z", data=data,
#' psGrid=grid, iter=2, seed=10)
#' sout <- summary(out, boot, contrast="te")
#' plotMCEPcurve(sout)
#' }
#'
#' @seealso \code{\link{riskCurve}}, \code{\link{bootRiskCurve}} and \code{\link{summary.riskCurve}}
#' @export
plotMCEPcurve <- function(object, confLevel=0.95, hingePoint=NULL, title=NULL, xLab=NULL, yLab=NULL, yLim=NULL, pType=c("l","p")){
pType <- match.arg(pType)
cexTitle <- 1.6
cexLab <- 1.4
cexAxis <- 1.3
cexLegend <- 1.2
if (is.null(yLim)){
yLim <- range(object[,-1], na.rm=TRUE)
yLim <- c(yLim[1]-0.1*(yLim[2]-yLim[1]), yLim[2])
}
if (is.null(xLab)){ xLab <- expression(paste("Biomarker Response at ",t[0])) }
if (is.null(yLab)){ yLab <- switch(colnames(object)[2], te="Treatment Efficacy", rr="Relative Risk", logrr="Log Relative Risk", rd="Risk Difference (Pla - Tx)") }
par(mar=c(5,5,2,1), cex.lab=cexLab, cex.axis=cexAxis, las=1)
plot(object[,1], object[,2], type="n", ylim=yLim, xlab=xLab, ylab=yLab)
if (!is.null(title)){ mtext(title, side=3, line=0.5, cex=cexTitle) }
abline(h=ifelse(colnames(object)[2]=="rr", 1, 0), lty="dotted", lwd=2, col="gray50")
if (pType=="l"){
lines(object[,1], object[,2], lwd=3.5)
# if interval estimates are available in the data frame
if (NCOL(object) > 2){
lines(object[,1], object$ptLB, lty="dashed", lwd=3)
lines(object[,1], object$ptUB, lty="dashed", lwd=3)
lines(object[,1], object$smLB, lty="dotdash", lwd=3)
lines(object[,1], object$smUB, lty="dotdash", lwd=3)
legend("bottomleft", lty=c("dashed","dotdash"), lwd=3, legend=c(paste0("Pointwise ",confLevel*100,"% CI"), paste0("Simultaneous ",confLevel*100,"% CI")),
cex=cexLegend, bty="n")
}
} else {
points(object[,1], object[,2], lwd=3.5, pch=16, cex=1.6)
# if interval estimates are available in the data frame
if (NCOL(object) > 2){
points(object[,1], object$ptLB, lty="dashed", lwd=3, pch=1)
points(object[,1], object$ptUB, lty="dashed", lwd=3, pch=1)
points(object[,1], object$smLB, lty="dotdash", lwd=3, pch=4)
points(object[,1], object$smUB, lty="dotdash", lwd=3, pch=4)
legend("bottomleft", pch=c(1,4), legend=c(paste0("Pointwise ",confLevel*100,"% CI"), paste0("Simultaneous ",confLevel*100,"% CI")),
cex=cexLegend, bty="n")
}
}
if (!is.null(hingePoint)){ legend("bottomright", paste0("Hinge Point = ", round(hingePoint,2)," "), cex=cexLegend, bty="n") }
}
npcdensbw.formula <- function (formula, data, subset, na.action, call, ...){
orig.class <- if (missing(data))
sapply(eval(attr(terms(formula), "variables"), environment(formula)),
class)
else sapply(eval(attr(terms(formula), "variables"), data,
environment(formula)), class)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "na.action"), names(mf),
nomatch = 0)
mf <- mf[c(1, m)]
if (!missing(call) && is.call(call)) {
for (i in 1:length(call)) {
if (tryCatch(class(eval(call[[i]])) == "formula",
error = function(e) FALSE))
break
}
mf[[2]] <- call[[i]]
}
mf[[1]] <- as.name("model.frame")
# patch
#if (m[2] > 0) { # use data as environment
# mf[["formula"]] = eval(mf[[m[1]]], environment(mf[[m[2]]]))
#} else { # use parent frame
mf[["formula"]] = eval(mf[[m[1]]], parent.frame())
#}
# end of patch
variableNames <- explodeFormula(mf[["formula"]])
varsPlus <- lapply(variableNames, paste, collapse = " + ")
mf[["formula"]] <- as.formula(paste(" ~ ", varsPlus[[1]],
" + ", varsPlus[[2]]), env = environment(formula))
mf[["formula"]] <- terms(mf[["formula"]])
if (all(orig.class == "ts")) {
args <- (as.list(attr(mf[["formula"]], "variables"))[-1])
attr(mf[["formula"]], "predvars") <- as.call(c(quote(as.data.frame),
as.call(c(quote(ts.intersect), args))))
}
else if (any(orig.class == "ts")) {
arguments <- (as.list(attr(mf[["formula"]], "variables"))[-1])
arguments.normal <- arguments[which(orig.class != "ts")]
arguments.timeseries <- arguments[which(orig.class ==
"ts")]
ix <- sort(c(which(orig.class == "ts"), which(orig.class !=
"ts")), index.return = TRUE)$ix
attr(mf[["formula"]], "predvars") <- bquote(.(as.call(c(quote(cbind),
as.call(c(quote(as.data.frame), as.call(c(quote(ts.intersect),
arguments.timeseries)))), arguments.normal, check.rows = TRUE)))[,
.(ix)])
}
mf <- eval(mf, parent.frame())
ydat <- mf[, variableNames[[1]], drop = FALSE]
xdat <- mf[, variableNames[[2]], drop = FALSE]
tbw = npcdensbw(xdat = xdat, ydat = ydat, ...)
tbw$call <- match.call(expand.dots = FALSE)
environment(tbw$call) <- parent.frame()
tbw$formula <- formula
tbw$rows.omit <- as.vector(attr(mf, "na.action"))
tbw$nobs.omit <- length(tbw$rows.omit)
tbw$terms <- attr(mf, "terms")
tbw$variableNames <- variableNames
tbw
}
explodeFormula <- function (formula){
res <- strsplit(strsplit(paste(deparse(formula), collapse = ""),
" *[~] *")[[1]], " *[+] *")
stopifnot(all(sapply(res, length) > 0))
names(res) <- c("response", "terms")
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.