#' Estimate standard measures of predictive accuracy for two-phase designs using AIPW
#'
#' Estimate measures of predictive accuracy using augmented inverse probability weights (ipw) or true ipw for two-phase biomarker validation studies
#'
#' @param time vector of time to event
#' @param event status indicator (1 for observed failure, 0 for censoring)
#' @param X matrix or data frame of covariates
#' @param subcohort indicator for selection into the subcohort (1 for selected, 0 for not selected)
#' @param aug.weights.x vector of variable to be used to derive augmented inverse probability weights.
#' @param risk.threshold vector of risk thresholds on the absolute risk scale used to calcuate cutoff-based summary measures.
#' @param landmark.time numeric value specifying landmark time at which to estimate the performance measures.
#' @param weight.method either "Aug" for augmented ipw or "True" for true ipw.
#' @param design either "CCH" for case-cohort or "NCC" for nested case control specifying the subcohort design used.
#' @param calculate.sd should analytic standard errors be calculated.
#' @param smoothing.par nearest neighbor smoothing parameter used in locfit (default = 0.7)
#' @param pnf.threshold value used to calculate proportion needed to follow (PNF) such that pnf.threshold percent of the cases are classified high risk.
#' @param pcf.threshold value used to calculate proportion of cases followed (PCF) if pcf.threshold percent of the population are classified high risk.
#' @param ncc.nmatch For design = "NCC", specify the number of controls matched per case.
#'
#' @note variance calculations are unavailable for the NB measure.
#'
#' @return data.frame with estimates of AUC, IDI, ITPR, IFPR, TPR, FPR, PPV, NPV and net benefit (NB) and standard errors.
#'
#' @examples
#'
#'data(CCHsimdata)
#'
#'predict.time <- 0.75
#'
#'## augmented ipw
#'AIPWmeasures( time = CCHsimdata$xi, event = CCHsimdata$di,
#' X = cbind(CCHsimdata$y1, CCHsimdata$y2),
#' subcohort = CCHsimdata$vi,
#' aug.weights.x = CCHsimdata$y1,
#' risk.threshold = c(.05, .3),
#' landmark.time = predict.time,
#' weight.method = 'Aug',
#' design = "CCH",
#' smoothing.par = 0.9,
#' calculate.sd = TRUE,
#' pnf.threshold = 0.85,
#' pcf.threshold = 0.2)
#'
#'
#'#simulated data from a ncc design with nmatch = 2
#'data("NCCsimdata")
#'AIPWmeasures( time = NCCsimdata$xi, event = NCCsimdata$di,
#' X = cbind(NCCsimdata$y1, NCCsimdata$y2),
#' subcohort = NCCsimdata$vi,
#' aug.weights.x = NCCsimdata$y1,
#' risk.threshold = c(.01, .03),
#' landmark.time = predict.time,
#' weight.method = 'Aug',
#' design = "NCC",
#' smoothing.par = 0.9,
#' calculate.sd = TRUE,
#' pnf.threshold = 0.85,
#' pcf.threshold = 0.2,
#' ncc.nmatch = 2)
#'
#' @import locfit
#' @import survival
#' @export
#'
AIPWmeasures <- function(time,event, X, subcohort,
aug.weights.x = NULL,
risk.threshold,
landmark.time,
weight.method = c("Aug", "True"),
design= c("CCH", "NCC"),
calculate.sd = TRUE,
smoothing.par = 0.7,
pnf.threshold = 0.5,
pcf.threshold = 0.2,
ncc.nmatch = NULL)
{
###### checks
weight.method <- match.arg(weight.method)
design <- match.arg(design)
stopifnot(is.numeric(time))
stopifnot(is.indicator(event))
stopifnot(is.indicator(subcohort))
stopifnot(length(time) == nrow(X))
stopifnot(length(event) == nrow(X))
if(weight.method == "Aug") if(missing(aug.weights.x)) stop("aug.weights.x is needed if weight.method = 'Aug'")
stopifnot(all(risk.threshold > 0))
stopifnot(all(risk.threshold < 1))
stopifnot(pcf.threshold >=0); stopifnot(pcf.threshold <= 1)
stopifnot(pnf.threshold >=0); stopifnot(pnf.threshold <= 1)
stopifnot(is.numeric(landmark.time))
stopifnot(is.logical(calculate.sd))
if(design == "NCC") if(missing(ncc.nmatch)) stop("ncc.nmatch must be set when design = 'NCC'")
###### end checks
z = rep(0, length(time))
cohortdata = data.frame(xi = time, di = event, zi = z, X, vi = subcohort )
vp = risk.threshold
typey = rep(c("TPR","FPR","PPV","NPV"), length(vp))
typex = c(rep("RiskT",length(typey)))
vp = rep(vp, rep(4, length(vp))) # four outcome measures
NBp = risk.threshold
predict.time = landmark.time
method = weight.method
N = nrow(cohortdata)
nn.par = smoothing.par
CalVar = calculate.sd
mytpr = pnf.threshold # for PNF V(TPR = 0.85)
vv = 1-pcf.threshold # for PCF TPR(v = 0.8)
AugWeightsX = aug.weights.x
nmatch = ncc.nmatch
if (method == 'True'){
if (design == 'CCH') {
cohortdata$wi = 1/P0HAT.CCH.Z.FUN(cohortdata, type=2);
subdata <- cohortdata[cohortdata$vi == 1,]
}
if (design=='NCC') {
cohortdata$wi = ifelse(cohortdata$di==1,1 ,1/P0HAT.NCC.FUN(cohortdata, NULL, nmatch))
subdata <- cohortdata[cohortdata$vi == 1,]
}
AugWeightsX.sub <- NULL
}
#print(sum(subdata$wi))
if (method == "Aug") {
#if (is.null(AugPos)) {print("Need the positions of the variables used in estimating weights")}
tmp.data0 <- cohortdata[cohortdata$di==0,]
tmp.data1 <- cohortdata[cohortdata$di==1,]
#browser()
# input.sm <- tmp.data0[,AugPos]
# tmp.data0$wi = 1/sm.regression(input.sm,tmp.data0$vi,eval.points=input.sm,eval.grid=FALSE, display = "none")$estimate
if(design == "NCC"){
tmp.mod <- locfit(vi~lp(xi, AugWeightsX[event==0], nn=nn.par), data = tmp.data0, family = "binomial", link = "logit")
tmp.data1$wi = 1
tmp.data0$wi <- 1/fitted(tmp.mod, data = tmp.data0)
}else{
# design == "CCH"
tmp.mod <- locfit(vi~lp(AugWeightsX[event==0], nn=nn.par), data = tmp.data0, family = "binomial", link = "logit")
tmp.data0$wi <- 1/fitted(tmp.mod, data = tmp.data0)
tmp.mod <- locfit(vi~lp(AugWeightsX[event==1], nn=nn.par), data = tmp.data1, family = "binomial", link = "logit")
tmp.data1$wi <- 1/fitted(tmp.mod, data = tmp.data1)
}
augw0 <- aug.weights.x[cohortdata$di==0]
augw1 <- aug.weights.x[cohortdata$di==1]
AugWeightsX <- c(augw0, augw1)
cohortdata <- rbind(tmp.data0, tmp.data1)
subdata = cohortdata[cohortdata$vi==1,]
AugWeightsX.sub <- AugWeightsX[cohortdata$vi==1]
}
#print( sum(subdata$wi))
junk = GetRTdata.SEMIPW.sorted.FUN(subdata, predict.time, AugWeightsX.sub) ## data.RT and data are sorted by linear predictor
data.RT <- junk$data.RT
cutoff = data.RT[,1]
ncutoff = length(cutoff)
cutoff = sort(cutoff)[-c(1:4,(ncutoff-4):ncutoff)]
#cutoff = sort(data.RT[,1])
cutpos = sum.I(cutoff,">=", data.RT[,1])
if (sum(cutpos<=0)>0) {
cutoff = cutoff[-c(1:sum(cutpos<=0)) ]}
RT.out <- EstRTall.fixcutoff.FUN(cutoff,data.RT)
betahat <- junk$beta
# TG <- RT.out[[2]]
rho <- RT.out[[3]]
AUC <- RT.out$AUC
IDI <- RT.out$IDI
ITPR <- RT.out$ITPR
IFPR <- RT.out$IFPR
RT.out <- RT.out$RT.out.c
RTvp.out <- NULL
if (!is.null(vp)) {
nvp = length(vp)
RTvp.out = rep(0,nvp)
for (pp in 1:nvp) {
RTvp.out[pp] = EstRTvp.FUN(RT.out,vp[pp],typex[pp],typey[pp])
}
}
if (!is.null(vv)) {
pcf = EstRTvp.FUN(RT.out,vv,'v','TPR')
}else{
pcf = NA
pcf.threshold <- NA
}
if (!is.null(mytpr)) {
pnf = EstRTvp.FUN(RT.out,mytpr,'TPR','v')
}else{
pnf = NA
pnf.threshold <- NA
}
NBp.out = NULL
if (!is.null(NBp)) {
for (jj in 1:length(NBp)) {
NBp.out=rbind(NBp.out,EstNB.FUN(RT.out,NBp[jj],rho))
}
}
#est = data.frame(c(betahat), AUC, IDI, ITPR,IFPR,RTvp.out,pcf,pnf, NB = NBp.out )
out = data.frame(measure = c(paste0("coef.x", 1:length(betahat)),
"AUC", "IDI", "ITPR", "IFPR", typey,"PCF", "PNF", rep("NB", length(risk.threshold))),
threshold = c(rep(NA, length(betahat)), NA, NA, NA, NA, vp, pcf.threshold, pnf.threshold, risk.threshold),
estimate = c(c(betahat), AUC, IDI, ITPR,IFPR,RTvp.out,pcf,pnf, NBp.out ))
if (CalVar) {
subdata = cbind(junk$data,data.RT[,c(2,1)])
names(subdata)=c("times","status", "zi",paste0("y", 1:length(betahat)), "vi", "weights","Sy","linearY")
#cutoff,data,N,RT.out,predict.time,uu0Vec,typexVec,typeyVec,
#vv=NULL,mytpr=NULL, vp
jjunk = Est.Wexp(cutoff = cutoff, data = subdata,N = N, RT.out = RT.out,
predict.time =predict.time,
uu0Vec = vp,typexVec = typex, typeyVec = typey,
vv = vv, mytpr = mytpr, vp = vp)
Wexp = cbind(jjunk$Wexp.beta,jjunk$Wexp.AUC,jjunk$Wexp.IDI,jjunk$Wexp.ITPR,jjunk$Wexp.IFPR,jjunk$Wexp.vp,jjunk$Wexp.pcf,jjunk$Wexp.pnf)
if (method == 'True') {
if (design == 'CCH') {
sd = sqrt(Est.Var.CCH.trueweights(N,Wexp,subdata,subdata$status)[[3]])
}
if (design == 'NCC') {
tk = sort(subdata$times[cohortdata$status == 1])
pi.tk = sum.I(tk, "<=", cohortdata$xi)/N
sd = sqrt(Est.Var.NCC.trueweights(N,Wexp,subdata,subdata$status,tk,pi.tk, nmatch)[[3]])
}
}
if (method == 'Aug') {
if (design =='CCH')
{
sd = sqrt(Est.Var.CCH.augweights(N,Wexp,subdata,AugWeightX = junk$AugWeightX,subdata$status, nn.par = nn.par)[[3]])
}else if(design =='NCC')
{
sd = sqrt(Est.Var.NCC.augweights(N,Wexp,subdata,AugWeightX = junk$AugWeightX, nn.par = nn.par)[[3]])
}
}
#Wexp = cbind(jjunk$Wexp.beta,jjunk$Wexp.AUC,jjunk$Wexp.IDI,jjunk$Wexp.ITPR,jjunk$Wexp.IFPR,
#jjunk$Wexp.vp,jjunk$Wexp.pcf,jjunk$Wexp.pnf)
#cbind(c(betahat, AUC, IDI, ITPR,IFPR,RTvp.out,pcf,pnf, NBp.out), sd)
out$sd = c(sd, rep(NA, length(risk.threshold)))
}
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.