mROC_class_template<-list(p=NA,FPs=NA,TPs=NA)
class(mROC_class_template)<-"mROC"
mROC_inference_template<-list(n_sim=NA,stats=c(A=NA,B=NA,A.dir=NA),pvals=c(A=NA,B=NA),null_stats=c(A.mu=NA,A.se=NA,B.mu=NA,b.se=NA),stat=c(value=NA,df=NA),pval=NA)
class(mROC_inference_template)<-"mROC_inference"
#' @export
print.mROC_inference<-function(x,...)
{
obj <- x
cat("Mean calibration statistic (A):",obj$stats['A'],"(",ifelse(obj$stats['A.dir'],"Obs>Pred","Obs<Pred") ,") (p:",obj$pvals['A'],")\nmROC/ROC equality statsitic (B):",obj$stats['B']," (p:",obj$pvals['B'],")\nUnified statistic:",obj$stat['value']," (df:",obj$stat['df'],",p:",obj$pval,")",sep ="")
return(invisible(obj$pval))
}
aux<-environment()
#' @export
plot.mROC<-function(x,...)
{
mROC_obj <- x
step <- list(...)$step
if(is.null(step)) step <- FALSE
if(step)
{
sf<-stepfun(mROC_obj$FPs,c(0,mROC_obj$TPs))
plot(sf,xlim=c(1,0),ylim=c(0,1),type='l',xlab="Specificity",ylab="Sensitivity",...)
}
else
{
#TODO: let possible xlim and ylim from ... override the default
plot(1-mROC_obj$FPs,mROC_obj$TPs,xlim=c(1,0),ylim=c(0,1),type='l',xlab="Specificity",ylab="Sensitivity",...)
}
}
#' @export
lines.mROC<-function(x,...)
{
mROC_obj <- x
#sf<-stepfun(mROC_obj$FPs,c(0,mROC_obj$TPs))
#TODO: let possible xlim and ylim from ... override the default
xlim<-par("usr")[1:2]
x<-mROC_obj$FPs
if(xlim[1]>xlim[2]) x<-1-x
lines(x,mROC_obj$TPs,type='l',...)
}
#' @title Calculates mROC from the vector of predicted risks
#' Takes in a vector of probabilities and returns mROC values (True positives, False Positives in an object of class mROC)
#' @param p A numeric vector of probabilities.
#' @param ordered Optional, if the vector p is ordered from small to large (if not the function will do it; TRUE is to facilitate fast computations).
#' @return This function returns an object of class mROC. It has three vectors: thresholds on predicted risks (which is the ordered vector of input probabilities), false positive rates (FPs), and true positive rates (TPs). You can directly call the plot function on this object to draw the mROC
#' @export
mROC<-function(p, ordered=FALSE)
{
if(min(p)<0 || max(p)>1) {stop("Error: invalid probability vector."); return(-1); }
if(!ordered) p<-p[order(p)]
sumP1<-sum(p)
sumP0<-sum(1-p)
n<-length(p)
tp<-0
fp<-0
roc<-cbind(fp=rep(NA,length(p)),tp=rep(NA,length(p)))
for(i in n:1)
{
fp<-fp+(1-p[i])/sumP0
tp<-tp+p[i]/sumP1
roc[n-i+1,]<-c(fp,tp)
}
out<-mROC_class_template
out$p=p
out$FPs<-c(0,roc[,1])
out$TPs<-c(0,roc[,2])
return(out)
}
#' Takes in a mROC object and calculates the area under the curve
#' @param mROC_obj An object of class mROC
#' @return Returns the area under the mROC curve
#' @export
mAUC<-function(mROC_obj)
{
l<-length(mROC_obj$FPs)
x<-mROC_obj$FPs[-1]-mROC_obj$FPs[-l]
a<-sum(x*mROC_obj$TPs[-1])
b<-sum(x*mROC_obj$TPs[-l])
return((a+b)/2)
}
#' Calculates the absolute surface between the empirical and expected ROCs
#' @param y y vector of binary responses
#' @param p p vector of predicted probabilities (same length as y)
#' @param ordered defaults to false
#' @param fast defaults to true
#' @return Returns a list with the A (mean calibration statistic) and B (mROC/ROC equality statistic) as well as the direction of poential miscalibration (sign of the difference between the ctual and predicted mean risk)
#' @export
calc_mROC_stats<-function(y, p, ordered=FALSE, fast=TRUE)
{
if(!ordered)
{
o<-order(p)
p<-p[o]
y<-y[o]
}
if(fast)
{
tmp<-Ccalc_mROC_stats(p,y) #Warning: The C code still takes p as first argument
return(c(A=tmp[1],B=tmp[2],A.dir=(mean(y-p)>0)))
}
n0<-length(which(y==0))
n1<-length(which(y==1))
n<-n0+n1
sumP1<-sum(p)
sumP0<-sum(1-p)
xo<-0
xe<-0
yo<-0
ye<-0
io<-n
ie<-n
B<-0
step<-0
#plot(c(0,1),c(0,1))
while(io>0 && ie>0)
{
if(xo<xe) #xo is behind and has to make a jump
{
if(y[io]==1)
{
step<-0
yo<-yo+1/n1
}
else
{
step<-1/n0
B<-B+abs(yo-ye)*min(step,xe-xo)
}
xo<-xo+step
io<-io-1
}
else #now xe is behind
{
step<-(1-p[ie])/sumP0
B<-B+abs(yo-ye)*min(step,xo-xe)
xe<-xe+step
ye<-ye+p[ie]/sumP1
ie<-ie-1
}
#lines(xe,ye,col='red',type='o')
#lines(xo,yo,type='o')
}
tmp<-mean(y-p)
return(list(A=abs(tmp),B=B,A.dir=(tmp>0)))
}
#' Statistical inference for comparing empirical and expected ROCs. If CI=TRUE then also returns pointwise CIs
#'
#' @param p vector of probabilities
#' @param y vector of binary response values
#' @param n_sim number of Monte Carlo simulations to calculate p-value
#' @param CI optional. Whether confidence interval should be calculated for each point of mROC. Default is FALSE.
#' @param aux aux optional. whether additional results (component-wise p-values etc) should be written in the package's aux variable. Default is FALSE.
#' @param fast fast optional. Whether the fast code (C++) or slow code (R) should be called. Default is TRUE (R code will be slow unless the dataset is small)
#' @return Returns an object of type mROC_inference containing the results of statistical inference for the mROC curve
#' @export
mROC_inference<-function(y,p,n_sim=100000,CI=FALSE,aux=FALSE,fast=TRUE)
{
conditional <- FALSE
out<-mROC_inference_template
out$n_sim<-n_sim
n<-length(p)
if(aux)
{
aux$AB<<-matrix(NA, nrow =n_sim, ncol=2)
}
o<-order(p)
p<-p[o]
y<-y[o]
if(conditional)
{
stop("Not implemented yet.")
}
else #If conditional
{
if(fast)
{
n1<-sum(y)
tmp<-Csimulate_null_mROC_stats_unconditional(p,n_sim)
stats<-calc_mROC_stats(y,p)
out$stats<-stats
out$null_stats<-c(A.mu=mean(tmp[,1]),A.se=sqrt(var(tmp[,1]/length(tmp[,1]))),B.mu=mean(tmp[,2]),B.se=sqrt(var(tmp[,2]/length(tmp[,2]))))
cdf1<-ecdf(x = tmp[,1])
cdf2<-ecdf(x = tmp[,2])
p1<-1-cdf1(stats[1])
p2<-1-cdf2(stats[2])
d<- -2*(log(p1)+log(p2))
p1s<-cdf1(tmp[,1])
p2s<-cdf2(tmp[,2])
ds<--2*(log(p1s)+log(p2s))
var_ds<-var(ds)
e_ds<-mean(ds)
c<-var_ds/2/e_ds
k<-2*e_ds^2/var_ds
out$stat<-c(value=d/c,df=k)
p3<-1-pchisq(q = d/c, df = k)
out$pval<-p3
out$pvals<-c(A=p1,B=p2)
if(aux)
{
p3s<-1-pchisq(q = ds/c, df = k)
aux$ds<<-tmp
aux$pvals<<-cbind(p1s,p2s,p3s)
}
}
else #if (fast)
{
tmp<-matrix(NA,n_sim,2)
sns<-matrix(NA,nrow = n+1, ncol = n_sim)
for(i in 1:n_sim)
{
y<-rbinom(n,size = 1,prob=p)
if(CI)
{
res<-roc(y,p,quiet = T)
sns[,i]<-coords(res,(0:n)/n,input="specificity",transpose=FALSE)[,"sensitivity",]
}
tmp[i,]<-calc_mROC_stats(y,p,ordered = TRUE)
if(aux)
{
aux$AB[i,]<<-c(mean(y)-mean(p),tmp[i])
}
}
}
} #else conditional
if(CI)
{
tmp2<-apply(sns,MARGIN = 1,FUN = ecdf)
out$low<-unlist(lapply(tmp2,quantile,0.025))
out$high<-unlist(lapply(tmp2,quantile,0.975))
}
return(out)
}
#' Main eROC analysis that plots ROC and eROC
#'
#' @param y y vector of observed responses.
#' @param p p vector of predicted probabilities (the same length as observed responses)
#' @param inference 0 for no inference, 1 for p-value only, and 2 for p-value and 95 percent CI.
#' @param n_sim number of simulations
#' @param fast defaults to true
#' @return returns a list containing the results of mROC analysis.
#' @export
mROC_analysis<-function(y,p,inference=0, n_sim, fast=TRUE)
{
if(inference==2 && fast) stop("Confidence intervals are currently only available when fast=FALSE")
roc_data<-pROC::roc(y,p)
plot(roc_data)
out<-list()
out$roc_data<-roc_data
message("AUC is ",roc_data$auc)
res<-mROC(p)
out$mROC_data<-res
message("mAUC is ",mAUC(res))
lines(1-res$FPs,res$TPs,col='red')
if(inference)
{
inf<-mROC_inference(y=y, p=p, CI=(inference==2), n_sim = n_sim, fast=fast)
if(inference==2)
{
n<-length(p)
lines((0:n)/n,inf$low,type='l',col="gray")
lines((0:n)/n,inf$high,type='l',col="gray")
}
out$inference<-inf
message("Test statistic is ",inf$stat,"\n")
message("p-value is ",inf$pval,"\n")
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.