R/AUC.R In tdhock/WeightedROC: Fast, Weighted ROC Curves

```WeightedAUC <- structure(function
### Calculate the exact area under the ROC curve.
(tpr.fpr
### Output of WeightedROC: data.frame with the true positive rate
### (TPR) and false positive rate (FPR).
){
stopifnot(is.data.frame(tpr.fpr))
stopifnot(nrow(tpr.fpr) > 1)
for(var.name in c("TPR", "FPR")){
## Ensure that the curve is sorted in decreasing order.
stopifnot(diff(tpr.fpr[[var.name]]) <= 0)
}
right <- tpr.fpr[-nrow(tpr.fpr),]
left <- tpr.fpr[-1,]
width <- right\$FPR - left\$FPR
rect.area <- left\$TPR * width
triangle.h <- right\$TPR - left\$TPR
triangle.area <- triangle.h * width / 2
my.auc <- sum(rect.area, triangle.area)
my.auc
### Numeric scalar.
}, ex=function(){

library(WeightedROC)
## Compute the AUC for this weighted data set.
y <- c(0, 0, 1, 1, 1)
w <- c(1, 1, 1, 4, 5)
y.hat <- c(1, 2, 3, 1, 1)
tp.fp <- WeightedROC(y.hat, y, w)
(wauc <- WeightedAUC(tp.fp))

if(require(glmnet)){

## glmnet 1.9-5 (2013-8-1) also has a function called auc, with a
## third argument "w" that seems to be for weights. In a data set
## with no tied scores, glmnet::auc is the same as WeightedAUC.
y <- c(0, 0, 1, 0, 1)
y.hat <- c(1, 2, 3, 4, 5)
w <- c(1, 1, 1, 4, 5)
rbind(
`glmnet::auc`=c(
missing.weights=glmnet::auc(y, y.hat),
unequal.weights=glmnet::auc(y, y.hat, w),
equal.weights=glmnet::auc(y, y.hat, rep(1, length(y)))),
`WeightedROC::WeightedAUC`=c(
WeightedAUC(WeightedROC(y.hat, y)),
WeightedAUC(WeightedROC(y.hat, y, w)),
WeightedAUC(WeightedROC(y.hat, y, rep(1, length(y))))))

## In a data set with tied scores, using glmnet::auc(y,prob) with
## a missing third argument gives the same result as WeightedAUC,
## but glmnet::auc(y,prob,w) does not.
set.seed(1)
y <- c(0, 0, 1, 1, 1)
w <- c(1, 1, 1, 4, 5)
y.hat <- c(1, 2, 3, 1, 1)
rbind(
`glmnet::auc`=c(
missing.weights=glmnet::auc(y, y.hat),
unequal.weights=glmnet::auc(y, y.hat, w),
equal.weights=glmnet::auc(y, y.hat, rep(1, length(y)))),
`WeightedROC::WeightedAUC`=c(
WeightedAUC(WeightedROC(y.hat, y)),
wauc,
WeightedAUC(WeightedROC(y.hat, y, rep(1, length(y))))))

}

## For the un-weighted ROCR example data set, verify that our AUC is
## the same as that of ROCR/pROC/glmnet. Note that glmnet::auc is
## faster than WeightedROC::WeightedAUC because glmnet::auc it does
## not include the overhead of computing a data.frame(TPR, FPR) that
## could be plotted.
if(require(microbenchmark) && require(ROCR) && require(pROC)){
data(ROCR.simple, envir=environment())
microbenchmark(WeightedROC={
tp.fp <- with(ROCR.simple, WeightedROC(predictions, labels))
wroc <- WeightedAUC(tp.fp)
}, ROCR={
pred <- with(ROCR.simple, prediction(predictions, labels))
rocr <- performance(pred, "auc")@y.values[[1]]
}, pROC={
proc <- pROC::auc(labels ~ predictions, ROCR.simple, algorithm=2)
}, glmnet={
gnet <- with(ROCR.simple, {
glmnet::auc(labels, predictions, rep(1, length(labels)))
})
}, times=10)
rbind(WeightedROC=wroc, ROCR=rocr, pROC=proc, glmnet=gnet) #same
}

## For the un-weighted pROC example data set, verify that our AUC is
## the same as that of ROCR/pROC/glmnet.
data(aSAH, envir=environment())
table(aSAH\$s100b)
if(require(microbenchmark)){
microbenchmark(WeightedROC={
tp.fp <- with(aSAH, WeightedROC(s100b, outcome))
wroc <- WeightedAUC(tp.fp)
}, ROCR={
pred <- with(aSAH, prediction(s100b, outcome))
rocr <- performance(pred, "auc")@y.values[[1]]
}, pROC={
proc <- pROC::auc(outcome ~ s100b, aSAH, algorithm=2)
}, glmnet={
gnet <- with(aSAH, {
glmnet::auc(ifelse(outcome=="Good", 0, 1), s100b, rep(1, length(s100b)))
})
}, times=10)
rbind(WeightedROC=wroc, ROCR=rocr, pROC=proc, glmnet=gnet)
}

})
```
tdhock/WeightedROC documentation built on Oct. 4, 2018, 5:16 p.m.