# R/wdists.R in wevid: Quantifying Performance of a Binary Classifier Through Weight of Evidence

#### Documented in auroc.crude, auroc.model, lambda.crude, lambda.model, mean.Wdensities, prop.belowthreshold, recalibrate.p, summary.Wdensities, Wdensities, Wdensities.crude, Wdensities.mix, weightsofevidence

```
##=============================================================================
##
## Copyright (c) 2018-2019 Paul McKeigue
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
##
##=============================================================================

#' Kullback-Leibler divergence of p from q
#'
#' @param p,q Probability distributions.
#'
#' @return
#' The KL divergence expressed in nats.
#'
#' @noRd
kl <- function(p, q) {
    kl <- p * log(p / q)
    kl[p == 0] <- 0
    kl <- mean(kl)
    return(kl)
}
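## Illustrative sketch (not part of the original source): kl() averages the
## pointwise contributions p * log(p / q) rather than summing them, so its
## value is the textbook KL divergence divided by length(p).
p.toy <- c(0.5, 0.5)
q.toy <- c(0.9, 0.1)
kl(p.toy, q.toy)  # about 0.255 nats; the summed divergence would be ~0.511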

#' Convert from natural log units to bits
#'
#' @param x Value expressed in natural log units (nats).
#'
#' @return
#' Value expressed in bits.
#'
#' @noRd
tobits <- function(x) {
    return(log2(exp(1)) * x)
}
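## Illustrative sketch (not part of the original source): one bit equals
## log(2) nats, so the conversion factor log2(exp(1)) maps log(2) to exactly 1.
tobits(log(2))  # 1 bit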

#' Reweight the case and control densities so that their normalizing
#' constants match
#' @noRd
reweight.densities <- function(theta, fhat.ctrls, fhat.cases,
                               n.ctrls, n.cases, xseq, wts) {
    mean.ctrls <- sum(fhat.ctrls * xseq) / sum(fhat.ctrls)
    mean.cases <- sum(fhat.cases * xseq) / sum(fhat.cases)
    weights.ctrls <- n.ctrls * exp(-theta * (xseq - mean.ctrls)^2)
    weights.cases <- n.cases * exp(+theta * (xseq - mean.cases)^2)
    weights.ctrls <- weights.ctrls / sum(weights.ctrls)
    weights.cases <- weights.cases / sum(weights.cases)

    ## average two estimates of the geometric mean density
    fhat.geomean <- wts[, 1] * fhat.ctrls * weights.ctrls * exp(+0.5 * xseq) +
        wts[, 2] * fhat.cases * weights.cases * exp(-0.5 * xseq)

    ## compute case and control densities from the geometric mean
    f.ctrls <- fhat.geomean * exp(-0.5 * xseq)
    f.cases <- fhat.geomean * exp(+0.5 * xseq)
    return(data.frame(f.ctrls=f.ctrls, f.cases=f.cases))
}
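## Math note (added for clarity, not in the original source): consistency of
## the densities requires f.cases(W) = exp(W) * f.ctrls(W), so the geometric
## mean g(W) = sqrt(f.ctrls * f.cases) recovers each density exactly via
## f.ctrls = g * exp(-W/2) and f.cases = g * exp(+W/2).  The two terms in
## fhat.geomean above are the control-based and case-based estimates of g.
## Quick check on an exactly consistent Gaussian pair (hypothetical values):
w.demo <- seq(-5, 5, by=0.1)
f0.demo <- dnorm(w.demo, mean=-1, sd=sqrt(2))  # controls: N(-1, 2)
f1.demo <- dnorm(w.demo, mean=+1, sd=sqrt(2))  # cases: N(+1, 2)
g.demo <- sqrt(f0.demo * f1.demo)
max(abs(g.demo * exp(-0.5 * w.demo) - f0.demo))  # ~0: g recovers f0.demo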

#' Evaluate objective function
#' @noRd
error.integrals <- function(theta, densities, wts) {
    wdens <- with(densities, reweight.densities(theta, f.ctrls, f.cases,
                                                n.ctrls, n.cases, x, wts))
    ## objective function is abs(log(ratio of normalizing constants))
    obj <- abs(log(sum(wdens$f.ctrls) / sum(wdens$f.cases)))
    return(obj)
}

#' Calculate weights of evidence in natural log units
#'
#' @param posterior.p Vector of posterior probabilities generated by using model
#'        to predict on test data.
#' @param prior.p Vector of prior probabilities.
#'
#' @return
#' The weight of evidence in nats for each observation.
#'
#' @examples
#' data(cleveland) # load example dataset
#' W <- with(cleveland, weightsofevidence(posterior.p, prior.p))
#'
#' @export
weightsofevidence <- function(posterior.p, prior.p) {
    validate.probabilities(posterior.p, prior.p)
    W <- (log(posterior.p) - log(1 - posterior.p) -
          log(prior.p / (1 - prior.p)))
    return(W)
}
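## Illustrative sketch (not part of the original source): moving from prior
## odds of 1:9 to posterior odds of 1:1 is a weight of evidence of log(9) nats.
weightsofevidence(posterior.p=0.5, prior.p=0.1)  # log(9), about 2.2 nats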

#' Compute densities of weights of evidence in cases and controls
#'
#' The function computes smoothed densities of the weight of evidence in
#' cases and in controls from the crude probabilities, then adjusts them to
#' make them mathematically consistent so that p(W_ctrl) = exp(-W) p(W_case).
#'
#' If the sample distributions in cases and controls support a 2-component
#' mixture model (based on model comparison with BIC) for the densities, this
#' will be detected and a 2-component mixture model will be fitted before
#' the consistency adjustment is applied.
#' @param y Binary outcome label (0 for controls, 1 for cases).
#' @param posterior.p Vector of posterior probabilities generated by using model
#'        to predict on test data.
#' @param prior.p Vector of prior probabilities.
#' @param range.xseq Range of points where the curves should be sampled.
#' @param x.stepsize Distance between each point.
#' @param adjust.bw Bandwidth adjustment for the smoothing of the kernel density
#'        estimator. By default it is set to 1 (no adjustment); setting it to
#'        a value smaller/larger than 1 reduces/increases the smoothing of
#'        the kernel. This argument is ignored if more than one mixture component
#'        is identified.
#' @param recalibrate If \code{TRUE} (the default) the weights of evidence are
#'        calculated after the posterior probabilities have been recalibrated
#'        against \code{y} using a logistic regression model.
#' @param debug If \code{TRUE}, the size of the adjustment is reported.
#'
#' @return
#' A densities object that contains the information necessary to compute
#' summary measures and generate plots.
#'
#' @examples
#' data(cleveland)
#' densities <- with(cleveland, Wdensities(y, posterior.p, prior.p))
#'
#' # Example which requires fitting a mixture distribution
#' data(fitonly)
#' densities <- with(fitonly, Wdensities(y, posterior.p, prior.p))
#'
#' @importFrom mclust mclustBIC Mclust
#' @importFrom stats optim
#' @export
Wdensities <- function(y, posterior.p, prior.p,
                       range.xseq=c(-25, 25), x.stepsize=0.01,
                       adjust.bw=1, recalibrate=TRUE, debug=FALSE) {
    n.ctrls <- sum(y == 0)
    n.cases <- sum(y == 1)
    if (n.ctrls + n.cases != length(y))
        stop("y contains values different from 0 or 1.")
    if (length(y) != length(posterior.p))
        stop("y and posterior.p must have the same number of observations.")
    if (x.stepsize <= 0)
        stop("x.stepsize must be positive.")
    if (any(posterior.p == 0 | posterior.p == 1))
        stop("Posterior probabilities exactly 0 or 1.")

    posterior.p.recalib <- recalibrate.p(y, posterior.p)
    if (recalibrate) { # default is to use the recalibrated posterior
        posterior.p.used <- posterior.p.recalib
    } else {
        posterior.p.used <- posterior.p
    }

    W <- weightsofevidence(posterior.p.used, prior.p)
    xseq <- seq(range.xseq[1], range.xseq[2], by=x.stepsize)

    ## determine number of mixture components by model comparison with BIC
    BIC.matrix.ctrls <- mclustBIC(data.frame(W[y == 0]), G=1:2, verbose=FALSE)
    BIC.matrix.cases <- mclustBIC(data.frame(W[y == 1]), G=1:2, verbose=FALSE)
    BIC.vector <- (BIC.matrix.ctrls + BIC.matrix.cases)[, 1]
    BIC.vector <- BIC.vector - min(BIC.vector)
    if (debug) {
        cat("Controls:\n")
        print(BIC.matrix.ctrls)
        cat("\nCases:\n")
        print(BIC.matrix.cases)
        cat("\nBIC vector for controls + cases:\n")
        print(round(BIC.vector, 2))
    }
    num.components <- as.integer(which.max(BIC.vector))

    if (num.components > 1) {
        message("Density with ", num.components, " mixture components ",
                "chosen by BIC")
        mixmodel <- Mclust(W, G=num.components, verbose=FALSE)
        mixcomponent <- as.integer(mixmodel$classification)
        crude <- Wdensities.mix(y, W, xseq, mixcomponent)
    } else {
        crude <- Wdensities.crude(y, W, xseq, adjust.bw)
    }
    crude$x <- xseq
    crude$n.ctrls <- n.ctrls
    crude$n.cases <- n.cases

    wts <- cbind(exp(-0.5 * xseq) * n.ctrls,
                 exp(+0.5 * xseq) * n.cases)
    wts <- wts / rowSums(wts) # normalize weights

    optim.result <- optim(par=0, fn=error.integrals, method="L-BFGS-B",
                          densities=crude, wts=wts,
                          lower=-0.5, upper=0.5)
    theta <- optim.result$par
    wdens <- reweight.densities(theta, crude$f.ctrls, crude$f.cases,
                                n.ctrls, n.cases,
                                xseq, wts)

    ## mean normalizing constant
    z <- 0.5 * (sum(wdens$f.ctrls) + sum(wdens$f.cases)) * x.stepsize
    f.cases <- wdens$f.cases / z
    f.ctrls <- wdens$f.ctrls / z
    if (debug) {
        cat("Optimal value of reweighting parameter theta:", theta, "\n")
        cat("f.cases normalizes to", sum(f.cases * x.stepsize), "\n")
        cat("f.ctrls normalizes to", sum(f.ctrls * x.stepsize), "\n")
    }

    ## cumulative frequencies for the adjusted distributions
    cumfreq.ctrls <- cumfreqs(f.ctrls, x.stepsize)
    cumfreq.cases <- cumfreqs(f.cases, x.stepsize)

    obj <- list(y=y, posterior.p=posterior.p,
                posterior.p.recalib=posterior.p.recalib,
                prior.p=prior.p,
                W=W, x=xseq, f.ctrls=f.ctrls, f.cases=f.cases,
                f.ctrls.crude=crude$f.ctrls, f.cases.crude=crude$f.cases,
                cumfreq.ctrls=cumfreq.ctrls, cumfreq.cases=cumfreq.cases,
                n.ctrls=n.ctrls, n.cases=n.cases, x.stepsize=x.stepsize)
    class(obj) <- "Wdensities"
    return(obj)
}
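## Illustrative sketch (not part of the original source): by construction the
## adjusted densities satisfy f.ctrls(x) = exp(-x) * f.cases(x), which can be
## checked directly on the package's example data:
data(cleveland)
densities.demo <- with(cleveland, Wdensities(y, posterior.p, prior.p))
with(densities.demo,
     max(abs(f.ctrls - exp(-x) * f.cases)))  # ~0 up to floating-point error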

#' Calculate the crude smoothed densities of W in cases and in controls
#'
#' These functions compute the smoothed densities of the weight of
#' evidence in cases and in controls from the crude probabilities.
#' \code{Wdensities.crude} is designed for the general case; if the model
#' probabilities reflect a spike-slab mixture distribution, where a high
#' proportion of values of the predictor are zero, then \code{Wdensities.mix}
#' will be more appropriate.
#'
#' @param y Binary outcome label (0 for controls, 1 for cases).
#' @param W Weight of evidence.
#' @param xseq Sequence of points where the curves should be sampled.
#' @param adjust.bw Multiplicative adjustment to the bandwidth of the kernel
#'        density estimator (1 means no adjustment).
#'
#' @return
#' List of crude densities for controls and cases sampled at each point in the
#' range provided.
#'
#' @importFrom stats bw.SJ density
#' @keywords internal
Wdensities.crude <- function(y, W, xseq, adjust.bw=1) {
    W.ctrls <- W[y == 0]
    W.cases <- W[y == 1]
    bw.ctrls <- bw.SJ(W.ctrls) * adjust.bw
    bw.cases <- bw.SJ(W.cases) * adjust.bw
    fhat.ctrls.raw <- density(W.ctrls, bw.ctrls, n=length(xseq),
                              from=min(xseq), to=max(xseq))$y
    fhat.cases.raw <- density(W.cases, bw.cases, n=length(xseq),
                              from=min(xseq), to=max(xseq))$y
    return(list(f.ctrls=fhat.ctrls.raw, f.cases=fhat.cases.raw))
}

#' @rdname Wdensities.crude
#' @param mixcomponent integer vector same length as \code{y}, coded 1 or 2.
#'        Typically set by \code{Wdensities} which calls this function.
#'
#' @keywords internal
Wdensities.mix <- function(y, W, xseq, mixcomponent) {
    if (length(mixcomponent) != length(y))
        stop("Length of mixcomponent vector doesn't match length of y.")
    stopifnot(is.integer(mixcomponent))
    Wdensity.mix.ctrls <- density.mixture(W[y == 0], mixcomponent[y == 0], xseq)
    Wdensity.mix.cases <- density.mixture(W[y == 1], mixcomponent[y == 1], xseq)
    return(list(f.ctrls=Wdensity.mix.ctrls, f.cases=Wdensity.mix.cases))
}

#' Fit a 2-component mixture density over range of xseq
#' @importFrom stats density
#' @noRd
density.mixture <- function(W, mixcomponent, xseq) {
    num.xseq <- length(xseq)
    min.xseq <- min(xseq)
    max.xseq <- max(xseq)

    ## special case in which there is only one component in this subset or
    ## the number of observations for a component is not enough to compute
    ## the bandwidth of the kernel density estimate
    if (length(unique(mixcomponent)) == 1 || any(table(mixcomponent) < 2))
        return(density(W, bw="SJ", n=num.xseq, from=min.xseq, to=max.xseq)$y)

    ## fit a density for each mixture component
    density.1 <- density(W[mixcomponent == 1], bw="SJ",
                         n=num.xseq, from=min.xseq, to=max.xseq)
    density.2 <- density(W[mixcomponent == 2], bw="SJ",
                         n=num.xseq, from=min.xseq, to=max.xseq)

    ## sum these densities weighted by the mixture proportions
    wts.mix <- as.integer(table(mixcomponent)) / length(mixcomponent)
    density.mix <- wts.mix[1] * density.1$y + wts.mix[2] * density.2$y
    return(density.mix)
}

#' Summary evaluation of predictive performance
#'
#' @param object,x,densities Densities object produced by \code{\link{Wdensities}}.
#' @param \dots Further arguments passed to or from other methods. These are
#'        currently ignored.
#'
#' @return
#' \code{summary} returns a data frame that reports the number of cases and
#' controls, the crude and model-based C-statistic, the crude and model-based
#' expected weight of evidence Lambda (in bits), and the test log-likelihood
#' before and after recalibration (in nats).
#'
#' @examples
#' data(cleveland)
#' densities <- with(cleveland, Wdensities(y, posterior.p, prior.p))
#'
#' summary(densities)
#' mean(densities)
#' auroc.model(densities)
#' lambda.model(densities)
#'
#' @name summary-densities
#' @export
summary.Wdensities <- function(object, ...) {
    validate.densities(object)
    y <- object$y
    posterior.p <- object$posterior.p

    ## test log-likelihood
    loglik <- y * log(posterior.p) + (1 - y) * log(1 - posterior.p)

    posterior.p.recalib <- object$posterior.p.recalib
    loglik.recalib <-
        y * log(posterior.p.recalib) + (1 - y) * log(1 - posterior.p.recalib)

    results <- data.frame(casectrl=paste(object$n.cases, "/", object$n.ctrls),
                          auroc=round(auroc.crude(object), 3),
                          auroc.model=round(auroc.model(object), 3),
                          lambda=round(lambda.crude(object), 2),
                          lambda.model=round(lambda.model(object), 2),
                          test.loglik=round(sum(loglik), 2),
                          test.loglik.recalib=round(sum(loglik.recalib), 2))
    Lambda.char <- intToUtf8(923)
    names(results) <-
        c("Cases / controls",
          "Crude C-statistic",
          "Model-based C-statistic",
          paste("Crude", Lambda.char, "(bits)"),
          paste("Model-based", Lambda.char, "(bits)"),
          "Test log-likelihood (nats)",
          "log-likelihood after recalibration (nats)")
    return(results)
}

#' @rdname summary-densities
#' @return
#' \code{mean} returns a numeric vector listing the means of the densities of
#' the weight of evidence in controls and in cases (the control mean is
#' sign-flipped so that both values are positive for an informative classifier).
#'
#' @export
mean.Wdensities <- function(x, ...) {
    validate.densities((densities <- x))
    means.ctrls <- sum(densities$x * densities$f.ctrls) / sum(densities$f.ctrls)
    means.cases <- sum(densities$x * densities$f.cases) / sum(densities$f.cases)
    ## the control mean is sign-flipped so that both values contribute
    ## positively to the expected information for discrimination
    return(c(ctrls=-means.ctrls, cases=means.cases))
}

#' @rdname summary-densities
#' @importFrom pROC auc
#' @export
auroc.crude <- function(densities) {
    ## force direction of computation of the C-statistic so that predicted
    ## values for controls are lower than or equal to values for cases
    auroc <- as.numeric(with(densities, auc(y, posterior.p, direction="<",
                                            quiet=TRUE)))
    return(auroc)
}

#' @rdname summary-densities
#' @return
#' \code{auroc.crude} and \code{auroc.model} return the area under the ROC curve
#' according to the crude and the model-based densities of weight of evidence,
#' respectively.
#'
#' @importFrom zoo rollmean
#' @export
auroc.model <- function(densities) {
    roc.model <- data.frame(x=1 - densities$cumfreq.ctrls,
                            y=1 - densities$cumfreq.cases)
    auroc.model <- -sum(diff(roc.model$x) * rollmean(roc.model$y, 2))
    return(auroc.model)
}
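## Note (added for clarity, not in the original source): the model-based ROC
## curve traces sensitivity (1 - cumfreq.cases) against 1 - specificity
## (1 - cumfreq.ctrls); rollmean(y, 2) averages successive pairs of heights,
## so the sum above is a midpoint-rule integral of the curve.  For example,
## the same rule applied to the line y = x over [0, 1] gives the exact area:
x.demo <- seq(0, 1, by=0.01)
sum(diff(x.demo) * rollmean(x.demo, 2))  # 0.5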

#' @rdname summary-densities
#' @export
lambda.crude <- function(densities) {
    W <- with(densities, weightsofevidence(posterior.p, prior.p))
    W.case <- W[densities$y == 1]
    W.ctrl <- W[densities$y == 0]
    lambda <- tobits(mean(c(mean(W.case), -mean(W.ctrl))))
    return(lambda)
}

#' @rdname summary-densities
#' @return
#' \code{lambda.crude} and \code{lambda.model} return the expected weight of
#' evidence (expected information for discrimination) in bits from the crude
#' and the model-based densities, respectively.
#'
#' @export
lambda.model <- function(densities) {
    wts.ctrlscases <- with(densities, c(n.ctrls, n.cases) / (n.ctrls + n.cases))
    lambda <- tobits(sum(wts.ctrlscases * mean(densities)))
    return(lambda)
}

#' Proportions of cases and controls below a threshold of weight of evidence
#'
#' @param densities Densities object produced by \code{\link{Wdensities}}.
#' @param w.threshold Threshold value of weight of evidence (natural logs).
#'
#' @return
#' Numeric vector of length 2 listing the proportions of controls and
#' cases with weight of evidence below the given threshold.
#'
#' @examples
#' data(cleveland)
#' densities <- with(cleveland, Wdensities(y, posterior.p, prior.p))
#' w.threshold <- log(4) # threshold Bayes factor of 4
#' prop.belowthreshold(densities, w.threshold)
#'
#' @export
prop.belowthreshold <- function(densities, w.threshold) {
    validate.densities(densities)
    xseq.threshold.idx <- which(densities$x >= w.threshold)[1]
    prop.ctrls <- round(densities$cumfreq.ctrls[xseq.threshold.idx], 3)
    prop.cases <- round(densities$cumfreq.cases[xseq.threshold.idx], 3)
    return(c(ctrls=prop.ctrls, cases=prop.cases))
}

#' Cumulative frequency distribution
#'
#' @noRd
cumfreqs <- function(f, x.stepsize) {
    ## normalize f
    f <- f / sum(f * x.stepsize)
    return(cumsum(f * x.stepsize))
}
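## Illustrative sketch (not part of the original source): on a fine grid,
## cumfreqs() approximates the cumulative distribution function.
x.grid.demo <- seq(-5, 5, by=0.01)
cdf.demo <- cumfreqs(dnorm(x.grid.demo), 0.01)
cdf.demo[which.min(abs(x.grid.demo))]  # about 0.5, the standard normal CDF at 0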

#' Validate densities object
#'
#' Checks that the object has been created by \code{\link{Wdensities}}.
#'
#' @param obj An object to be checked.
#'
#' @return
#' Throws an error if the object is not a densities object.
#'
#' @noRd
validate.densities <- function(obj) {
    if (!inherits(obj, "Wdensities")) {
        stop("Not a densities object of class 'Wdensities'.")
    }
}

#' Validate probability vectors
#'
#' Checks that the input vectors are numeric, don't have any NAs, and contain
#' values that are probabilities.
#'
#' @param posterior,prior Vectors of probabilities.
#'
#' @return
#' Throws an error if the vectors do not contain probabilities.
#'
#' @noRd
validate.probabilities <- function(posterior, prior) {
    stopifnot(is.numeric(posterior), is.numeric(prior))
    if (length(prior) > 1 && length(prior) != length(posterior))
        stop("Wrong length for the vector of prior probabilities.")
    if (anyNA(posterior) || anyNA(prior))
        stop("NAs not allowed in the vectors of probabilities.")
    if (any(posterior < 0 | posterior > 1 | prior < 0 | prior > 1))
        stop("Vector does not contain probabilities.")
    if (any(posterior == 0 | posterior == 1))
        stop("Posterior probabilities exactly 0 or 1.")
}

#' Recalibrate posterior probabilities
#'
#' Transforms posterior probabilities to logits, fits a logistic regression model
#' and returns the predictive probabilities from this model.
#'
#' @param y Binary outcome label (0 for controls, 1 for cases).
#' @param posterior.p Vector of posterior probabilities.
#'
#' @return
#' Recalibrated posterior probabilities.
#' @importFrom stats glm predict
#' @export
recalibrate.p <- function(y, posterior.p) {
    x <- log(posterior.p / (1 - posterior.p))
    glm.model <- glm(y ~ x, family="binomial")
    newp <- predict(object=glm.model, type="response")
    return(newp)
}
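## Illustrative sketch (not part of the original source): recalibration fits
## logit(p.recalib) = a + b * logit(p), so probabilities that are already well
## calibrated are left almost unchanged (a near 0, b near 1).
set.seed(1)
p.demo <- runif(500, min=0.05, max=0.95)
y.demo <- rbinom(500, size=1, prob=p.demo)  # outcomes drawn from p.demo itself
p.recalib.demo <- recalibrate.p(y.demo, p.demo)
cor(p.demo, p.recalib.demo)  # close to 1 when the input is well calibrated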
```
