# part of the 'calibtools' package by Norm Matloff, Kenneth Lee and
# Rongkui Han
#######################################################################
####################### calibration #################################
#######################################################################
# the *Calib() functions generate estimated conditional class
# probabilities from scores, e.g. SVM decision values
# useful for classification methodology that does not inherently
# generate those probabilities, or for which those probabilities are
# biased or otherwise inaccurate
######################### knnCalib() ################################
#' Calibrate probability by k-NN
#'
#' for a given new case, the k nearest neighbors in the training set are
#' obtained; the class labels for the neighbors are then obtained, and
#' the resulting proportions for the different labels are then used as
#' the estimated probabilities for the binary class.
#' @param y R factor of labels in training set
#' @param trnScores vector/matrix of scores in training set
#' @param tstScores scores of new case(s)
#' @param k number of nearest neighbors
#' @param loclin indicate whether local linear regression should be applied
#' @param smoothingFtn a smoothing function whether it should be applied.
#' @return vector of estimated probabilities for the new cases
#' @export
knnCalib <- function(y,
trnScores,
tstScores,
k,
loclin=FALSE,
scaleX=NULL,
smoothingFtn=NULL)
{
if (!is.factor(y)) stop('Y must be an R factor')
if (is.vector(trnScores))
trnScores <- matrix(trnScores,ncol=1)
if (is.vector(tstScores))
tstScores <- matrix(tstScores,ncol=1)
if (length(levels(y)) < 2) stop("Y should have at least 2 levels.")
if(loclin){
# loclin case
# turn multcase into 0 and 1 for lm()
classes <- levels(y)
probMat <- matrix(NA, nrow(tstScores), length(classes))
count <- 1
for(class in classes){
tmp_y <- ifelse(y == class, 1, 0)
dat <- as.data.frame(cbind(tmp_y, trnScores))
knnout <- qeKNN(dat, colnames(dat)[1], k=k, scaleX=scaleX,
smoothingFtn=smoothingFtn,
holdout=NULL)
tstS <- as.data.frame(tstScores)
# ensure the test set has the same column names
colnames(tstS) <- colnames(dat)[2:ncol(trnScores)]
# knn model prediction
pred <- predict(knnout, tstS)
# set all values that are greater than 1 to 1
pred[pred > 1] <- 1
# set all values that are lower than 0 to 0
pred[pred < 0] <- 0
pred <- as.vector(pred)
probMat[, count] <- pred
count <- count + 1
}
}else{
# mean case instead of loclin
original_class <- levels(y)
dat <- as.data.frame(cbind(trnScores, y))
yname <- colnames(dat)[ncol(dat)]
dat[[yname]] <- as.factor(dat[[yname]])
levels(dat[[yname]]) <- original_class
knnout <- qeKNN(dat, yname, k=k, scaleX=scaleX, holdout=NULL)
tstS <- as.data.frame(tstScores)
out <- predict(knnout, tstS)
probMat <- out$probs
}
probMat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.