R/precrec.R

Defines functions .onUnload

#' precrec: A package for computing accurate ROC and Precision-Recall curves
#'
#' The precrec package contains several functions and \code{S3} generics to
#' provide a robust platform for performance evaluation of binary classifiers.
#'
#' @section Functions:
#' The precrec package provides the following six functions.
#'
#'   \tabular{ll}{
#'     \strong{Function} \tab \strong{Description} \cr
#'     \code{\link{evalmod}}
#'           \tab Main function to calculate evaluation measures \cr
#'     \code{\link{mmdata}}
#'           \tab Reformat input data for performance evaluation calculation \cr
#'     \code{\link{join_scores}}
#'           \tab Join scores of multiple models into a list \cr
#'     \code{\link{join_labels}}
#'           \tab Join observed labels of multiple test datasets into a list \cr
#'     \code{\link{create_sim_samples}}
#'           \tab Create random samples for simulations \cr
#'     \code{\link{format_nfold}}
#'           \tab Create n-fold cross validation dataset from data frame
#'   }
#'
#' @section S3 generics:
#' The precrec package provides nine different \code{S3} generics for the
#'  \code{S3} objects generated by the \code{\link{evalmod}} function.
#'
#'   \tabular{lll}{
#'     \strong{S3 generic}
#'     \tab \strong{Library}
#'     \tab \strong{Description} \cr
#'     \code{print}
#'     \tab base
#'     \tab Print the calculation results and the summary of the test data \cr
#'     \code{\link{as.data.frame}}
#'     \tab base
#'     \tab Convert a precrec object to a data frame \cr
#'     \code{\link{plot}}
#'     \tab graphics
#'     \tab Plot performance evaluation measures \cr
#'     \code{\link{autoplot}}
#'     \tab ggplot2
#'     \tab Plot performance evaluation measures with ggplot2  \cr
#'     \code{\link{fortify}}
#'     \tab ggplot2
#'     \tab Prepare a data frame for ggplot2 \cr
#'     \code{\link{auc}}
#'     \tab precrec
#'     \tab Make a data frame with AUC scores \cr
#'     \code{\link{part}}
#'     \tab precrec
#'     \tab Calculate partial curves and partial AUC scores \cr
#'     \code{\link{pauc}}
#'     \tab precrec
#'     \tab Make a data frame with pAUC scores \cr
#'     \code{\link{auc_ci}}
#'     \tab precrec
#'     \tab Calculate confidence intervals of AUC scores
#'   }
#'
#' @section Performance measure calculations:
#' The \code{\link{evalmod}} function calculates ROC and Precision-Recall
#'   curves and returns an \code{S3} object. The generated \code{S3} object can
#'   be used with several different \code{S3} generics, such as \code{print} and
#'   \code{\link{plot}}. The \code{\link{evalmod}} function can also
#'   calculate basic evaluation measures - error rate, accuracy, specificity,
#'   sensitivity, precision, Matthews correlation coefficient, and F-Score.
#'
#' @section Data preparation:
#' The \code{\link{mmdata}} function creates an input dataset for
#'  the \code{\link{evalmod}} function. The generated dataset contains
#'  formatted scores and labels.
#'
#' \code{\link{join_scores}} and \code{\link{join_labels}} are helper
#'  functions to combine multiple scores and labels.
#'
#' The \code{\link{create_sim_samples}} function creates test datasets with
#'  five different performance levels.
#'
#' @section Data visualization:
#' \code{\link{plot}} takes an \code{S3} object generated
#' by \code{\link{evalmod}} as input and plots the corresponding curves.
#'
#' \code{\link{autoplot}} uses \code{ggplot} to plot curves.
#'
#' @section Result retrieval:
#' \code{\link{as.data.frame}} takes an \code{S3} object generated
#' by \code{\link{evalmod}} as input and returns a data frame
#' with calculated curve points.
#'
#' \code{\link{auc}} and \code{\link{pauc}} return data frames with AUC scores
#' and partial AUC scores, respectively. \code{\link{auc_ci}}
#' returns confidence intervals of AUCs for both ROC
#' and precision-recall curves.
#'
#'
#' @docType package
#' @name precrec
#'
#' @useDynLib precrec, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @importFrom ggplot2 autoplot
#' @importFrom ggplot2 fortify
#' @importFrom grDevices col2rgb rainbow rgb
#' @importFrom graphics abline layout legend lines
#' @importFrom graphics matplot plot plot.new polygon
#' @importFrom methods is
#' @importFrom rlang sym
#' @importFrom stats qnorm rbeta rnorm sd qt
#' @importFrom data.table frank
#'
NULL

# Namespace unload hook.
# Unloads the "precrec" shared library via library.dynam.unload(), using the
# `libpath` argument that the hook receives.
.onUnload <- function(libpath) {
  dll_name <- "precrec"
  library.dynam.unload(dll_name, libpath)
}

#' Balanced data with 500 positives and 500 negatives.
#'
#' A list containing labels and scores of five different performance levels.
#' All scores were randomly generated.
#'
#' @format A list with 8 items.
#' \describe{
#'   \item{np}{number of positives: 500}
#'   \item{nn}{number of negatives: 500}
#'   \item{labels}{labels of observed data}
#'   \item{random_scores}{scores of a random performance level}
#'   \item{poor_er_scores}{scores of a poor early retrieval level}
#'   \item{good_er_scores}{scores of a good early retrieval level}
#'   \item{excel_scores}{scores of an excellent level}
#'   \item{perf_scores}{scores of the perfect level}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name B500
#' @usage data(B500)
NULL

#' Balanced data with 1000 positives and 1000 negatives.
#'
#' A list containing labels and scores of five different performance levels.
#' All scores were randomly generated.
#'
#' @format A list with 8 items.
#' \describe{
#'   \item{np}{number of positives: 1000}
#'   \item{nn}{number of negatives: 1000}
#'   \item{labels}{labels of observed data}
#'   \item{random_scores}{scores of a random performance level}
#'   \item{poor_er_scores}{scores of a poor early retrieval level}
#'   \item{good_er_scores}{scores of a good early retrieval level}
#'   \item{excel_scores}{scores of an excellent level}
#'   \item{perf_scores}{scores of the perfect level}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name B1000
#' @usage data(B1000)
NULL

#' Imbalanced data with 500 positives and 5000 negatives.
#'
#' A list containing labels and scores of five different performance levels.
#' All scores were randomly generated.
#'
#' @format A list with 8 items.
#' \describe{
#'   \item{np}{number of positives: 500}
#'   \item{nn}{number of negatives: 5000}
#'   \item{labels}{labels of observed data}
#'   \item{random_scores}{scores of a random performance level}
#'   \item{poor_er_scores}{scores of a poor early retrieval level}
#'   \item{good_er_scores}{scores of a good early retrieval level}
#'   \item{excel_scores}{scores of an excellent level}
#'   \item{perf_scores}{scores of the perfect level}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name IB500
#' @usage data(IB500)
NULL

#' Imbalanced data with 1000 positives and 10000 negatives.
#'
#' A list containing labels and scores of five different performance levels.
#' All scores were randomly generated.
#'
#' @format A list with 8 items.
#' \describe{
#'   \item{np}{number of positives: 1000}
#'   \item{nn}{number of negatives: 10000}
#'   \item{labels}{labels of observed data}
#'   \item{random_scores}{scores of a random performance level}
#'   \item{poor_er_scores}{scores of a poor early retrieval level}
#'   \item{good_er_scores}{scores of a good early retrieval level}
#'   \item{excel_scores}{scores of an excellent level}
#'   \item{perf_scores}{scores of the perfect level}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name IB1000
#' @usage data(IB1000)
NULL

#' A small example dataset with several tied scores.
#'
#' A list containing labels and scores for 10 positives and 10 negatives.
#'
#' @format A list with 4 items.
#' \describe{
#'   \item{np}{number of positives: 10}
#'   \item{nn}{number of negatives: 10}
#'   \item{labels}{20 labels of observed data}
#'   \item{scores}{20 scores with some ties}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name P10N10
#' @usage data(P10N10)
NULL

#' 5-fold cross validation sample.
#'
#' A data frame containing labels and scores for 5-fold test sets.
#'
#' @format A data frame with 4 columns.
#' \describe{
#'   \item{score1}{50 random scores}
#'   \item{score2}{50 random scores}
#'   \item{label}{50 labels as 'pos' or 'neg'}
#'   \item{fold}{50 fold IDs as 1:5}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name M2N50F5
#' @usage data(M2N50F5)
NULL

Try the precrec package in your browser

Any scripts or data that you put into this service are public.

precrec documentation built on Oct. 12, 2023, 1:06 a.m.