# The 'bc_estimate' function
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2018-10-04
# Table of contents
# 1) bc_estimate |*
# 1.1) is.bc_estimate | tested
# 1.2) coef.bc_estimate | tested
# 1.3) subset.bc_estimate |
# 1.4) levels.bc_estimate |
# 1.5) dimnames.bc_estimate |
# 1.6) print.bc_estimate |
# 1.7) features.bc_estimate |
###
### 1) bc_estimate
###
#' Wrapper for Binary Classification Estimation
#'
#' Fits different types of models for binary classification
#' to a training subset of data, and then computes fit metrics
#' over a test subset.
#'
#' @param type The type of fitting algorithm. Options
#' include \code{glm} and \code{glmnet}.
#' @param dat An R object of class \code{train_test}.
#' @param control An optional list of additional estimation
#' parameters.
#'
#' @details
#'
#' The \code{control} argument is a list that can supply any
#' of the following components:
#' \itemize{
#' \item \code{col_sel} = An index for the subset of predictors to use;
#' \item \code{second_pass} = Logical; if \code{TRUE}, refits a
#' glm model with only the significant predictors;
#' \item \code{prev_fit} = Allows previous output from \code{glm} or
#' \code{glmnet} to be passed in;
#' \item \code{error_rate} = The cutoff for determining whether a
#' variable is statistically significant or not;
#' \item \code{prev_coef} = An optional vector with previous coefficients
#' to use as offsets in a null model for \code{glm};
#' \item \code{alpha} = The mixing proportion ridge (0) versus lasso (1)
#' regression;
#' \item \code{nfolds} = The number of folds for cross-validation step to
#' select the best penalty term for \code{glmnet};
#' \item \code{costf} = The type of cost function to use for the
#' \code{cv.glmnet} function, where options include 'auc' (Area
#' under the curve), 'mae' (mean absolute error), 'class'
#' (misclassification error), and 'deviance'.
#' }
#'
#' The method \code{subset} can extract the specified fit metric for
#' either the training (\code{train = TRUE}) or test subsets. The
#' \code{features} method extracts the labels for the predictors
#' deemed significant.
#'
#' @return An R object of class 'bc_estimate'.
#'
#' @export
#' @examples
#' # Simulate data
#' sim = bc_simulate( 300, 8, 4 )
#' # Create training and test data
#' index = cv_index( 3, 300 )
#' dat = train_test( 3, index, sim$y, sim$X )
#' fit = bc_estimate( 'glm', dat )
#' fit_2 = bc_estimate( 'glmnet', dat )
bc_estimate = function( type, dat, control = NULL ) {
# Intialize output
out = NULL
# Standard logistic regression
if ( type == 'glm' )
out = estimate.lr( dat, control = control )
# Elastic net logistic regression
if ( type == 'glmnet' )
out = estimate.en_lr( dat, control = control )
return( out )
}
# 1.1)
#' @rdname bc_estimate
#' @export
is.bc_estimate = function( x ) {
# Purpose:
# Checks if an object is of class 'bc_estimate'
# Arguments:
# x - An R object
# Returns:
# TRUE if the object is of class 'bc_estimate',
# FALSE otherwise.
return( inherits( x, 'bc_estimate' ) )
}
# 1.2)
#' @rdname bc_estimate
#' @export
coef.bc_estimate = function( x, int = FALSE, sig = T ) {
# Purpose:
# Extracts coefficients from
# an object of class 'bc_estimate'.
# Arguments:
# x - An R object of class 'bc_estimate'
# int - Logical; if TRUE, includes
# the intercept
# sig - Logical; if TRUE, includes only
# significant/non-zero values
# Returns:
# The coefficients from the fitted model.
if ( sig ) {
# If reporting only significant predictors
if ( int ) {
# If intercept should be included in values reported
out = c( x$intercept, x$coefficients )
names( out ) = c(
'Intercept',
names( x$coefficients )
)
} else {
# Significant coefficients, no intercept
out = x$coefficients
}
} else {
# All coefficients are included
# Initialize output
out = c( 0, rep( 0, length( x$predictors ) ) )
cf = c( x$intercept,
x$coefficients )
names(cf) = c( 'Intercept', names( x$coefficients ) )
sel = names( cf )
names( out ) = c(
'Intercept', x$predictors )
out[sel] = cf
}
return( out )
}
# 1.3)
#' @rdname bc_estimate
#' @export
subset.bc_estimate = function( x, train = F, metric = 'AUC' ) {
# Purpose:
# Extracts the specified fit metric based on
# the model fit to either the training or
# test data subsets.
# Arguments:
# x - An R object of class 'bc_estimate'
# train - Logical; if TRUE, returns the
# metric for the training data subset
# metric - The type of metric to return; valid
# options are 'TPR', 'FPR', 'AUC',
# 'd_prime', 'criterion', 'CE', 'R',
# 'Accuracy', 'CM', 'AUC_curve',
# 'theta', and 'residuals'
# Returns:
# The specified metric for the data subset.
# Extract 'fit_metric' object
fm = x$fit_metric
# Extract metric of interest
out = subset( fm, train = train, metric = metric )
return( out )
}
# 1.4)
#' @rdname bc_estimate
#' @export
levels.bc_estimate = function( x ) {
# Purpose:
# Extracts the binary levels for the
# factor representing the dependent variable.
# Arguments:
# x - An R object of class 'bc_estimate'
# Returns:
# The binary levels for the factor.
return( x$levels )
}
# 1.5)
#' @rdname bc_estimate
#' @export
dimnames.bc_estimate = function( x ) {
# Purpose:
# Extracts the column labels for the matrix
# of predictors.
# Arguments:
# x - An R object of class 'train_test'
# Returns:
# A vector with the column labels for the
# predictors.
# Extract labels for predictors
out = x$predictors
return( out )
}
# 1.6)
#' @rdname bc_estimate
#' @export
print.bc_estimate = function( x, digits = 2, metric = 'AUC' ) {
# Purpose:
# Displays basic details for an object of
# class 'bc_estimate'.
# Arguments:
# x - An R object of class 'bc_estimate'
# digits - The max number of digits when rounding
# metric - The type of fit metric to report
string = paste( 'Estimation type:', x$type )
cat( string, '\n' )
print( round( x$run_time, digits ) )
cat( '\n' )
cat( 'Coefficients:', '\n' )
val = coef( x )
print( round( val, digits ) )
cat( '\n' )
cat( 'Fit:', '\n' )
val = subset( x, train = F, metric = metric )
string = paste( metric, '=', round( val, 2 ) )
cat( string, '\n' )
}
# 1.7
#' @rdname bc_estimate
#' @export
features.bc_estimate = function( x ) {
# Purpose:
# Returns the labels for the predictors
# selected by the model as major
# determinants in classifying the
# binary dependent variable.
# Arguments:
# x - An R object of class 'bc_estimate'
# Returns:
# A character vector with the labels for
# the selected predictors.
out = x$selected_vars
return( out )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.