R/bc_estimate.R

# The 'bc_estimate' function
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2018-10-04

# Table of contents
# 1) bc_estimate                     |*
#   1.1) is.bc_estimate              | tested
#   1.2) coef.bc_estimate            | tested
#   1.3) subset.bc_estimate          |
#   1.4) levels.bc_estimate          |
#   1.5) dimnames.bc_estimate        |
#   1.6) print.bc_estimate           |
#   1.7) features.bc_estimate        |

###
### 1) bc_estimate
###

#' Wrapper for Binary Classification Estimation
#'
#' Fits different types of models for binary classification
#' to a training subset of data, and then computes fit metrics
#' over a test subset.
#'
#' @param type The type of fitting algorithm. Options
#'   include \code{glm} and \code{glmnet}.
#' @param dat An R object of class \code{train_test}.
#' @param control An optional list of additional estimation
#'   parameters.
#'
#' @details
#'
#' The \code{control} argument is a list that can supply any
#' of the following components:
#' \itemize{
#'   \item \code{col_sel} = An index for the subset of predictors to use;
#'   \item \code{second_pass} = Logical; if \code{TRUE}, refits a
#'     glm model with only the significant predictors;
#'   \item \code{prev_fit} = Allows previous output from \code{glm} or
#'     \code{glmnet} to be passed in;
#'   \item \code{error_rate} = The cutoff for determining whether a
#'     variable is statistically significant or not;
#'   \item \code{prev_coef} = An optional vector with previous coefficients
#'     to use as offsets in a null model for \code{glm};
#'   \item \code{alpha} = The mixing proportion ridge (0) versus lasso (1)
#'     regression;
#'   \item \code{nfolds} = The number of folds for cross-validation step to
#'     select the best penalty term for \code{glmnet};
#'   \item \code{costf} = The type of cost function to use for the
#'     \code{cv.glmnet} function, where options include 'auc' (Area
#'     under the curve), 'mae' (mean absolute error), 'class'
#'     (misclassification error), and 'deviance'.
#' }
#'
#' The method \code{subset} can extract the specified fit metric for
#' either the training (\code{train = TRUE}) or test subsets. The
#' \code{features} method extracts the labels for the predictors
#' deemed significant.
#'
#' @return An R object of class 'bc_estimate'.
#'
#' @export
#' @examples
#' # Simulate data
#' sim = bc_simulate( 300, 8, 4 )
#' # Create training and test data
#' index = cv_index( 3, 300 )
#' dat = train_test( 3, index, sim$y, sim$X )
#' fit = bc_estimate( 'glm', dat )
#' fit_2 = bc_estimate( 'glmnet', dat )

bc_estimate = function( type, dat, control = NULL ) {

  # Intialize output
  out = NULL

  # Standard logistic regression
  if ( type == 'glm' )
    out = estimate.lr( dat, control = control )

  # Elastic net logistic regression
  if ( type == 'glmnet' )
    out = estimate.en_lr( dat, control = control )

  return( out )
}


# 1.1)
#' @rdname bc_estimate
#' @export

is.bc_estimate = function( x ) {
  # Purpose:
  # Checks if an object is of class 'bc_estimate'
  # Arguments:
  # x - An R object
  # Returns:
  # TRUE if the object is of class 'bc_estimate',
  # FALSE otherwise.

  return( inherits( x, 'bc_estimate' ) )
}

# 1.2)
#' @rdname bc_estimate
#' @export

coef.bc_estimate = function( x, int = FALSE, sig = T ) {
  # Purpose:
  # Extracts coefficients from
  # an object of class 'bc_estimate'.
  # Arguments:
  # x   - An R object of class 'bc_estimate'
  # int - Logical; if TRUE, includes
  #       the intercept
  # sig - Logical; if TRUE, includes only
  #       significant/non-zero values
  # Returns:
  # The coefficients from the fitted model.

  if ( sig ) {
    # If reporting only significant predictors

    if ( int ) {
      # If intercept should be included in values reported

      out = c( x$intercept, x$coefficients )
      names( out ) = c(
        'Intercept',
        names( x$coefficients )
      )

    } else {
      # Significant coefficients, no intercept

      out = x$coefficients

    }
  } else {
    # All coefficients are included

    # Initialize output
    out = c( 0, rep( 0, length( x$predictors ) ) )

    cf = c( x$intercept,
            x$coefficients )
    names(cf) = c( 'Intercept', names( x$coefficients ) )
    sel = names( cf )

    names( out ) = c(
      'Intercept', x$predictors )
    out[sel] = cf

  }

  return( out )
}

# 1.3)
#' @rdname bc_estimate
#' @export

subset.bc_estimate = function( x, train = F, metric = 'AUC' ) {
  # Purpose:
  # Extracts the specified fit metric based on
  # the model fit to either the training or
  # test data subsets.
  # Arguments:
  # x      - An R object of class 'bc_estimate'
  # train  - Logical; if TRUE, returns the
  #          metric for the training data subset
  # metric - The type of metric to return; valid
  #          options are 'TPR', 'FPR', 'AUC',
  #          'd_prime', 'criterion', 'CE', 'R',
  #          'Accuracy', 'CM', 'AUC_curve',
  #          'theta', and 'residuals'
  # Returns:
  # The specified metric for the data subset.

  # Extract 'fit_metric' object
  fm = x$fit_metric
  # Extract metric of interest
  out = subset( fm, train = train, metric = metric )

  return( out )
}

# 1.4)
#' @rdname bc_estimate
#' @export

levels.bc_estimate = function( x ) {
  # Purpose:
  # Extracts the binary levels for the
  # factor representing the dependent variable.
  # Arguments:
  # x - An R object of class 'bc_estimate'
  # Returns:
  # The binary levels for the factor.

  return( x$levels )
}

# 1.5)
#' @rdname bc_estimate
#' @export

dimnames.bc_estimate = function( x ) {
  # Purpose:
  # Extracts the column labels for the matrix
  # of predictors.
  # Arguments:
  # x   - An R object of class 'train_test'
  # Returns:
  # A vector with the column labels for the
  # predictors.

  # Extract labels for predictors
  out = x$predictors

  return( out )
}

# 1.6)
#' @rdname bc_estimate
#' @export

print.bc_estimate = function( x, digits = 2, metric = 'AUC' ) {
  # Purpose:
  # Displays basic details for an object of
  # class 'bc_estimate'.
  # Arguments:
  # x      - An R object of class 'bc_estimate'
  # digits - The max number of digits when rounding
  # metric - The type of fit metric to report

  string = paste( 'Estimation type:', x$type )
  cat( string, '\n' )
  print( round( x$run_time, digits ) )
  cat( '\n' )

  cat( 'Coefficients:', '\n' )
  val = coef( x )
  print( round( val, digits ) )
  cat( '\n' )

  cat( 'Fit:', '\n' )
  val = subset( x, train = F, metric = metric )
  string = paste( metric, '=', round( val, 2 ) )
  cat( string, '\n' )

}

# 1.7
#' @rdname bc_estimate
#' @export

features.bc_estimate = function( x ) {
  # Purpose:
  # Returns the labels for the predictors
  # selected by the model as major
  # determinants in classifying the
  # binary dependent variable.
  # Arguments:
  # x - An R object of class 'bc_estimate'
  # Returns:
  # A character vector with the labels for
  # the selected predictors.

  out = x$selected_vars

  return( out )
}
rettopnivek/binclass documentation built on May 13, 2019, 4:46 p.m.