# RemixAutoML: AutoQuant
#
# Documented in ModelInsightsReport

# AutoQuant is a package for quickly creating high-quality visualizations through a common, easy-to-use API.
# Copyright (C) <year>  <name of author>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

#' @title Run_ModelInsightsReport
#'
#' @description Run_ModelInsightsReport is the internal driver called by \code{ModelInsightsReport()}. It down-samples the data sets stored on \code{ModelObject}, extracts the model metadata from \code{ModelObject[['ArgsList']]}, and renders the Rmarkdown template matching the model's target type (regression, classification or multiclass) to an html file.
#'
#' @details The templates are rendered with \code{rmarkdown::render()} using its default evaluation environment, which is the frame of this function. The local variables created below (for example \code{TargetColumnName}, \code{PredictionColumnName}, \code{ArgsList}, \code{FeatureColumnNames}) are therefore kept even though this function does not read them itself; presumably the templates do (the templates are not part of this file, so confirm before removing any of them).
#'
#' @author Adrian Antico
#' @family Reports
#'
#' @param TrainDataInclude Default FALSE. If FALSE, \code{ModelObject[['TrainData']]} is dropped before rendering; if TRUE it is sampled like the test data
#' @param FeatureColumnNames NULL. Feature column names as character vector. When NULL, the first element of \code{ModelObject[['ColNames']]} is used
#' @param SampleSize Default 100000. Maximum number of randomly chosen rows kept from each data set
#' @param ModelObject Output from a AutoQuant supervised learning function. When NULL nothing is rendered
#' @param ModelID ModelID used in the AutoQuant supervised learning function. It becomes part of the output file name
#' @param SourcePath Path to directory with AutoQuant Model Output. Not used by this function directly
#' @param OutputPath Path to directory where the html will be saved. When NULL the current working directory is used
#'
#' @return NULL when \code{ModelObject} is NULL, when it carries neither test nor train data, or (invisibly, with a warning) when the target type has no matching template. Otherwise the value of \code{rmarkdown::render()}, returned invisibly.
#'
#' @noRd
Run_ModelInsightsReport <- function(TrainDataInclude = FALSE,
                                    FeatureColumnNames = NULL,
                                    SampleSize = 100000,
                                    ModelObject = NULL,
                                    ModelID = NULL,
                                    SourcePath = NULL,
                                    OutputPath = NULL) {

  # Directory holding the packaged Rmarkdown templates
  appDir <- system.file("r-markdowns", package = "AutoQuant")

  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  # Globalize the parameters
  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  # NOTE(review): `<<-` writes ModelObject into an enclosing environment (the
  # global environment when no enclosing binding exists). It runs before the
  # sampling below, so that copy holds the unsampled data. Kept for backward
  # compatibility; confirm whether the templates depend on it.
  ModelObject <<- ModelObject

  # Nothing to report on
  if(is.null(ModelObject)) return(NULL)

  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  # Sample the data sets
  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  # Shuffle the rows, then keep at most SampleSize of them. A missing data set
  # is skipped so the empty-data check below can run instead of the sampling
  # expression failing on NULL.
  if(!is.null(ModelObject[['TestData']])) {
    ModelObject[['TestData']] <- ModelObject[['TestData']][order(runif(.N))][seq_len(min(.N, SampleSize))]
  }
  if(!TrainDataInclude) {
    ModelObject[['TrainData']] <- NULL
  } else if(!is.null(ModelObject[['TrainData']])) {
    ModelObject[['TrainData']] <- ModelObject[['TrainData']][order(runif(.N))][seq_len(min(.N, SampleSize))]
  }

  # No data at all: nothing to render
  if(length(ModelObject[['TestData']]) == 0L && length(ModelObject[['TrainData']]) == 0L) return(NULL)

  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  # Meta info
  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  ArgsList <- ModelObject[['ArgsList']]
  TargetType <- ArgsList[['TargetType']]

  # Fail with a readable message instead of "argument is of length zero"
  if(length(TargetType) != 1L || is.na(TargetType)) {
    stop("ModelObject[['ArgsList']][['TargetType']] must be a single non-missing value", call. = FALSE)
  }
  if(TargetType == "Binary Classification") TargetType <- "classification"

  TargetColumnName <- ArgsList[['TargetColumnName']]
  PredictionColumnName <- ArgsList[['PredictionColumnName']]
  TargetLevels <- ArgsList[['TargetLevels']]
  Algo <- tolower(ArgsList[['Algo']])
  if(is.null(FeatureColumnNames)) {
    FeatureColumnNames <- ModelObject[['ColNames']][[1L]]
  }

  # file.path() returns character(0) when any component is NULL, so fall back
  # to the working directory to always get a usable output file name
  if(is.null(OutputPath)) OutputPath <- getwd()
  OutputPathName <- file.path(OutputPath, paste0('MLReport-', ModelID, '-', TargetType, '.html'))

  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  # Select Rmarkdown Report and Run it
  # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  ReportTemplate <- switch(
    tolower(TargetType),
    'regression' = 'Regression_ModelInsights.Rmd',
    'classification' = 'Classification_ModelInsights.Rmd',
    'multiclass' = 'MultiClass_ModelInsights.Rmd',
    NULL)

  # Unknown target type: make the missing report visible to the caller
  if(is.null(ReportTemplate)) {
    warning("Unsupported TargetType '", TargetType, "': no report was rendered", call. = FALSE)
    return(invisible(NULL))
  }

  # render() is called directly from this frame so that its default
  # `envir = parent.frame()` resolves to this function's environment
  ReportPath <- rmarkdown::render(
    input = file.path(appDir, ReportTemplate),
    output_file = OutputPathName)
  invisible(ReportPath)
}

#' @title ModelInsightsReport
#'
#' @description ModelInsightsReport builds an Rmarkdown (html) report for viewing the model insights generated by AutoQuant supervised learning functions. It forwards its arguments to the internal \code{Run_ModelInsightsReport()} and then clears leftover names from its own frame.
#'
#' @author Adrian Antico
#' @family Reports
#'
#' @param TrainDataInclude Default FALSE. If FALSE, no derived visuals or metrics for TrainData
#' @param FeatureColumnNames NULL. Feature column names as character vector.
#' @param SampleSize Default 100000
#' @param ModelObject Returned output from regression, classification, and multiclass Remix Auto_() models. Currently supports CatBoost, XGBoost, and LightGBM models
#' @param ModelID ModelID used in the AutoQuant supervised learning function
#' @param SourcePath Path to directory with AutoQuant Model Output
#' @param OutputPath Path to directory where the html will be saved
#' @param KeepOutput NULL. A character vector of output names to keep, e.g. c('Test_VariableImportance', 'Train_VariableImportance'). Within this function it is only used to exempt names from the post-run cleanup
#' @param GlobalVars ls(). Do not use. Names exempted from the post-run cleanup
#'
#' @return Invisibly NULL; the function is called for its side effect of rendering the report.
#'
#' @export
ModelInsightsReport <- function(TrainDataInclude = FALSE,
                                FeatureColumnNames = NULL,
                                SampleSize = 100000,
                                ModelObject = NULL,
                                ModelID = 'ModelTest',
                                SourcePath = NULL,
                                OutputPath = NULL,
                                KeepOutput = NULL,
                                GlobalVars = ls()) {

  # Hand everything over to the internal driver, which samples the data and
  # renders the matching template
  Run_ModelInsightsReport(
    ModelObject = ModelObject,
    ModelID = ModelID,
    FeatureColumnNames = FeatureColumnNames,
    TrainDataInclude = TrainDataInclude,
    SampleSize = SampleSize,
    SourcePath = SourcePath,
    OutputPath = OutputPath)

  # Post-run cleanup: drop every name found here that is not listed in
  # GlobalVars or KeepOutput.
  # NOTE(review): both ls() and rm() act on this function's own frame, so no
  # object outside this function is removed by this step.
  NamesAfterRun <- ls()
  NamesToKeep <- c(GlobalVars, KeepOutput)
  rm(list = setdiff(NamesAfterRun, NamesToKeep))
}