#' Prepare a numeric-target train/test pair with vtreat cross frames
#'
#' Designs a treatment plan on `df_train` with
#' `vtreat::mkCrossFrameNExperiment()`, applies that plan to `df_test` with
#' `vtreat::prepare()`, and rewrites the column names of both results with
#' `janitor::clean_names()` (a naming preference over vtreat's conventions).
#' Both inputs must contain a column named "target"; every other column of
#' `df_train` is passed to vtreat as a candidate variable.
#'
#' @param df_train Training data.frame.
#' @param df_test Testing data.frame.
#' @param ncross Number of cross validation frames in treatment design.
#' @param use_parallel Logical, if TRUE use parallel methods.
#' @param var_types_return Specifies what types of variables to produce
#'   (character array of level codes, NULL means no restriction).
#' @return A list with three elements:
#'   \describe{
#'     \item{df_train}{The `crossFrame` element of the vtreat experiment, with
#'       cleaned column names.}
#'     \item{df_test}{`df_test` after `vtreat::prepare()` with the designed
#'       treatments, with cleaned column names.}
#'     \item{treatment_plan}{The complete object returned by
#'       `vtreat::mkCrossFrameNExperiment()` (not only its `treatments`
#'       element).}
#'   }
prep_numeric <- function(df_train,
                         df_test,
                         ncross = 5,
                         use_parallel = FALSE,
                         var_types_return = c("clean", "isBAD", "catN")) {
  # Validate inputs up front so failures surface before the (potentially
  # expensive) cross-frame design starts.
  testit::assert(
    "Training and testing frames are not data.frames.",
    is.data.frame(df_train) && is.data.frame(df_test)
  )
  testit::assert(
    'df_train is missing "target" column.',
    "target" %in% colnames(df_train)
  )
  testit::assert(
    'df_test is missing "target" columns.',
    "target" %in% colnames(df_test)
  )

  cf_exp <- vtreat::mkCrossFrameNExperiment(
    dframe = df_train,
    varlist = setdiff(colnames(df_train), "target"),
    outcomename = "target",
    ncross = ncross,
    use_parallel = use_parallel,
    codeRestriction = var_types_return
  )

  # Plain nested calls instead of `%>%`: the pipe operator is not namespaced
  # here, so relying on it would require magrittr/dplyr to be attached.
  treated_test <- vtreat::prepare(
    treatmentplan = cf_exp$treatments,
    dframe = df_test
  )

  # Preference vs vtreat's naming conventions.
  list(
    df_train = janitor::clean_names(cf_exp$crossFrame),
    df_test = janitor::clean_names(treated_test),
    treatment_plan = cf_exp
  )
}
#' Prepare a binary-target train/test pair with vtreat cross frames
#'
#' Designs a treatment plan on `df_train` with
#' `vtreat::mkCrossFrameCExperiment()`, applies that plan to `df_test` with
#' `vtreat::prepare()`, and rewrites the column names of both results with
#' `janitor::clean_names()` (a naming preference over vtreat's conventions).
#' Both inputs must contain a column named "target"; every other column of
#' `df_train` is passed to vtreat as a candidate variable.
#'
#' @param df_train Training data.frame.
#' @param df_test Testing data.frame.
#' @param outcome_target Value/level of the "target" column to be considered
#'   "success". There must be a cut such that `target == outcome_target` at
#'   least twice and `target != outcome_target` at least twice.
#' @param ncross Number of cross validation frames in treatment design.
#' @param use_parallel Logical, if TRUE use parallel methods.
#' @param var_types_return Specifies what types of variables to produce
#'   (character array of level codes, NULL means no restriction).
#' @return A list with three elements:
#'   \describe{
#'     \item{df_train}{The `crossFrame` element of the vtreat experiment, with
#'       cleaned column names.}
#'     \item{df_test}{`df_test` after `vtreat::prepare()` with the designed
#'       treatments, with cleaned column names.}
#'     \item{treatment_plan}{The complete object returned by
#'       `vtreat::mkCrossFrameCExperiment()` (not only its `treatments`
#'       element).}
#'   }
prep_bin <- function(df_train,
                     df_test,
                     outcome_target,
                     ncross = 5,
                     use_parallel = FALSE,
                     var_types_return = c("clean", "isBAD", "catP")) {
  # Validate inputs up front so failures surface before the (potentially
  # expensive) cross-frame design starts.
  testit::assert(
    "Training and testing frames are not data.frames.",
    is.data.frame(df_train) && is.data.frame(df_test)
  )
  testit::assert(
    'df_train is missing "target" column.',
    "target" %in% colnames(df_train)
  )
  testit::assert(
    'df_test is missing "target" columns.',
    "target" %in% colnames(df_test)
  )

  cf_exp <- vtreat::mkCrossFrameCExperiment(
    dframe = df_train,
    varlist = setdiff(colnames(df_train), "target"),
    outcomename = "target",
    outcometarget = outcome_target,
    ncross = ncross,
    use_parallel = use_parallel,
    codeRestriction = var_types_return
  )

  # Plain nested calls instead of `%>%`: the pipe operator is not namespaced
  # here, so relying on it would require magrittr/dplyr to be attached.
  treated_test <- vtreat::prepare(
    treatmentplan = cf_exp$treatments,
    dframe = df_test
  )

  # Preference vs vtreat's naming conventions.
  list(
    df_train = janitor::clean_names(cf_exp$crossFrame),
    df_test = janitor::clean_names(treated_test),
    treatment_plan = cf_exp
  )
}
# NOTE: the following two lines are web-page boilerplate, not R code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.