f_model_plot_variable_dependency_regression: plot model dependency on most important variables
In erblast/oetteR: Collection of personal R functions

Description Usage Arguments Value See Also Examples

The response variable is plotted against the entire range of each variable, starting with the most important ones. All other variables are set to their median or most common factor level. This function requires a ranked list of the most important variables as returned by f_model_importance().

f_model_plot_variable_dependency_regression(m, ranked_variables,
  title = unlist(stringr::str_split(class(m)[1], "\\."))[1],
  data = NULL, formula, data_ls,
  variable_color_code = f_plot_color_code_variables(data_ls),
  limit = 12, log_y = F, set_manual = list(), ...)

`m`	a regression model
`ranked_variables`	data frame as returned by f_model_importance()
`title`	character vector as plot title, Default: unlist(stringr::str_split(class(m)[1], "\\."))[1]
`data`	a dataframe, only necessary if it differs from data_ls$data, Default: NULL
`formula`	the formula used to train the model
`data_ls`	data_ls object generated by f_clean_data(), or a named list: list( data = <dataframe>, numericals = < vector with column names of numerical columns>)
`variable_color_code`	dataframe created by f_plot_color_code_variables()
`limit`	integer limit the number of variables to be plotted, Default: 12
`log_y`	boolean, use a log scale for the y axis, Default: F
`set_manual`	named list, set some variables manually instead of defaulting to median or most common factor. !! Values need to be of the same variable type as in the original data.
`...`	arguments passed to facet_wrap, e.g. useful for nrow, ncol

plot

str_split

# Regular version ----
# Prepare the inputs step by step: a data_ls object built from mtcars, a
# random forest fitted on its data, the ranked variables for that model and
# the colour code for the variables.
data_ls <- f_clean_data(mtcars)
data <- data_ls$data
formula <- disp ~ hp + mpg + cyl
m <- randomForest::randomForest(formula, data)
ranked_variables <- f_model_importance(m, data)
variable_color_code <- f_plot_color_code_variables(data_ls)
limit <- 12

# The title is the part of the model's first class name that precedes the
# first dot (this is also the documented default for `title`).
f_model_plot_variable_dependency_regression(
  m,
  ranked_variables,
  title = unlist(stringr::str_split(class(m)[1], '\\.'))[1],
  formula = formula,
  data_ls = data_ls,
  variable_color_code = variable_color_code,
  limit = limit
)

# Pipe version ----
# NOTE(review): `%>%`, mutate(), map2() and pmap() are called unqualified, so
# this example presumably expects magrittr/dplyr/purrr (e.g. via tidyverse)
# to be attached - confirm before running it in a fresh session.

data_ls <- f_clean_data(mtcars)
form <- as.formula('disp~hp+cyl+wt')
variable_color_code <- f_plot_color_code_variables(data_ls)
limit <- 10

# Register three learners on the same formula, fit them, then add one
# importance table (`imp`) and one dependency plot (`plot`) per fitted model.
# The columns `fit`, `train` and `model` are taken from the result of
# pipelearner::learn().
pl <- pipelearner::pipelearner(data_ls$data) %>%
  pipelearner::learn_models(rpart::rpart, form) %>%
  pipelearner::learn_models(randomForest::randomForest, form) %>%
  pipelearner::learn_models(e1071::svm, form) %>%
  pipelearner::learn() %>%
  mutate(
    imp = map2(fit, train, f_model_importance),
    plot = pmap(
      # Row-wise arguments vary per model; the remaining named arguments
      # below are passed unchanged to every call.
      list(m = fit, ranked_variables = imp, title = model, data = train),
      .f = f_model_plot_variable_dependency_regression,
      formula = form,
      data_ls = data_ls,
      variable_color_code = variable_color_code,
      limit = limit
    )
  )