inst/doc/prettyglm-vignette.R

## ----include=FALSE------------------------------------------------------------
# knitr::opts_chunk$set(
#   collapse = TRUE,
#   comment = "#>"
# )

## ----load data, echo=TRUE, message=FALSE, warning=FALSE-----------------------
library(dplyr)
library(prettyglm)
data('titanic')
head(titanic) %>%
  select(-c(PassengerId, Name, Ticket)) %>% 
  knitr::kable(table.attr = "style='width:10%;'" ) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

## ----preprocessing 1, echo=TRUE, message=FALSE, warning=FALSE-----------------
# Easy way to convert multiple columns to a factor.
columns_to_factor <- c('Pclass',
                       'Sex',
                       'Cabin', 
                       'Embarked',
                       'Cabintype')
meanage <- base::mean(titanic$Age, na.rm=T)
titanic  <- titanic  %>%
  dplyr::mutate_at(columns_to_factor, list(~factor(.))) %>%
  dplyr::mutate(Age =base::ifelse(is.na(Age)==T,meanage,Age)) 


## ----build model, echo=TRUE---------------------------------------------------
survival_model <- stats::glm(Survived ~ Pclass + 
                                        Sex + 
                                        Fare +
                                        Age +
                                        Embarked + 
                                        SibSp + 
                                        Parch, 
                             data = titanic, 
                             family = binomial(link = 'logit'))

## ----visualise coefficients, echo=TRUE, eval=FALSE, include=TRUE--------------
#  pretty_coefficients(model_object = survival_model)

## ----visualise coefficients type iii, echo=TRUE, eval=FALSE, include=TRUE-----
#  pretty_coefficients(survival_model, type_iii = 'Wald', significance_level = 0.1)

## ----visualise coefficients vi,echo=TRUE, eval=FALSE, include=TRUE------------
#  pretty_coefficients(model_object = survival_model,
#                      type_iii = 'Wald',
#                      significance_level = 0.1,
#                      vimethod = 'permute',
#                      target = 'Survived',
#                      metric = 'auc',
#                      pred_wrapper = predict.glm,
#                      reference_class = 0)

## ----visualise rels, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  pretty_relativities(feature_to_plot= 'Embarked',
#                      model_object = survival_model,
#                      relativity_label = 'Liklihood of Survival'
#                      )

## ----visualise rels 2, echo=TRUE, eval=FALSE, include=TRUE--------------------
#  pretty_relativities(feature_to_plot= 'Fare',
#                      model_object = survival_model,
#                      relativity_label = 'Liklihood of Survival',
#                      upper_percentile_to_cut = 0.1)

## ----build model 2, echo=TRUE-------------------------------------------------
survival_model2 <- stats::glm(Survived ~ Pclass:Fare +
                                         Age +
                                         Embarked:Sex +
                                         SibSp +
                                         Parch,
                              data = titanic,
                              family = binomial(link = 'logit'))

## ----visualise ff F, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  pretty_relativities(feature_to_plot= 'Embarked:Sex',
#                      model_object = survival_model2,
#                      relativity_label = 'Liklihood of Survival',
#                      iteractionplottype = 'facet',
#                      facetorcolourby = 'Sex'
#                      )

## ----visualise ff C, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  pretty_relativities(feature_to_plot= 'Embarked:Sex',
#                      model_object = survival_model2,
#                      relativity_label = 'Liklihood of Survival',
#                      iteractionplottype = 'colour',
#                      facetorcolourby = 'Embarked'
#                      )

## ----visualise ff N, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  pretty_relativities(feature_to_plot= 'Embarked:Sex',
#                      model_object = survival_model2,
#                      relativity_label = 'Liklihood of Survival'
#                      )

## ----visualise cf C, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  pretty_relativities(feature_to_plot= 'Pclass:Fare',
#                      model_object = survival_model2,
#                      relativity_label = 'Liklihood of Survival',
#                      upper_percentile_to_cut = 0.03
#                      )

## ----visualise cf F, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  pretty_relativities(feature_to_plot= 'Pclass:Fare',
#                      model_object = survival_model2,
#                      relativity_label = 'Liklihood of Survival',
#                      iteractionplottype = 'facet',
#                      upper_percentile_to_cut = 0.03,
#                      height = 800
#                      )

## ----fit the splines----------------------------------------------------------
titanic  <- titanic  %>%
  dplyr::mutate(Age_0_18 = prettyglm::splineit(Age,0,18),
                Age_18_35 = prettyglm::splineit(Age,18,35),
                Age_35_120 = prettyglm::splineit(Age,35,120)) %>%
  dplyr::mutate(Fare_0_55 = prettyglm::splineit(Fare,0,55),
                Fare_55_600 = prettyglm::splineit(Fare,55,600))

## ----build model 4, echo=TRUE-------------------------------------------------
survival_model4 <- stats::glm(Survived ~ Pclass +
                                         Sex:Fare_0_55 +
                                         Sex:Fare_55_600 +
                                         Age_0_18 +
                                         Age_18_35 +
                                         Age_35_120 +
                                         Embarked +
                                         SibSp +
                                         Parch,
                              data = titanic,
                              family = binomial(link = 'logit'))

## ----visualise coefficients type spline, echo=TRUE, eval=FALSE, include=TRUE----
#  pretty_coefficients(survival_model4, significance_level = 0.1, spline_seperator = '_')

## ----visualise age spine, echo=TRUE, eval=FALSE, include=TRUE-----------------
#  pretty_relativities(feature_to_plot= 'Age',
#                      model_object = survival_model4,
#                      relativity_label = 'Liklihood of Survival',
#                      spline_seperator = '_'
#                      )

## ----visualise fare spine, echo=TRUE, eval=FALSE, include=TRUE----------------
#  pretty_relativities(feature_to_plot= 'Sex:Fare',
#                      model_object = survival_model4,
#                      relativity_label = 'Liklihood of Survival',
#                      spline_seperator = '_',
#                      upper_percentile_to_cut = 0.03
#                      )

## ----visualise fare Facet, echo=TRUE, eval=FALSE, include=TRUE----------------
#  pretty_relativities(feature_to_plot= 'Sex:Fare',
#                      model_object = survival_model4,
#                      relativity_label = 'Liklihood of Survival',
#                      spline_seperator = '_',
#                      upper_percentile_to_cut = 0.03,
#                      iteractionplottype = 'facet'
#                      )

## ----oneway cts, echo=TRUE, eval=FALSE, include=TRUE--------------------------
#  one_way_ave(feature_to_plot = 'Age',
#              model_object = survival_model4,
#              target_variable = 'Survived',
#              data_set = titanic,
#              upper_percentile_to_cut = 0.1,
#              lower_percentile_to_cut = 0.1)

## ----oneway discrete, echo=TRUE, eval=FALSE, include=TRUE---------------------
#  one_way_ave(feature_to_plot = 'Cabintype',
#              model_object = survival_model4,
#              target_variable = 'Survived',
#              data_set = titanic)

## ----oneway cts facet, echo=TRUE, eval=FALSE, include=TRUE--------------------
#  one_way_ave(feature_to_plot = 'Age',
#              model_object = survival_model4,
#              target_variable = 'Survived',
#              facetby = 'Sex',
#              data_set = titanic,
#              upper_percentile_to_cut = 0.1,
#              lower_percentile_to_cut = 0.1)

## ----custom predict, echo=TRUE, eval=FALSE, include=TRUE----------------------
#  # Custom Predict Function and facet
#  a_custom_predict_function <- function(target, model_object, dataset){
#    dataset <- base::as.data.frame(dataset)
#    Actual_Values <- dplyr::pull(dplyr::select(dataset, tidyselect::all_of(c(target))))
#    if(class(Actual_Values) == 'factor'){
#      Actual_Values <- base::as.numeric(as.character(Actual_Values))
#    }
#    Predicted_Values <- base::as.numeric(stats::predict(model_object, dataset, type='response'))
#  
#    to_return <-  base::data.frame(Actual_Values = Actual_Values,
#                                   Predicted_Values = Predicted_Values)
#  
#    to_return <- to_return %>%
#      dplyr::mutate(Predicted_Values = base::ifelse(Predicted_Values > 0.4,0.4,Predicted_Values))
#    return(to_return)
#  }
#  
#  one_way_ave(feature_to_plot = 'Age',
#              model_object = survival_model4,
#              target_variable = 'Survived',
#              data_set = titanic,
#              upper_percentile_to_cut = 0.1,
#              lower_percentile_to_cut = 0.1,
#              predict_function = a_custom_predict_function)

## ----bucketed aves, echo=TRUE, eval=FALSE, include=TRUE-----------------------
#  actual_expected_bucketed(target_variable = 'Survived',
#                           model_object = survival_model4,
#                           data_set = titanic)

## ----bucketed aves facet, echo=TRUE, eval=FALSE, include=TRUE-----------------
#  actual_expected_bucketed(target_variable = 'Survived',
#                           model_object = survival_model4,
#                           data_set = titanic,
#                           facetby = 'Sex')

Try the prettyglm package in your browser

Any scripts or data that you put into this service are public.

prettyglm documentation built on Sept. 8, 2023, 5:56 p.m.