inst/doc/stranded_model.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----load, warning=FALSE, message=FALSE---------------------------------------
library(NHSRdatasets)
library(dplyr)
library(ggplot2)
library(caret)
library(rsample)
library(varhandle)

data("stranded_data")
glimpse(stranded_data)
prop.table(table(stranded_data$stranded.label))

## ----feature_engineering------------------------------------------------------

stranded_data <- stranded_data %>% 
  dplyr::mutate(stranded.label=factor(stranded.label)) %>% 
  dplyr::select(everything(), -c(admit_date))



## ----dummy_encode-------------------------------------------------------------
cats <- select_if(stranded_data, is.character)
cat_dummy <- varhandle::to.dummy(cats$frailty_index, "frail_ind") 
#Converts the frailty index column to dummy encoding and sets a column called "frail_ind" prefix
cat_dummy <- cat_dummy %>% 
  as.data.frame() %>% 
  dplyr::select(-frail_ind.No_index_item) #Drop the field of interest
# Drop the frailty index from the stranded data frame and bind on our new encoding categorical variables
stranded_data <- stranded_data %>% 
  dplyr::select(-frailty_index) %>% 
  bind_cols(cat_dummy) %>% na.omit(.)


## ----train_test_split---------------------------------------------------------
split <- rsample::initial_split(stranded_data, prop = 3/4)
train <- rsample::training(split)
test <- rsample::testing(split)


## ----class_model--------------------------------------------------------------
set.seed(123)
glm_class_mod <- caret::train(factor(stranded.label) ~ ., data = train, 
                 method = "glm")
print(glm_class_mod)


## ----predicting---------------------------------------------------------------
preds <- predict(glm_class_mod, newdata = test) # Predict class
pred_prob <- predict(glm_class_mod, newdata = test, type="prob") #Predict probs

# Join prediction on to actual test data frame and evaluate in confusion matrix

predicted <- data.frame(preds, pred_prob)
test <- test %>% 
  bind_cols(predicted) %>% 
  dplyr::rename(pred_class=preds)

glimpse(test)

## ----evaluation---------------------------------------------------------------

caret::confusionMatrix(test$stranded.label, test$pred_class, positive="Stranded")

Try the NHSRdatasets package in your browser

Any scripts or data that you put into this service are public.

NHSRdatasets documentation built on March 14, 2021, 1:06 a.m.