# data-raw/dx_heart_failure.R

library(dplyr)
# Fix the RNG seed so that the random forest fitted later in this script
# gives the same result on every run.
set.seed(20191104)

# Originally obtained from Kaggle: https://www.kaggle.com/imnikhilanand/heart-attack-prediction
# Empty cells, single blanks, "?" and the -9 sentinel are all read as NA.
# The sentinel is written as a string because `na` is a character vector;
# a bare -9 would only be coerced to "-9" implicitly by c().
ha_data <- readr::read_csv(
  "data-raw/heart_failure.csv",
  na = c("", " ", "?", "-9")
)

# Select variables for prediction and keep complete cases
# (rows with an NA in any of the selected columns are dropped).
ha_data <- ha_data %>%
  select(num, cp, age, sex, trestbps, chol, restecg, fbs, thalach, oldpeak) %>%
  filter(complete.cases(.))

# Generate model and predictions: fit a binomial GLM of `num` on the nine
# selected predictors, then keep only its response-scale predictions for the
# rows it was fitted on (no new data is supplied to predict()).
pred <- glm(
  num ~ cp + age + sex + trestbps + chol + restecg + fbs + thalach + oldpeak,
  family = "binomial",
  data = ha_data
) %>%
  predict(type = "response")

# Assemble the exported data set, one row per retained patient record.
#   AgeGroup  - age binned with cut() at 20, 50 and 80
#   Sex       - 0/1 code relabelled as "Female"/"Male"
#   truth     - observed outcome (`num`)
#   predicted - prediction from the GLM stored in `pred`
dx_heart_failure <- data.frame(
  AgeGroup = cut(ha_data$age, breaks = c(20, 50, 80)),
  Sex = factor(ha_data$sex, levels = c(0, 1), labels = c("Female", "Male")),
  truth = ha_data$num,
  predicted = pred,
  stringsAsFactors = FALSE
)

# Combined age-by-sex label, e.g. "<age bin> - Female", stored as a factor.
dx_heart_failure[["AgeSex"]] <- factor(
  paste(dx_heart_failure[["AgeGroup"]], dx_heart_failure[["Sex"]], sep = " - ")
)


# Second model: a random forest classifier on the same predictors, with the
# outcome converted to a factor so that classification is performed.
rf_model <- randomForest::randomForest(
  as.factor(num) ~
    cp + age + sex + trestbps + chol + restecg + fbs + thalach + oldpeak,
  data = ha_data
)

# Keep the second column of the class-probability matrix.
# NOTE(review): presumably the probability of the second level of
# as.factor(num), i.e. num == 1 - confirm against the data.
rf_pred <- predict(rf_model, type = "prob")[, 2]
dx_heart_failure[["predicted_rf"]] <- rf_pred


# Export the finished data set through usethis; overwrite = TRUE replaces any
# previously saved copy so the script can be re-run.
usethis::use_data(dx_heart_failure, overwrite = TRUE)
# overdodactyl/diagnosticSummary documentation built on Jan. 28, 2024, 10:07 a.m.