knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
knitr::opts_chunk$set(fig.width = 6, fig.height = 6)

library(erikmisc)
library(dplyr)

Load data

data(dat_mtcars_e)

dat_mtcars_e <-
  dat_mtcars_e |>
  dplyr::mutate(
    vs_V = ifelse(vs == "V-shaped", 1, 0) # 0-1 binary for logistic regression
  )
#labelled::var_label(dat_mtcars_e[[ "vs_V"  ]]) <- "Engine: 1=V-shaped, 0=straight binary for logistic regression"

str(dat_mtcars_e)
#summary(dat_mtcars_e)

Classification models

Logistic regression

Fit model

fit_glm_vs <-
  glm(
    cbind(vs_V, 1 - vs_V) ~ disp + wt + carb
  , family = binomial
  , data = dat_mtcars_e
  )

cat("Test residual deviance for lack-of-fit (if > 0.10, little-to-no lack-of-fit)\n")
dev_p_val <- 1 - pchisq(fit_glm_vs$deviance, fit_glm_vs$df.residual)
dev_p_val |> print()

car::Anova(fit_glm_vs, type = 3)
summary(fit_glm_vs)

Effect plots

# all contrasts from model, probability scale
glm_contrasts <-
  e_plot_model_contrasts(
    fit                = fit_glm_vs
  , dat_cont           = dat_mtcars_e
  , sw_glm_scale       = c("link", "response")[2]
  , sw_print           = FALSE
  , sw_marginal_even_if_interaction = TRUE
  , sw_TWI_plots_keep  = "both"
  )
glm_contrasts$plots

Classification results

glm_roc <-
  e_plot_roc(
    actual_labels = dat_mtcars_e$vs_V
  , pred_values   = fit_glm_vs$fitted.values
  , sw_plot       = TRUE
  , cm_mode       = c("sens_spec", "prec_recall", "everything")[3]
  )

glm_roc$roc_curve_best |> print(width = Inf)
glm_roc$p_roc
glm_roc$confusion_matrix
## Random forests
### Fit model
### Classification results


#### Before variable selection
dat.class <-
  dat_sub |>
  select(DX, one_of(Var$list[[this_var_list]])) |>
  na.omit() |>
  as.data.frame()
dim(dat.class)
table(dat.class$DX)

set.seed(my.seed)
y.class <- colnames(dat.class)[1] # "DX"
x.class <- colnames(dat.class)[-1] # c()
out.summary <- f_rf_cat2(y.class, x.class, dat.class, sw.plots=FALSE, sw.return=TRUE, sw.seed=-76543)
#out.summary

v_imp <-
  out.summary$importance |>
  as_tibble(rownames = "Var") |>
  arrange(desc(all))
v_imp |> print(n = Inf)

#### After variable selection
v_list <- v_imp$Var
v_list_ind <- which(v_list %notin%
  c(
  #  "SEX"
  #, "AGE"
  #, "Zemek5P_parent"
  #, "PCSI_percentage"
  #, "Hippocampus_pct"
  #, "ETIV"
  #, "rMFG_Thickness"
  ))
v_list <-
  v_list[v_list_ind]

dat.class <-
  dat_sub |>
  select(DX, one_of(v_list)) |>  # Var$list$var_demo[c(3,4)],
  mutate(DX = factor(DX)) |>
  na.omit() |>
  as.data.frame()
dim(dat.class)
table(dat.class$DX)

set.seed(my.seed)
y.class <- colnames(dat.class)[1] # "DX"
x.class <- colnames(dat.class)[-1] # c("sex", "age", var_list_comp)
out.summary <- f_rf_cat2(y.class, x.class, dat.class, sw.plots=FALSE, sw.return=TRUE, sw.seed=-76543)
#out.summary

v_imp <-
  out.summary$importance |>
  as_tibble(rownames = "Var") |>
  arrange(desc(all))
v_imp |> print(n = Inf)

out_rf <-
  f_RF_label_plot_summary(
    dat.class
  , out.summary
  , v_imp
  , var_list_label_match |> filter(var_type %in% c("var_demo", this_var_list))
  )

out_tab_plot$vimp_tab [[this_var_list]]$DX    <- out_rf$out_rf_v_imp
out_tab_plot$vimp_plot[[this_var_list]]$DX    <- out_rf$out_rf_v_imp_plot
out_tab_plot$class_tab[[this_var_list]]$DX    <- out_rf$out_rf_confusion
out_tab_plot$roc_dat  [[this_var_list]]$DX    <- out_rf$roc_dat

ggsave(paste0("out/", "rf_vimp_roc_", this_var_list, "NoAgeSex.png") , plot=out_rf$out_rf_v_imp_plot2, width=8, height=5, units="in", bg="white", dpi=300)


if (sw_marginal_plots) {
  # plot.variable(out.summary, xvar.names = x.class, partial = TRUE, smooth.lines = TRUE)

  p.var <- randomForestSRC::plot.variable(out.summary, xvar.names = x.class, partial = TRUE, smooth.lines = TRUE, show.plots = FALSE, npts = 250)

  library(ggRandomForests) # ggplot2 random forest figures
  gg_p <- ggRandomForests::gg_partial(p.var)
  #plot(gg_p, panel=TRUE, notch=TRUE)
  p.all <- plot(gg_p)

  # Add smooth line
  p.all.smooth <- NULL
  for (i in seq_along(x.class)) {   #length(p.all)) {
    p.all.smooth[[i]] <- p.all[[i]]
    p.all.smooth[[i]] <- p.all.smooth[[i]] + coord_cartesian(ylim=c(0.1,0.9))
    p.all.smooth[[i]] <- p.all.smooth[[i]] + geom_smooth()
    # note, we can't add colored values by diag since nonunique x-values.
  }

  # Arrange plots
  # https://cran.r-project.org/web/packages/gridExtra/vignettes/arrangeGrob.html
  library(gridExtra)
  library(grid)
  #lay <- rbind(c(1,2,3,4,5),
  #             c(6,7,8,9,10))
  lay <- rbind(c(1,2,NA)
               , c(NA,NA,NA)
               )
  p.arranged <- grid.arrange( grobs = p.all.smooth
              , layout_matrix = lay
              , top="RF Marginal Pr(Patient) for important variables"
              #, bottom="bottom\nlabel"
              #, left="left label"
              #, right="right label"
              )
  out_tab_plot$marginal [[this_var_list]]$DX    <- p.arranged

  ggsave(paste0("out/", "rf_marginal_", this_var_list, "NoAgeSex.png") , plot=p.arranged, width=8, height=5, units="in", bg="white", dpi=300)
} # sw_marginal_plots


erikerhardt/erikmisc documentation built on April 17, 2025, 10:48 a.m.