# Global knitr chunk options applied to every code chunk in this report.
knitr::opts_chunk$set(
  collapse = TRUE, # merge source and output into a single block
  comment = "#>",  # prefix for printed output lines
  message = TRUE,  # show messages in the rendered report
  error = TRUE     # keep rendering when a chunk errors and show the error
)
This section reports a series of univariate summary checks of the CRASH-2 dataset.
# Attach the packages used throughout this report.
library(here)
library(tidyverse)
library(Hmisc)

# Function to plot univariate summaries.
source(here("R", "ida_plot_univar.R"))

# Define sets of predictors by type - helpful for selection.
demog_vars <- c("age", "sex")
# NOTE: the spelling `pyhiso_vars` is kept as is because the physiological
# measurements report refers to the vector by this name.
pyhiso_vars <- c("sbp", "hr", "rr", "gcs", "cc")
injury_vars <- c("injurytime", "injurytype")

# Load the dataset (presumably provides the `a_crash2` data frame used below;
# confirm against the contents of data/a_crash2.rda).
load(here::here("data", "a_crash2.rda"))
Using the Hmisc describe function, we provide an overview of the data set. The descriptive report also provides histograms of continuous variables. For ease of scanning the information, we group the report by measurement type.
# Hmisc descriptive report for the demographic variables, rendered as HTML.
Hmisc::html(
  Hmisc::describe(
    dplyr::select(a_crash2, all_of(demog_vars)),
    descript = "Demographic variables"
  ),
  size = 80
)
# Hmisc descriptive report for the physiological measurements, rendered as HTML.
Hmisc::html(
  Hmisc::describe(
    dplyr::select(a_crash2, all_of(pyhiso_vars)),
    descript = "Physiological measurements"
  ),
  size = 80
)
# Hmisc descriptive report for the injury characteristics, rendered as HTML.
Hmisc::html(
  Hmisc::describe(
    dplyr::select(a_crash2, all_of(injury_vars)),
    descript = "Characteristics of injury"
  ),
  size = 80
)
We now provide a closer visual examination of the categorical predictors.
# Percentage of subjects in each category of the categorical predictors
# (sex and injury type), drawn as one lollipop panel per variable.
a_crash2 %>%
  dplyr::select(sex, injurytype) %>%
  # Convert each column to a factor and turn missing values into an explicit
  # "NA" level so that they are counted and plotted like any other category.
  # NOTE(review): fct_explicit_na() is deprecated as of forcats 1.0.0; switch
  # to fct_na_value_to_level(level = "NA") once that version can be required.
  dplyr::mutate(
    dplyr::across(
      dplyr::everything(),
      function(x) {
        forcats::fct_explicit_na(forcats::as_factor(x), na_level = "NA")
      }
    )
  ) %>%
  tidyr::pivot_longer(
    dplyr::everything(),
    names_to = "var",
    values_to = "value",
    values_drop_na = FALSE
  ) %>%
  dplyr::group_by(var, value) %>%
  # Keep the result grouped by `var` so that the frequencies below are
  # computed within each variable; stating `.groups` makes that explicit and
  # avoids the regrouping message in the rendered report.
  dplyr::summarize(N = dplyr::n(), .groups = "drop_last") %>%
  dplyr::mutate(
    freq = N / sum(N),
    pct = round((freq * 100), 1),
    axis_lab = paste0(value, " ", "(N = ", N, ")"),
    var_label = dplyr::case_when(
      var == "sex" ~ "Sex",
      var == "injurytype" ~ "Injury type"
    )
  ) %>%
  dplyr::ungroup() %>%
  ggplot(aes(
    x = reorder(axis_lab, pct), # order categories by percentage
    y = pct,
    label = pct
  )) +
  geom_text(nudge_y = 7) +
  geom_pointrange(aes(ymin = 0, ymax = pct),
                  alpha = 1,
                  size = 1,
                  color = "grey") +
  geom_point(color = "firebrick2", alpha = 0.6, size = 3) +
  ylab("Percentage (%)") +
  scale_y_continuous(limits = c(0, 100)) +
  labs(caption = "Number of subjects with a non-missing value reported in brackets.\nNA = missing") +
  facet_wrap(~ var_label, ncol = 1, scales = "free_y") +
  coord_flip() +
  theme_minimal(base_size = 12) +
  theme(axis.title.y = element_blank(),
        panel.grid.minor = element_blank())
The Glasgow coma score, an ordinal categorical variable, is also displayed separately.
# Percentage of subjects at each Glasgow coma score value, drawn as a
# lollipop chart. Missing scores stay as NA and appear as their own row.
a_crash2 %>%
  dplyr::select(gcs) %>%
  dplyr::mutate(gcs = forcats::as_factor(gcs)) %>%
  tidyr::pivot_longer(
    dplyr::everything(),
    names_to = "var",
    values_to = "value",
    values_drop_na = FALSE
  ) %>%
  dplyr::group_by(var, value) %>%
  # Keep the grouping by `var` explicit so that sum(N) below is the total
  # over all score values; this also avoids the regrouping message.
  dplyr::summarize(N = dplyr::n(), .groups = "drop_last") %>%
  dplyr::mutate(
    freq = N / sum(N),
    pct = round((freq * 100), 1),
    # `value` is already a factor here, so it is not converted again.
    # as_factor() on the character labels keeps levels in order of
    # appearance, i.e. the row order produced by the summary above.
    axis_lab = forcats::as_factor(paste0(value, " ", "(N = ", N, ")"))
  ) %>%
  dplyr::ungroup() %>%
  ggplot(aes(
    x = axis_lab,
    y = pct,
    label = pct
  )) +
  geom_text(nudge_y = 7) +
  geom_pointrange(aes(ymin = 0, ymax = pct),
                  alpha = 1,
                  size = 1,
                  color = "grey") +
  geom_point(color = "firebrick2", alpha = 0.6, size = 3) +
  ylab("Percentage (%)") +
  scale_y_continuous(limits = c(0, 100)) +
  labs(title = "Glasgow coma score (point scale)",
       caption = "Number of subjects with a non-missing value reported in brackets.\nNA = missing") +
  coord_flip() +
  theme_minimal(base_size = 12) +
  theme(axis.title.y = element_blank(),
        panel.grid.minor = element_blank())
We now provide a closer visual examination of the continuous predictors.
# Histograms of the continuous predictors, one panel per variable with free
# axis scales.
a_crash2 %>%
  dplyr::select(age, sbp, rr, cc, hr, injurytime) %>%
  # Coerce every column to plain numeric so they can share one value column.
  dplyr::mutate(dplyr::across(dplyr::everything(), as.numeric)) %>%
  tidyr::pivot_longer(
    dplyr::everything(),
    names_to = "var",
    values_to = "value"
  ) %>%
  # Human-readable panel titles.
  # NOTE(review): confirm the unit given for injurytime (days vs. hours)
  # against the data dictionary.
  dplyr::mutate(
    var_label = dplyr::case_when(
      var == "age" ~ "Age (years)",
      var == "sbp" ~ "Systolic blood pressure (mmHg)",
      var == "hr" ~ "Heart rate (1/min)",
      var == "rr" ~ "Respiratory rate (1/min)",
      var == "cc" ~ "Central capillary refill time (seconds)",
      var == "injurytime" ~ "Injury time (days)"
    )
  ) %>%
  ggplot(aes(value)) +
  ylab("Number of subjects") +
  # The same bin width of 0.5, with bins centred on 0, is used in every panel.
  geom_histogram(
    binwidth = 0.5,
    center = 0,
    alpha = 0.6,
    fill = "firebrick2"
  ) +
  facet_wrap(~ var_label, scales = "free", ncol = 2) +
  theme_minimal() +
  theme(axis.title.x = element_blank(),
        panel.grid.minor = element_blank())
There is evidence of digit preference, which we explore further with targeted summaries. More detailed univariate summaries for the variables of interest are provided below.
## ida_plot_univar() is a function that plots a strip plot, histogram and
## boxplot, including the five number summary.
ida_plot_univar(a_crash2, "age")
Five patients are younger than 17, the minimum age in the study's inclusion criteria, and one of them is aged 1.
# Detailed univariate summaries for the remaining continuous predictors,
# using the ida_plot_univar() helper sourced from R/ida_plot_univar.R.
# NOTE(review): bin_width and n_dodge are presumably display tuning arguments
# of the helper (histogram bin width, axis label dodging) - confirm against
# its definition.

# Systolic blood pressure.
ida_plot_univar(a_crash2, "sbp", bin_width = 0.7)
# Respiratory rate.
ida_plot_univar(a_crash2, "rr", n_dodge = 2)
# Heart rate.
ida_plot_univar(a_crash2, "hr")
# Central capillary refill time.
ida_plot_univar(a_crash2, "cc", bin_width = 0.5)
# Injury time.
ida_plot_univar(a_crash2, "injurytime", n_dodge = 3, bin_width = 0.25)
# Print the R version, platform and attached packages used to build this
# report, for reproducibility.
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.