# """
# Script defining functions used in cleaning, summarizing,
# and plotting clinical center labs exported from BTRIS
# """
library(tidyverse)
library(lubridate)
library(janitor)
library(here)
loaded_packages <- function(){
for (i in .packages()) {
cat(paste0(i, ": ", packageVersion(i)), sep = "\n")
}
}
#### QUICKPLOT---------------------------------------------------------------------------------------
# Quick plot of non-transformed labs
quickplot_numeric_btris_lab <- function(data, lab_id, group = mrn){
group <- enquo(group)
y_units <- data[which(data$cluster_id == lab_id),'unit_of_measure'] %>% sample_n(1) %>% c()
y_name <- data[which(data$cluster_id == lab_id),'cluster_name'] %>% sample_n(1) %>% c()
y_name <- str_extract(y_name, "[^(]+")
title <- paste0(y_name, "vs Age")
data %>% filter(data$cluster_id == lab_id) %>% mutate(observation_value = as.numeric(observation_value)) %>%
ggplot(aes(age_collected, observation_value)) +
geom_smooth(method = "lm", se = FALSE,
color = "grey50", alpha = 0.5, linetype = 'dashed') +
geom_jitter(aes(color = !!group), alpha = 0.75) +
geom_smooth(aes(color = !!group), method = "lm", se = FALSE) +
labs(
x = "Age (Years)",
y = paste0(y_name, " (", y_units, ")"),
title = title) +
theme(axis.ticks.y.left = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = 'bottom',
legend.title = element_blank()) +
#scale_color_brewer(palette = "Dark2")
viridis::scale_color_viridis(discrete = TRUE)
}
#### ELAPSED TIME-----------------------------------------------------------------------------------------------
# Calculating ages / time between dates
elapsed_years <- function(end_date, start_date) {
lubridate::time_length(difftime(as.POSIXlt(end_date), as.POSIXlt(start_date)), "years")
}
elapsed_days <- function(end_date, start_date) {
lubridate::time_length(difftime(as.POSIXlt(end_date), as.POSIXlt(start_date)), "days")
}
#### CLEANING & EXTRACTING--------------------------------------------------------
# CLEANING
clean_btris_lab_df <- function(file_path_csv) {
df <- readr::read_csv(file_path_csv)
df <- janitor::clean_names(df)
df %>% rename(date_collected = collected_date_time) %>%
mutate(date_collected = date(mdy_hm(date_collected)),
mrn = as_factor(mrn))
}
##### If BTRIS query did not include demogs, it's possible to merge them afterwards.
merge_btris_demogs <- function(clean_lab_df, demogs_file_path_csv) {
df_lab <- clean_lab_df
df_demogs <- readr::read_csv(demogs_file_path_csv)
df_demogs <- janitor::clean_names(df_demogs)
df_lab$mrn <- as.character(df_lab$mrn)
df_demogs$mrn <- as.character(df_demogs$mrn)
demogs_columns <- c("mrn", "date_of_birth", "gender", "first_name", "last_name")
df_merged <- dplyr::left_join(df_lab, df_demogs[, demogs_columns], by = "mrn")
# Change class of merged columns:
df_merged <- df_merged %>%
mutate_at(vars(contains("name|mrn|gender|cluster")), as_factor) %>%
mutate(date_of_birth = mdy(date_of_birth))
df_merged
}
# EXTRACTING
list_numeric_labs <- function(){
c(
aldolase = "E994",
basophil_abs = "E7",
basophil_percent = "E6",
calcium = "E11",
c3_complement = "E2299",
c4_complement = "E2300",
cd3_percent = "E416",
cd3_count = "E415",
cd8_cd3_percent = "E714",
cd8_cd3_count = "E713",
cd4_cd3_percent = "E630",
cd4_cd3_count = "E629",
cd19_percent = "E336",
cd19_count = "E335",
eosinophils_abs = "E19",
eosinophils_percent = "E18",
fibrinogen = "E2620",
haptoglobin = "E2628",
ferritin = "E124",
hematocrit = "E21",
hemoglobin = "E25",
iga = "E227",
igd = "E247",
ige = "E249",
igg = "E251",
igm = "E256",
immature_granulocytes_percent = "E31",
immature_granulocytes_abs = "E32",
iron = "E93",
iron_sat_persent = "E92",
ldh = "E33",
lymphocytes_abs = "E35",
lymphocytes_percent = "E34",
mch = "E37",
mchc = "E38",
mcv = "E39",
mpv = "E42",
monocytes_abs = "E41",
monocytes_percent = "E40",
neutrophils_abs = "E44",
neutrophils_percent = "E43",
nk_percent = "E824",
nk_count = "E823",
platelets = "E48",
rbc = "E51",
transferrin = "E124",
crp = "E74",
wbc = "E55"
)
}
table_numeric_labs <- function(){
df <- list_numeric_labs() %>% enframe(value = "cluster_id")
df %>% arrange(name)
}
n_missing_units <- function(df_labs, percent_cutoff = 0){
# Summary of observations missing units, percent_cutoff filters for proportion missing.
df_labs %>%
group_by(cluster_name) %>%
summarise(n_missing_units = sum(is.na(unit_of_measure)),
total_observations = n(),
perc_missing_units = round((n_missing_units / n() * 100), 1)) %>%
filter(perc_missing_units >= percent_cutoff)
}
unique_units <- function(df_labs, n_unique_cutoff = 1){
df_labs %>%
filter(!is.na(unit_of_measure)) %>%
group_by(cluster_name) %>%
summarise(units = str_c(unique(unit_of_measure), collapse = "; "),
n_unique = length(unique(unit_of_measure))) %>%
filter(n_unique >= n_unique_cutoff) %>%
arrange(cluster_name)
}
clean_numeric_values <- function(df_labs) {
df_labs %>%
mutate(observation_value =
as.numeric(str_extract(df_labs$observation_value, "\\d+\\.*\\d*"))) %>%
mutate(observation_value =
case_when(
cluster_id == "E74" & unit_of_measure == "mg/dL" ~
observation_value * 10,
TRUE ~ observation_value
),
unit_of_measure =
case_when(
cluster_id == "E994" ~ "U/L",
cluster_id == "E38" ~ "g/dL",
unit_of_measure %in% c("%", "PERCENT") ~ "%",
unit_of_measure %in% c("K/uL", "THOU/MM3","K/UL",
"K/mcL", "/MM3", "/uL", "UL", "K/uL", "/mcL") ~ "K/uL",
unit_of_measure %in% c("g/dL", "G/DL", "G/100ML") ~ "g/dL",
unit_of_measure %in% c("pg", "UUG") ~ "pg",
unit_of_measure %in% c("M/uL", "MILL/MM3", "M/mcL") ~ "M/uL",
unit_of_measure %in% c("fL", "CU MCRON") ~ "fL",
unit_of_measure %in% c("mg/dL", "mg/L") & cluster_id == "E74" ~ "mg/L",
unit_of_measure %in% c("mg/dL", "MG/DL") & cluster_id != "E74" ~ "mg/dL",
TRUE ~ unit_of_measure
))
}
### PLOTS & TABLES---------------------------------------------------------------------------------
#plot_lab <- function(lab_dataframe, lab_id, )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.