inst/doc/ricu.R

## ----setup, include = FALSE-----------------------------------------------
options(
  width = 76,
  kableExtra.latex.load_packages = FALSE,
  crayon.enabled = FALSE
)

library(ricu)
library(data.table)
library(forestmodel)
library(survival)
library(ggplot2)
library(kableExtra)

source(system.file("extdata", "vignettes", "helpers.R", package = "ricu"))

srcs <- c("mimic", "eicu", "aumc", "hirid", "miiv")

## ---- assign-src, echo = FALSE--------------------------------------------
src  <- "mimic_demo"

## ---- assign-demo, echo = FALSE-------------------------------------------
demo <- c(src, "eicu_demo")

## ---- demo-miss, echo = FALSE, eval = !srcs_avail(demo), results = "asis"----
#  demo_missing_msg(demo, "ricu.pdf")
#  knitr::opts_chunk$set(eval = FALSE)

## ----demo-data, eval = FALSE----------------------------------------------
#  install.packages(
#    c("mimic.demo", "eicu.demo"),
#    repos = "https://eth-mds.github.io/physionet-demo"
#  )

## ----mimic-tbls, eval = TRUE----------------------------------------------
mimic

## ----width-inc, include = FALSE---------------------------------------------
old_width <- options(width = 78)[["width"]]

## ----eicu-tbls, eval = TRUE-------------------------------------------------
eicu

## ----width-dec, include = FALSE-------------------------------------------
options(width = old_width)

## ----hirid-tbls, eval = TRUE----------------------------------------------
hirid

## ----aumc-tbls, eval = TRUE-----------------------------------------------
aumc

## ----miiv-tbls, eval = TRUE-----------------------------------------------
miiv

## ----src-overview, echo = FALSE, results = "asis", cache = TRUE-----------
as_quant <- function(x) {

  if (is_id_tbl(x)) {
    x <- data_col(x)
  }

  if (identical(length(x), 0L) || isTRUE(is.na(x))) {
    return("-")
  }

  res <- format_2(quantile(x, probs = seq(0.25, 0.75, 0.25), na.rm = TRUE))

  paste0(res[2L], " (", res[1L], "--", res[3L], ")")
}

big_mark <- function(x) {

  if (identical(length(x), 0L) || isTRUE(is.na(x))) {
    return("-")
  }

  formatC(x, big.mark = ",", format = "d")
}

format_2 <- function(x) {
  formatC(x, digits = 2L, format = "f")
}

n_patient <- function(x, type) {
  if (type %in% names(as_id_cfg(x))) nrow(stay_windows(x, type)) else NA
}

feat_freq <- function(src, concept, time_span = "hours") {
  res <- load_concepts(concept, src, interval = mins(1L), verbose = FALSE)
  res <- res[, 1 / diff(as.double(get(index_var(res)), units = time_span)),
             by = c(id_var(res))]
  res
}

years <- function(src) {
  switch(src,
    mimic = "2001--2012",
    eicu = "2014--2015",
    hirid = "2008--2016",
    aumc = "2003--2016",
    miiv = "2008--2019",
    NA
  )
}

country <- function(src) {
  switch(src,
    mimic = "United States",
    eicu = "United States",
    hirid = "Switzerland",
    aumc = "Netherlands",
    miiv = "United States",
    NA
  )
}

summarize <- function(src, avail) {

  ids <- as_id_cfg(src)
  cnc <- avail[, src]

  nrow(stay_windows(src, "icustay"))

  los_icu <- load_concepts("los_icu", src, verbose = FALSE)

  hosp_len <- if ("hadm" %in% names(ids)) {
    load_concepts("los_hosp", src, id_type = "hadm", verbose = FALSE)
  }

  fil <- list.files(src_data_dir(src), recursive = TRUE, full.names = TRUE)
  siz <- sum(vapply(fil, file.size, numeric(1L))) * 1e-9
  row <- vapply(as_src_env(src), nrow, integer(1L))

  c(`Number of tables` = big_mark(length(as_src_env(src))),
    `Disk storage [GB]` = format_2(siz),
    `Largest table [rows]` = big_mark(max(row)),
    `Available concepts` = sum(cnc),
    `Time span` = years(src),
    `Country of origin` = country(src),
    `ICU` = big_mark(n_patient(src, "icustay")),
    `Hospital` = big_mark(n_patient(src, "hadm")),
    `Unique patients` = big_mark(n_patient(src, "patient")),
    `ICU stays` = as_quant(los_icu),
    `Hospital stays` = as_quant(hosp_len),
    `Heart rate` = as_quant(feat_freq(src, "hr")),
    `Mean arterial pressure` = as_quant(feat_freq(src, "map")),
    `Bilirubin` = as_quant(feat_freq(src, "bili", "days")),
    `Lactate` = as_quant(feat_freq(src, "lact", "days"))
  )
}

if (srcs_avail(demo) && (!srcs_avail(srcs) || quick_build())) {
  srcs <- demo
}

src_names <- c(
  mimic = "MIMIC-III", eicu = "eICU", hirid = "HiRID", aumc = "AmsterdamUMCdb",
  miiv = "MIMIC-IV", mimic_demo = "MIMIC (demo)", eicu_demo = "eICU (demo)"
)[srcs]

src_names[is.na(src_names)] <- srcs[is.na(src_names)]

dict <- load_dictionary(srcs)
avai <- concept_availability(dict, include_rec = FALSE)
summ <- vapply(srcs, summarize, character(15L), avai)

colnames(summ)     <- src_names
rownames(summ)     <- rownames(summ)
rownames(summ)[4L] <- paste0(rownames(summ)[4L],
                             footnote_marker_symbol(1, "latex"))

n_rec_cpt <- nrow(concept_availability(dict, include_rec = TRUE)) -
             nrow(avai)

capt <- paste(
  "Comparison of datasets supported by \\pkg{ricu}, highlighting some of",
  "the major similarities and distinguishing features among the five data",
  "sources described in the preceding sections. Values followed by",
  "parenthesized ranges represent medians and are accompanied by",
  "interquartile ranges."
)

tbl <- kable(summ, format = "latex", escape = FALSE, booktabs = TRUE,
             caption = capt, label = "datasets")
tbl <- pack_rows(tbl, "Data collection", 5, 6)
tbl <- pack_rows(tbl, "Admission counts", 7, 9)
tbl <- pack_rows(tbl, "Stay lengths [day]", 10, 11)
tbl <- pack_rows(tbl, "Vital signs [1/hour]", 12, 13)
tbl <- pack_rows(tbl, "Lab tests [1/day]", 14, 15)
tbl <- footnote(tbl, symbol = paste(
  "These values represent the number of atomic concepts per data source.",
  "Additionally,", n_rec_cpt, "recursive concepts are available, which",
  "build on data source specific atomic concepts in a source agnostic manner",
  "(see Section \\\\ref{concept-specification} for details)."),
  threeparttable = TRUE, escape = FALSE
)

if (identical(srcs, demo)) {
  tbl
} else {
  landscape(tbl)
}

## ---- full-miss, echo = FALSE, eval = identical(srcs, demo), results = "asis"----
#  demo_instead_full_msg(demo, srcs, "ricu.pdf")

## ---- load-conc, eval = srcs_avail(demo)----------------------------------
src  <- "mimic_demo"
demo <- c(src, "eicu_demo")

load_concepts("hr", demo, verbose = FALSE)

## ----id-tbl, eval = TRUE--------------------------------------------------
(dat <- ts_tbl(a = 1:5, b = hours(1:5), c = rnorm(5)))
dat[["b"]] <- dat[["b"]] + mins(30)
dat

## ----win-tbl, eval = TRUE-------------------------------------------------
(dat <- win_tbl(a = 1:5, b = hours(1:5), c = mins(rep(90, 5)),
                d = runif(5)))
expand(dat)

## ---- load-dict, eval = srcs_avail(demo)----------------------------------
dict <- load_dictionary(demo)
head(dict)
explain_dictionary(head(dict))

## ---- dict-cat, eval = srcs_avail(demo)-----------------------------------
table(vapply(dict, `[[`, character(1L), "category"))

## ---- load-phys, eval = srcs_avail(src)-----------------------------------
load_concepts(c("alb", "glu"), src, interval = mins(15L),
              verbose = FALSE)

## ---- load-demog, eval = srcs_avail(src)----------------------------------
load_concepts(c("age", "glu"), src, verbose = FALSE)

## ---- load-treat, eval = srcs_avail(src)----------------------------------
load_concepts(c("abx", "vent_ind", "norepi_rate", "norepi_dur"), src,
              verbose = FALSE)

## ---- load-dex, eval = srcs_avail("mimic_demo")---------------------------
load_concepts("dex", "mimic_demo", verbose = FALSE)

## ---- load-out, eval = srcs_avail(src)------------------------------------
load_concepts(c("sirs", "death"), src, verbose = FALSE,
              keep_components = TRUE)

## ----mimic-adm, eval = srcs_avail("mimic_demo")---------------------------
mimic_demo$admissions

## ----mimic-sub, eval = srcs_avail("mimic_demo")---------------------------
subset(mimic_demo$admissions, subject_id > 44000, language:ethnicity)

## ----mimic-tidy, eval = srcs_avail("mimic_demo")--------------------------
subject_id <- 44000:45000
subset(mimic_demo$admissions, .data$subject_id %in% .env$subject_id,
       subject_id:dischtime)

## ----load-src, eval = srcs_avail("mimic_demo")----------------------------
load_src(mimic_demo$admissions, subject_id > 44000,
         cols = c("hadm_id", "admittime", "dischtime"))

## ----load-dt, eval = FALSE------------------------------------------------
#  load_difftime(mimic_demo$admissions, subject_id > 44000,
#                cols = c("hadm_id", "admittime", "dischtime"))

## ----load-dt-print, eval = srcs_avail("mimic_demo"), echo = FALSE---------
load_difftime(mimic_demo$admissions, subject_id > 44000,
              cols = c("hadm_id", "admittime", "dischtime"))[]

## ----load-id, eval = FALSE------------------------------------------------
#  load_id(mimic_demo$admissions, subject_id > 44000,
#          cols = c("admittime", "dischtime"), id_var = "hadm_id")

## ----load-id-print, eval = srcs_avail("mimic_demo"), echo = FALSE---------
load_id(mimic_demo$admissions, subject_id > 44000,
        cols = c("admittime", "dischtime"), id_var = "hadm_id")[]

## ----id-win, eval = srcs_avail("mimic_demo")------------------------------
id_windows(mimic_demo)

## ----id-orig, eval = srcs_avail("mimic_demo")-----------------------------
id_origin(mimic_demo, "icustay_id")

## ----mimic-cfg, eval = TRUE-----------------------------------------------
cfg <- load_src_cfg("mimic_demo")
str(cfg, max.level = 3L, width = 70L)
mi_cfg <- cfg[["mimic_demo"]]

## ----mimic-ids, eval = TRUE-----------------------------------------------
as_id_cfg(mi_cfg)

## ----mimic-col, eval = TRUE-----------------------------------------------
as_col_cfg(mi_cfg)

## ----mimic-tbl, eval = TRUE-----------------------------------------------
as_tbl_cfg(mi_cfg)

## ---- ext-difftime--------------------------------------------------------
ms_as_min <- function(x) {
  as.difftime(as.integer(x / 6e4), units = "mins")
}

aumc_difftime <- function(x, rows, cols = colnames(x),
                          id_hint = id_vars(x),
                          time_vars = ricu::time_vars(x), ...) {

  if (id_hint %in% colnames(x)) {
    id_sel <- id_hint
  } else {
    id_opt <- id_var_opts(sort(as_id_cfg(x), decreasing = TRUE))
    id_sel <- intersect(id_opt, colnames(x))[1L]
  }

  stopifnot(is.character(id_sel), length(id_sel) == 1L)

  if (!id_sel %in% cols) {
    cols <- c(id_sel, cols)
  }

  time_vars <- intersect(time_vars, cols)

  dat <- load_src(x, {{ rows }}, cols)
  dat <- dat[, c(time_vars) := lapply(.SD, ms_as_min),
             .SDcols = time_vars]

  as_id_tbl(dat, id_vars = id_sel, by_ref = TRUE)
}

## ---- ext-win-------------------------------------------------------------
aumc_windows <- function(x) {

  ids <- c("admissionid", "patientid")
  sta <- c("admittedat", "firstadmittedat")
  end <- c("dischargedat", "dateofdeath")

  tbl <- as_src_tbl(x, "admissions")

  res <- tbl[, c(ids, sta[1L], end)]
  res <- res[, c(sta[2L]) := 0L]
  res <- res[, c(sta, end) := lapply(.SD, ms_as_min),
             .SDcols = c(sta, end)]

  res <- data.table::setcolorder(res, c(ids, sta, end))
  res <- rename_cols(res, c(ids, paste0(ids, "_start"),
                                 paste0(ids, "_end")), by_ref = TRUE)

  as_id_tbl(res, ids[2L], by_ref = TRUE)
}

## ----cox-surv, eval = srcs_avail("mimic_demo")----------------------------
src <- "mimic_demo"

cohort <- load_id("icustays", src, dbsource == "metavision",
                  cols = NULL)
cohort <- load_concepts("age", src, patient_ids = cohort,
                        verbose = FALSE)

dat <- load_concepts(c("lact", "death", "sofa"), src,
                     patient_ids = cohort[age > 20 & age < 90, ],
                     verbose = FALSE)

dat <- dat[,
  head(.SD, n = match(TRUE, death, .N)), by = c(id_vars(dat))
]

dat <- fill_gaps(dat)

dat <- replace_na(dat, c(NA, FALSE), type = c("locf", "const"),
                  by_ref = TRUE, vars = c("lact", "death"),
                  by = id_vars(dat))

cox_mod <- coxph(
  Surv(charttime - 1L, charttime, death) ~ lact + sofa,
  data = dat
)

## ----cox-plot, eval = srcs_avail(src), echo = FALSE, warning = FALSE, message = FALSE, fig.width = 8----
theme_fp <- function(...) {
  theme_bw(...) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        axis.title.y = element_blank(), axis.title.x = element_blank(),
        axis.text.y = element_blank(), axis.ticks.y = element_blank())
}

forest_model(cox_mod, theme = theme_fp(16))

## ----ins24, eval = srcs_avail(src)----------------------------------------
ins_breaks <- c(0, 1, 10, 20, 40, Inf)

ins_cb <- function(ins, ...) {

  day_one <- function(x) x >= hours(0L) & x <= hours(24L)

  idx_var <- index_var(ins)
  ids_var <- id_vars(ins)

  ins <- ins[
    day_one(get(idx_var)), list(ins24 = sum(ins)), by = c(ids_var)
  ]

  ins <- ins[,
    ins24 := list(cut(ins24, breaks = ins_breaks, right = FALSE))
  ]

  ins
}

ins24 <- load_dictionary(src, "ins")
ins24 <- concept("ins24", ins24, "insulin in first 24h",
                 aggregate = "sum", callback = ins_cb,
                 target = "id_tbl", class = "rec_cncpt")

## ----diab, eval = srcs_avail(src)-----------------------------------------
grep_diab <- function(x) {
  grepl("^250\\.?[0-9]{2}$", x)
}

diab  <- item(src, table = "diagnoses_icd",
              callback = transform_fun(grep_diab),
              class = "col_itm")

diab  <- concept("diab", diab, "diabetes", target = "id_tbl",
                 class = "lgl_cncpt")

dat <- load_concepts(c(ins24, diab), id_type = "icustay",
                     verbose = FALSE)
dat <- replace_na(dat, "[0,1)", vars = "ins24")

dat

## ----diabetes-visualize, echo = FALSE, eval = srcs_avail(src), fig.height = 3----
dat <- dat[, weight := 1 / .N, by = diab]
ggplot(dat, aes(x = ins24, fill = diab)) +
  stat_count(aes(weight = weight), alpha = 0.75, position = "dodge") +
  labs(x = "Amount of administered insulin in first 24h of ICU stay [units]",
       y = "Proportion of patients",
       fill = "Diabetic") +
  theme_bw(10)

## ----session-info, include = FALSE----------------------------------------
sessionInfo()

Try the ricu package in your browser

Any scripts or data that you put into this service are public.

ricu documentation built on Sept. 8, 2023, 5:45 p.m.