download_pysionet_schema <- function(url) {
dat <- ricu:::download_pysionet_file(
url, dest = NULL, user = NULL, pass = NULL
)
message("downloading schema at ", url)
xml2::as_list(xml2::read_xml(rawToChar(dat)))
}
get_table_info <- function(url) {
res <- download_pysionet_schema(url)
res <- res[["database"]][["tables"]]
res <- lapply(res, function(x) {
cols <- names(x) == "column"
names <- vapply(x[cols], attr, character(1L), "name")
types <- vapply(x[cols], attr, character(1L), "type")
list(
table_name = tolower(attr(x, "name")),
num_rows = as.integer(attr(x, "numRows")),
cols = as_col_spec(names, types)
)
})
unname(res)
}
col_spec_map <- function(type) {
switch(type,
bool = list(spec = "col_logical"),
int2 = ,
int4 = ,
int8 = list(spec = "col_integer"),
numeric = ,
float8 = list(spec = "col_double"),
bpchar = ,
text = ,
varchar = list(spec = "col_character"),
timestamp = list(spec = "col_datetime", format = "%Y-%m-%d %H:%M:%S"),
stop("unknown type")
)
}
as_col_spec <- function(names, types) {
Map(c, Map(list, name = tolower(names), col = names),
lapply(types, col_spec_map))
}
as_tbl_spec <- function(files, defaults, time_vars, tbl_info, partitioning) {
mod_col <- function(x) {
x[["name"]] <- x[["col"]]
x[["col"]] <- NULL
x
}
do_as <- function(file, default, time, tbl, part) {
all_cols <- vapply(tbl[["cols"]], `[[`, character(1L), "name")
stopifnot(all(vapply(default, `%in%`, logical(1L), all_cols)))
if (length(time)) {
default <- c(default, list(time_vars = unname(time)))
}
tbl <- c(list(files = file, defaults = default), tbl)
tbl[["table_name"]] <- NULL
tbl[["cols"]] <- setNames(lapply(tbl[["cols"]], mod_col), all_cols)
if (!is.null(part)) {
stopifnot(isTRUE(names(part) %in% all_cols))
tbl <- c(tbl, list(partitioning = list(col = names(part),
breaks = part[[1L]])))
}
tbl
}
tbls <- vapply(tbl_info, `[[`, character(1L), "table_name")
res <- Map(do_as, files[tbls], defaults[tbls], time_vars[tbls], tbl_info,
partitioning[tbls])
names(res) <- tolower(tbls)
res
}
as_minimal_tbl_spec <- function(x) {
x[setdiff(names(x), c("files", "num_rows", "cols"))]
}
eicu_tbl_cfg <- function(info, is_demo = FALSE) {
files <- c("admissionDrug.csv.gz",
"admissionDx.csv.gz",
"allergy.csv.gz",
"apacheApsVar.csv.gz",
"apachePatientResult.csv.gz",
"apachePredVar.csv.gz",
"carePlanCareProvider.csv.gz",
"carePlanEOL.csv.gz",
"carePlanGeneral.csv.gz",
"carePlanGoal.csv.gz",
"carePlanInfectiousDisease.csv.gz",
"customLab.csv.gz",
"diagnosis.csv.gz",
"hospital.csv.gz",
"infusionDrug.csv.gz",
"intakeOutput.csv.gz",
"lab.csv.gz",
"medication.csv.gz",
"microLab.csv.gz",
"note.csv.gz",
"nurseAssessment.csv.gz",
"nurseCare.csv.gz",
"nurseCharting.csv.gz",
"pastHistory.csv.gz",
"patient.csv.gz",
"physicalExam.csv.gz",
"respiratoryCare.csv.gz",
"respiratoryCharting.csv.gz",
"treatment.csv.gz",
"vitalAperiodic.csv.gz",
"vitalPeriodic.csv.gz")
names(files) <- sub("\\.csv\\.gz", "", tolower(files))
if (is_demo) {
files <- sub("Drug\\.csv\\.gz", "drug.csv.gz", files)
}
defaults <- list(
admissiondrug = list(
index_var = "drugoffset",
val_var = "drugdosage",
unit_var = "drugunit"
),
admissiondx = list(
index_var = "admitdxenteredoffset",
val_var = "admitdxtext"
),
allergy = list(
index_var = "allergyoffset",
val_var = "allergyname"
),
apacheapsvar = list(),
apachepatientresult = list(
val_var = "apachescore"
),
apachepredvar = list(),
careplancareprovider = list(
index_var = "careprovidersaveoffset",
val_var = "specialty"
),
careplaneol = list(
index_var = "cpleoldiscussionoffset"
),
careplangeneral = list(
index_var = "cplitemoffset",
val_var = "cplitemvalue"
),
careplangoal = list(
index_var = "cplgoaloffset",
val_var = "cplgoalvalue"
),
careplaninfectiousdisease = list(
index_var = "cplinfectdiseaseoffset",
val_var = "infectdiseasesite"
),
customlab = list(
index_var = "labotheroffset",
val_var = "labotherresult"
),
diagnosis = list(
index_var = "diagnosisoffset",
val_var = "icd9code"
),
hospital = list(
id_var = "hospitalid",
val_var = "numbedscategory"
),
infusiondrug = list(
index_var = "infusionoffset",
val_var = "drugrate"
),
intakeoutput = list(
index_var = "intakeoutputoffset",
val_var = "cellvaluenumeric"
),
lab = list(
index_var = "labresultoffset",
val_var = "labresult",
unit_var = "labmeasurenameinterface"
),
medication = list(
index_var = "drugstartoffset",
val_var = "dosage"
),
microlab = list(
index_var = "culturetakenoffset",
val_var = "organism"
),
note = list(
index_var = "noteoffset",
val_var = "notetext"
),
nurseassessment = list(
index_var = "nurseassessoffset",
val_var = "cellattributevalue"
),
nursecare = list(
index_var = "nursecareoffset",
val_var = "cellattributevalue"
),
nursecharting = list(
index_var = "nursingchartoffset",
val_var = "nursingchartvalue"
),
pasthistory = list(
index_var = "pasthistoryoffset",
val_var = "pasthistoryvalue"
),
patient = list(
val_var = "unitdischargestatus"
),
physicalexam = list(
index_var = "physicalexamoffset",
val_var = "physicalexamvalue"
),
respiratorycare = list(
index_var = "respcarestatusoffset"
),
respiratorycharting = list(
index_var = "respchartoffset",
val_var = "respchartvalue"
),
treatment = list(
index_var = "treatmentoffset",
val_var = "treatmentstring"
),
vitalaperiodic = list(
index_var = "observationoffset"
),
vitalperiodic = list(
index_var = "observationoffset"
)
)
part <- list(
nursecharting = list(
patientunitstayid = `if`(is_demo,
1775421L,
c(514528L, 1037072L, 1453997L, 1775421L, 2499831L, 2937948L, 3213286L)
)
),
vitalperiodic = list(
patientunitstayid = `if`(is_demo,
1775421L,
c(514528L, 1037072L, 1453997L, 1775421L, 2499831L, 2937948L, 3213286L)
)
)
)
if (is_demo) {
info <- lapply(info, `[[<-`, "num_rows", NULL)
} else {
tbl <- vapply(info, `[[`, character(1L), "table_name") == "respiratorycare"
new <- info[[which(tbl)]][["cols"]]
col <- vapply(new, `[[`, character(1L), "col") == "apneaparams"
new[[which(col)]][["col"]] <- "apneaparms"
info[[which(tbl)]][["cols"]] <- new
}
time_vars <- lapply(info, function(x) {
nme <- vapply(x[["cols"]], `[[`, character(1L), "name")
typ <- vapply(x[["cols"]], `[[`, character(1L), "spec")
nme[typ == "col_integer" & grepl("offset$", nme)]
})
names(time_vars) <- vapply(info, `[[`, character(1L), "table_name")
as_tbl_spec(files, defaults, time_vars, info, part)
}
mimic_tbl_cfg <- function(info, is_demo = FALSE) {
files <- c("ADMISSIONS.csv.gz",
"CALLOUT.csv.gz",
"CAREGIVERS.csv.gz",
"CHARTEVENTS.csv.gz",
"CPTEVENTS.csv.gz",
"DATETIMEEVENTS.csv.gz",
"DIAGNOSES_ICD.csv.gz",
"DRGCODES.csv.gz",
"D_CPT.csv.gz",
"D_ICD_DIAGNOSES.csv.gz",
"D_ICD_PROCEDURES.csv.gz",
"D_ITEMS.csv.gz",
"D_LABITEMS.csv.gz",
"ICUSTAYS.csv.gz",
"INPUTEVENTS_CV.csv.gz",
"INPUTEVENTS_MV.csv.gz",
"LABEVENTS.csv.gz",
"MICROBIOLOGYEVENTS.csv.gz",
"NOTEEVENTS.csv.gz",
"OUTPUTEVENTS.csv.gz",
"PATIENTS.csv.gz",
"PRESCRIPTIONS.csv.gz",
"PROCEDUREEVENTS_MV.csv.gz",
"PROCEDURES_ICD.csv.gz",
"SERVICES.csv.gz",
"TRANSFERS.csv.gz")
names(files) <- sub("\\.csv\\.gz", "", tolower(files))
if (is_demo) {
files <- sub("\\.gz$", "", files)
}
defaults <- list(
admissions = list(
val_var = "admission_type"
),
callout = list(
index_var = "outcometime",
val_var = "callout_outcome"
),
caregivers = list(
id_var = "cgid",
val_var = "label"
),
chartevents = list(
index_var = "charttime",
val_var = "valuenum",
unit_var = "valueuom"
),
cptevents = list(
index_var = "chartdate",
val_var = "cpt_cd"
),
d_cpt = list(
id_var = "subsectionrange",
val_var = "subsectionheader"
),
d_icd_diagnoses = list(
id_var = "icd9_code",
val_var = "short_title"
),
d_icd_procedures = list(
id_var = "icd9_code",
val_var = "short_title"
),
d_items = list(
id_var = "itemid",
val_var = "label"
),
d_labitems = list(
id_var = "itemid",
val_var = "label"
),
datetimeevents = list(
index_var = "charttime",
val_var = "itemid"
),
diagnoses_icd = list(
val_var = "icd9_code"
),
drgcodes = list(
val_var = "drg_code"
),
icustays = list(
index_var = "intime",
val_var = "last_careunit"
),
inputevents_cv = list(
index_var = "charttime",
val_var = "rate",
unit_var = "rateuom"
),
inputevents_mv = list(
index_var = "starttime",
val_var = "rate",
unit_var = "rateuom"
),
labevents = list(
index_var = "charttime",
val_var = "valuenum",
unit_var = "valueuom"
),
microbiologyevents = list(
index_var = "chartdate",
val_var = "isolate_num"
),
noteevents = list(
index_var = "chartdate",
val_var = "text"
),
outputevents = list(
index_var = "charttime",
val_var = "value",
unit_var = "valueuom"
),
patients = list(
val_var = "expire_flag"
),
prescriptions = list(
index_var = "startdate",
val_var = "dose_val_rx",
unit_var = "dose_unit_rx"
),
procedureevents_mv = list(
index_var = "starttime",
val_var = "value",
unit_var = "valueuom"
),
procedures_icd = list(
val_var = "icd9_code"
),
services = list(
index_var = "transfertime",
val_var = "curr_service"
),
transfers = list(
index_var = "intime",
val_var = "curr_careunit"
)
)
part <- list(
chartevents = list(
itemid = `if`(is_demo,
100000L,
c( 127L, 210L, 425L, 549L, 643L, 741L, 1483L,
3458L, 3695L, 8440L, 8553L, 220274L, 223921L, 224085L,
224859L, 227629L
)
)
)
)
info <- info[
!grepl("^chartevents_", vapply(info, `[[`, character(1L), "table_name"))
]
if (is_demo) {
info <- lapply(info, `[[<-`, "num_rows", NULL)
info <- info[
vapply(info, `[[`, character(1L), "table_name") != "noteevents"
]
} else {
info <- lapply(info, function(x) {
x[["cols"]] <- Map(`[[<-`, x[["cols"]], "col",
toupper(vapply(x[["cols"]], `[[`, character(1L), "col"))
)
x
})
}
time_vars <- lapply(info, function(x) {
nme <- vapply(x[["cols"]], `[[`, character(1L), "name")
typ <- vapply(x[["cols"]], `[[`, character(1L), "spec")
nme[typ == "col_datetime"]
})
names(time_vars) <- vapply(info, `[[`, character(1L), "table_name")
as_tbl_spec(files, defaults, time_vars, info, part)
}
hirid_tbl_cfg <- function() {
info <- list(
general = list(
patientid = list(spec = "col_integer"),
admissiontime = list(spec = "col_datetime",
format = "%Y-%m-%d %H:%M:%S"),
sex = list(spec = "col_character"),
age = list(spec = "col_integer")
),
observations = list(
patientid = list(spec = "col_integer"),
datetime = list(spec = "col_datetime", format = "%Y-%m-%d %H:%M:%S"),
entertime = list(spec = "col_datetime", format = "%Y-%m-%d %H:%M:%S"),
status = list(spec = "col_integer"),
stringvalue = list(spec = "col_character"),
type = list(spec = "col_character"),
value = list(spec = "col_double"),
variableid = list(spec = "col_integer")
),
ordinal = list(
variableid = list(spec = "col_integer"),
code = list(spec = "col_integer"),
stringvalue = list(spec = "col_character")
),
pharma = list(
patientid = list(spec = "col_integer"),
pharmaid = list(spec = "col_integer"),
givenat = list(spec = "col_datetime", format = "%Y-%m-%d %H:%M:%S"),
enteredentryat = list(spec = "col_datetime",
format = "%Y-%m-%d %H:%M:%S"),
givendose = list(spec = "col_double"),
cumulativedose = list(spec = "col_double"),
fluidamount_calc = list(spec = "col_double"),
cumulfluidamount_calc = list(spec = "col_double"),
doseunit = list(spec = "col_character"),
route = list(spec = "col_character"),
infusionid = list(spec = "col_integer"),
typeid = list(spec = "col_integer"),
subtypeid = list(spec = "col_double"),
recordstatus = list(spec = "col_integer")
),
variables = list(
`Source Table` = list(spec = "col_character"),
ID = list(spec = "col_integer"),
`Variable Name` = list(spec = "col_character"),
Unit = list(spec = "col_character"),
`Additional information` = list(spec = "col_character")
)
)
files <- list(
general = "general_table.csv",
observations = file.path(
"observation_tables", "csv", paste0("part-", 0L:249L, ".csv")
),
ordinal = "ordinal_vars_ref.csv",
pharma= file.path(
"pharma_records", "csv", paste0("part-", 0L:249L, ".csv")
),
variables = "hirid_variable_reference.csv"
)
defaults <- list(
general = list(
index_var = "admissiontime"
),
observations = list(
index_var = "datetime",
val_var = "value"
),
ordinal = list(
id_var = "variableid"
),
pharma = list(
index_var = "givenat",
val_var = "givendose",
unit_var = "doseunit"
),
variables = list(
id_var = "id"
)
)
n_row <- list(
general = 33905L,
variables = 712L,
ordinal = 72L,
pharma = 16270399L,
observations = 776921131L
)
zip_files <- list(
general = "reference_data.tar.gz",
variables = "reference_data.tar.gz",
ordinal = "reference_data.tar.gz",
pharma = "raw_stage/pharma_records_csv.tar.gz",
observations = "raw_stage/observation_tables_csv.tar.gz"
)
part <- list(
observations = list(variableid = c(
110L, 120L, 200L, 210L, 211L, 300L, 620L,
2010L, 2610L, 3110L, 4000L, 5685L, 15001565L, 30005075L)
),
pharma = list(pharmaid = 431L)
)
info <- lapply(info, function(x) {
Map(c, Map(list, name = sub(" ", "_", tolower(names(x))),
col = names(x)), x)
})
info <- Map(list, table_name = names(info), cols = info,
num_rows = n_row[names(info)],
zip_file = zip_files[names(info)])
time_vars <- lapply(info, function(x) {
nme <- vapply(x[["cols"]], `[[`, character(1L), "name")
typ <- vapply(x[["cols"]], `[[`, character(1L), "spec")
nme[typ == "col_datetime"]
})
names(time_vars) <- vapply(info, `[[`, character(1L), "table_name")
as_tbl_spec(files, defaults, time_vars, info, part)
}
pkg_dir <- rprojroot::find_root(rprojroot::is_r_package)
cfg_dir <- file.path(pkg_dir, "inst", "extdata", "config")
eicu_id_cfg <- list(
hadm = list(id = "patienthealthsystemstayid", position = 1L,
start = "hospitaladmitoffset",
end = "hospitaldischargeoffset", table = "patient"),
icustay = list(id = "patientunitstayid", position = 2L,
start = "unitadmitoffset", end = "unitdischargeoffset",
table = "patient")
)
mimic_id_cfg <- list(
patient = list(id = "subject_id", position = 1L, start = "dob",
end = "dod", table = "patients"),
hadm = list(id = "hadm_id", position = 2L, start = "admittime",
end = "dischtime", table = "admissions"),
icustay = list(id = "icustay_id", position = 3L, start = "intime",
end = "outtime", table = "icustays")
)
eicu <- get_table_info(
"https://mit-lcp.github.io/eicu-schema-spy/eicu.eicu_crd.xml"
)
mimic <- get_table_info(
"https://mit-lcp.github.io/mimic-schema-spy/mimic.mimiciii.xml"
)
eicu_demo_tbls <- eicu_tbl_cfg(eicu, is_demo = TRUE)
mimic_demo_tbls <- mimic_tbl_cfg(mimic, is_demo = TRUE)
cfg <- list(
list(
name = "eicu",
url = "https://physionet.org/files/eicu-crd/2.0",
id_cfg = eicu_id_cfg,
tables = eicu_tbl_cfg(eicu, is_demo = FALSE)
),
list(
name = "eicu_demo",
class_prefix = c("eicu_demo", "eicu"),
url = "https://physionet.org/files/eicu-crd-demo/2.0",
id_cfg = eicu_id_cfg,
tables = eicu_demo_tbls
),
list(
name = "mimic",
url = "https://physionet.org/files/mimiciii/1.4",
id_cfg = mimic_id_cfg,
tables = mimic_tbl_cfg(mimic, is_demo = FALSE)
),
list(
name = "mimic_demo",
class_prefix = c("mimic_demo", "mimic"),
url = "https://physionet.org/files/mimiciii-demo/1.4",
id_cfg = mimic_id_cfg,
tables = mimic_demo_tbls
),
list(
name = "hirid",
url = "https://physionet.org/files/hirid/1.0",
unzip = c(
"reference_data.tar.gz",
"raw_stage/observation_tables_csv.tar.gz",
"raw_stage/pharma_records_csv.tar.gz"
),
id_cfg = list(
icustay = list(id = "patientid", position = 1L, start = "admissiontime",
table = "general")
),
tables = hirid_tbl_cfg()
)
)
ricu::set_config(cfg, "data-sources", cfg_dir)
cfg <- list(
list(
name = "eicu_demo",
class_prefix = c("eicu_demo", "eicu"),
id_cfg = eicu_id_cfg,
tables = lapply(eicu_demo_tbls, as_minimal_tbl_spec)
),
list(
name = "mimic_demo",
class_prefix = c("mimic_demo", "mimic"),
id_cfg = mimic_id_cfg,
tables = lapply(mimic_demo_tbls, as_minimal_tbl_spec)
)
)
ricu::set_config(cfg, "demo-sources", cfg_dir)
devtools::install(pkg_dir)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.