#' Import BIOCARD Data
#'
#' Import BIOCARD data from source files and generate the analysis dataset.
#'
#' @inheritParams parameters
#'
#' @return
#'
#' Returned the analysis dataset with: patients' ids, baseline times,
#' corresponding biomarkers, biomarker test times, etc.
#'
#' @examples
#' \dontrun{
#' ## with default unoverlaped window
#' dt_biocard <- get_biocard(path)
#'
#' ## with costomized window
#' dt_biocard <- get_biocard(path,
#' window_setting = window_Set)
#'
#' ## with dictionary provided by user
#' dt_biocard <- get_biocard(path,
#' src_tables = "dict_src_tables.xlsx")
#' }
#'
#' @export
#'
#'
adt_get_biocard <- function(path = ".",
reference_time = NULL,
window_setting = NULL,
pattern = "*.xls",
src_files = NULL,
src_tables = NULL,
...,
verbose = TRUE) {
## --------- internal functions -------------------------------------
## window setting
if (is.null(window_setting)) {
window_setting = list(cog = c(730, FALSE),
diag = c(730, FALSE),
csf = c(730, FALSE),
hippo = c(730, FALSE),
amy = c(730, FALSE),
ec = c(730, FALSE))
}
## convert date
f_date <- function(code, date_name, dta) {
mvar <- a_map_var("BIOCARD", code, date_name, dict_src_tables)
dfmt <- dict_src_files %>%
filter(adt_table_code == code)
stopifnot(1 == nrow(dfmt))
dta %>%
mutate(!!date_name :=
as.Date(!!as.name(mvar),
dfmt[["src_date_format"]])) %>%
select(- all_of(mvar))
}
## map var
f_map <- function(code, var, dta, fc = NULL) {
mvar <- a_map_var("BIOCARD", code, var, dict_src_tables)
dta <- dta %>%
rename(!!var := mvar)
if (!is.null(fc))
dta[var] <- lapply(dta[var], fc)
dta
}
## --------- prepare files -------------------------------------
## list all file names matching the pattern
file_names <- list.files(path = path,
pattern = pattern,
full.names = TRUE)
## dictionary of src files
dict_src_files <- adt_get_dict("src_files", csv_fname = src_files)
dict_src_tables <- adt_get_dict("src_tables", csv_fname = src_tables)
dict_data <- adt_get_dict("ana_data")
## --------- read tables -------------------------------------
vec_tbls <- c("COG", "DIAG", "CSF", "DEMO",
"HIPPO", "AMY", "EC", "GE")
chk_all <- NULL
for (i in vec_tbls) {
cur_dat <- a_read_file(i, file_names, dict_src_files, verbose)
cur_chk <- a_check_src(i, cur_dat, dict_src_tables)
assign(paste("dat_", tolower(i), sep = ""),
cur_dat)
chk_all <- rbind(chk_all, cur_chk)
}
if (dim(chk_all)[1] > 0) {
err_msg <- a_err_msg("biocard_load_error")
message(err_msg)
print(chk_all)
}
dat_lsta <- a_read_file("LIST_A", file_names, dict_src_files, verbose)
dat_lstb <- a_read_file("LIST_B", file_names, dict_src_files, verbose)
## ---------- manipulation ----------------------------------
a_print("Formatting data ...", verbose)
dat_cog <- f_date("COG", "date_cog", dat_cog)
dat_cog <- f_map("COG", "subject_id", dat_cog)
dat_diag <- f_date("DIAG", "date_diag", dat_diag)
dat_diag <- f_map("DIAG", "subject_id", dat_diag)
dat_csf <- f_date("CSF", "date_csf", dat_csf)
dat_csf <- f_map("CSF", "abeta", dat_csf)
dat_csf <- f_map("CSF", "subject_id", dat_csf)
dat_demo <- f_map("DEMO", "subject_id", dat_demo)
dat_demo <- dat_demo %>%
select(-c("jhuanonid", "lettercode", "nihid"))
dat_hippo <- f_date("HIPPO", "date_hippo", dat_hippo)
dat_hippo <- f_map("HIPPO", "subject_id", dat_hippo)
dat_hippo <- f_map("HIPPO", "intracranial_vol_hippo",
dat_hippo, as.numeric)
dat_hippo <- f_map("HIPPO", "l_hippo", dat_hippo, as.numeric)
dat_hippo <- f_map("HIPPO", "r_hippo", dat_hippo, as.numeric)
## MRI amygdala
dat_amy <- f_date("AMY", "date_amy", dat_amy)
dat_amy <- f_map("AMY", "subject_id", dat_amy)
dat_amy <- f_map("AMY", "intracranial_vol_amy",
dat_amy, as.numeric)
dat_amy <- f_map("AMY", "l_amy", dat_amy, as.numeric)
dat_amy <- f_map("AMY", "r_amy", dat_amy, as.numeric)
## MRI EC volume
dat_ec <- f_date("EC", "date_ec", dat_ec)
dat_ec <- f_map("EC", "subject_id", dat_ec)
dat_ec <- f_map("EC", "intracranial_vol_ec", dat_ec, as.numeric)
dat_ec <- f_map("EC", "l_ec_vol", dat_ec, as.numeric)
dat_ec <- f_map("EC", "r_ec_vol", dat_ec, as.numeric)
dat_ec <- f_map("EC", "l_ec_thick", dat_ec, as.numeric)
dat_ec <- f_map("EC", "r_ec_thick", dat_ec, as.numeric)
## process data
dat_hippo$bi_hippo <- (dat_hippo$l_hippo + dat_hippo$r_hippo) / 2
dat_amy$bi_amy <- (dat_amy$l_amy + dat_amy$r_amy) / 2
dat_ec$bi_ec_vol <- (dat_ec$l_ec_vol + dat_ec$r_ec_vol) / 2
dat_ec$bi_ec_thick <- (dat_ec$l_ec_thick + dat_ec$r_ec_thick) / 2
## race
dat_ge <- dat_ge %>%
select(-c("jhuanonid", "lettercode", "nihid"))
dat_ge <- f_map("GE", "subject_id", dat_ge)
## exclude subjects from list A and list B
id_name <- a_map_var("BIOCARD", "LIST_A", "subject_id", dict_src_tables)
exid <- c(dat_lsta[[id_name]],
dat_lstb[[id_name]])
## ------------- merge all data -------------------------------------
# use vec_tbl
a_print("Combining all data ...", verbose)
dat_all <- dat_cog %>%
select(subject_id, date = date_cog) %>%
rbind(dat_diag %>% select(subject_id, date = date_diag )) %>%
rbind(dat_amy %>% select(subject_id, date = date_amy )) %>%
rbind(dat_csf %>% select(subject_id, date = date_csf )) %>%
rbind(dat_ec %>% select(subject_id, date = date_ec )) %>%
rbind(dat_hippo %>% select(subject_id, date = date_hippo)) %>%
distinct() %>%
arrange(subject_id, date)
s_cog <- a_add_suffix(dat_cog, "_cog")
s_diag <- a_add_suffix(dat_diag, "_diag")
s_amy <- a_add_suffix(dat_amy, "_amy")
s_csf <- a_add_suffix(dat_csf, "_csf")
s_ec <- a_add_suffix(dat_ec, "_ec")
s_hippo <- a_add_suffix(dat_hippo, "_hippo")
variable_list <- list(cog = names(s_cog),
diag = names(s_diag),
amy = names(s_amy),
csf = names(s_csf) ,
ec = names(s_ec) ,
hippo = names(s_hippo))
dat_info <- dat_all %>%
left_join(s_cog , by = c("subject_id", "date")) %>%
left_join(s_diag , by = c("subject_id", "date")) %>%
left_join(s_amy , by = c("subject_id", "date")) %>%
left_join(s_csf , by = c("subject_id", "date")) %>%
left_join(s_ec , by = c("subject_id", "date")) %>%
left_join(s_hippo , by = c("subject_id", "date")) %>%
left_join(dat_demo, by = c("subject_id")) %>%
left_join(dat_ge , by = c("subject_id")) %>%
select(!(ends_with(".x") | ends_with(".y")))
if (is.null(reference_time)) {
reference_time <- s_cog %>% select(subject_id, date)
}
if (reference_time == "COG") {
reference_time <- s_cog %>% select(subject_id, date)
}
if (reference_time == "DIAG") {
reference_time <- s_diag %>% select(subject_id, date)
}
## ------------- combine data --------------------------------------
a_print("Merging analysis dataset...", verbose)
dat_se <- reference_time %>%
rename(impute_date = date)
for (bmk in tolower(setdiff(vec_tbls, c("DEMO", "GE")))) {
a_print(sprintf("Extracting biomarker from %s", bmk), verbose)
dat <- dat_info %>%
select(variable_list[[bmk]]) %>%
filter(!!as.name(paste0("indicator_", bmk)) == 1)
window <- window_setting[[bmk]][1]
window_overlap <- window_setting[[bmk]][2] == 1
dat_bmk <- a_select_bmk(reference_time, dat, window, window_overlap) %>%
rename("{paste0('date_', bmk)}" := date)
dat_se <- dat_se %>%
left_join(dat_bmk, by = c("subject_id", "impute_date"))
}
a_print("Merging demo ...", verbose)
a_print("Merging ge ...", verbose)
dat_se <- dat_se %>%
left_join(dat_demo, by = c("subject_id")) %>%
left_join(dat_ge, by = c("subject_id"))
## load ApoE-4
dat_se$apoe4 <- adt_apoe(dat_se$apoecode, ...)
## drop duplicates
dat_se <- dat_se %>%
select(!(ends_with(".x") | ends_with(".y")))
## add exid
dat_se <- dat_se %>%
rowwise() %>%
mutate(exclude = subject_id %in% exid)
# format variable
a_print("Done.", verbose)
## return
rtn <- list(ana_dt = dat_se,
dat_ty = "biocard",
window_setting = window_setting,
reference_time = reference_time)
class(rtn) <- "ad_ana_data"
return(rtn)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.