Nothing
#' Get duplicated caseIDs
#'
#' Retrieve the duplicated caseIDs to remove from the analysis.
#'
#' @param duplicates_dir directory path where the text files with the duplicates information are.
#'
#' @return an integer vector with all the caseids to be removed
#' @export
get_duplicate_caseids <- function(duplicates_dir = NULL){
if (is.null(duplicates_dir)){return(000000)}
if (!dir.exists(duplicates_dir)) {
stop("directory does not exist")
}
deleted_files <- list.files(duplicates_dir, full.names = TRUE, pattern = ".txt")
caseid_to_be_removed <- c()
for (d_file in deleted_files){
read_file <- data.table::fread(d_file, sep = "$", quote = "", fill = FALSE)
caseid_to_be_removed <- c(caseid_to_be_removed, unique(read_file$V1)) %>%
as.numeric()
}
return(caseid_to_be_removed)
}
#' Convert a date string into a date format
#'
#' @param date_string A string vector with multiple formats (8, 6 or 4 digits)
#'
#' @return A converted Date
#' @export
#' @examples arrange_date("2020")
#' arrange_date("202006")
#' arrange_date("20200601")
arrange_date <- function(date_string) {
if (any(stringr::str_detect(
string = date_string,
pattern = "[:alpha:]"
), na.rm = TRUE)) {
stop("string contains letters")
}
date_string_modif <- dplyr::case_when(
(nchar(date_string) == 4) ~ paste0(date_string, "0630"),
(nchar(date_string) == 6) ~ paste0(date_string, "15"),
(nchar(date_string) == 8) ~ as.character(date_string),
TRUE ~ as.character(date_string)
)
date_string_modif <- as.Date(date_string_modif, format = "%Y%m%d")
return(unname(date_string_modif))
}
#' Read FAERS ascii files
#'
#' Read ASCII files from a directory, removing the duplicates.
#'
#' @param ascii_dir directory path where ascii files are
#' @param cache_path (optional) a string. Must have a ".Rdata" extension to save the read tabular formats in each loop.
#' @param drug_indication_pattern (optional) a string.filter ADRs with a specific drug indication pattern (**stringr** sintax)
#' @param drug_pattern (optional) a string. filter ADRs with a specific drug name pattern (**stringr** sintax)
#' @param primary_suspect (optional) a string.
#' @param ... directory with duplicate information to be passed to \link{get_duplicate_caseids}
#'
#' @return A list with binded tibbles retrieved from files.
#' @export
retrieve_faersascii <- function(ascii_dir, cache_path = NULL, drug_indication_pattern = NULL, drug_pattern = NULL,
primary_suspect = TRUE, ...) {
#setting global vars
caseid <- role_cod <- drugname <- indi_pt <- primaryid <- indi_drug_seq <-
ichicsr <- . <- NULL
if (!dir.exists(ascii_dir)) {
stop("directory does not exist")
}
ascii_files <- list.files(ascii_dir, full.names = TRUE, pattern = ".txt")
ascii_drug_files <- ascii_files[which(stringr::str_detect(ascii_files, pattern = "drug|DRUG|indi|INDI"))]
#getduplicated caseIDs
duplicated_caseids <- get_duplicate_caseids(...)
# gather drug files
drug_info <- tibble::tibble()
indi_info <- tibble::tibble()
drug_indi_info <- tibble::tibble()
# iterating over drug and indication files to filter the chosen ADRs
for (ascii_drug_file in ascii_drug_files) {
message(paste(
"Retrieving drug/indication information: binding", ascii_drug_file,
which(ascii_drug_file == ascii_drug_files), "out of", length(ascii_drug_files)
))
if (stringr::str_detect(ascii_drug_file, "drug|DRUG")) {
#read drug_info
read_file <- data.table::fread(ascii_drug_file, sep = "$", quote = "", fill = FALSE) %>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
{
if ("nda_num" %in% names(.)) {
.$nda_num <- as.character(.$nda_num)
.
} else {
.
}
} %>%
{
if ("dose_amt" %in% names(.)) {
.$dose_amt <- as.character(.$dose_amt)
.
} else {
.
}
} %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.)))
#filter drug_file and bind it
drug_info <- bind_rows(drug_info, read_file %>%
{
if (primary_suspect) {
filter(., role_cod == "PS")
} else {
.
}
} %>%
{
if (!is.null(drug_pattern)) {
filter(., stringr::str_detect(string = drugname, pattern = drug_pattern))
} else {
.
}
})
#remove file to free space
rm(list= c("read_file"))
gc()
}
if (stringr::str_detect(ascii_drug_file, "indi|INDI")) {
#read indication_info
read_file <- data.table::fread(ascii_drug_file, sep = "$", quote = "", fill = FALSE) %>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.)))
#filter indi_file and bind it
indi_info <- bind_rows(indi_info, read_file %>%
{
if (!is.null(drug_indication_pattern)) {
filter(., stringr::str_detect(string = indi_pt, pattern = drug_indication_pattern))
} else {
.
}
})
#remove file to free space
rm(list= c("read_file"))
gc()
}
}
#set final structure for drug_indi_info
if (!is.null(drug_indication_pattern) & !is.null(drug_pattern)) {
drug_indi_info <- drug_info %>%
inner_join(indi_info %>% group_by(primaryid, caseid, indi_drug_seq) %>%
summarise(indi_pt = paste(sort(unique(indi_pt)), collapse = "/"), .groups = "keep") %>%
ungroup(), by = c("primaryid", "caseid", "drug_seq" = "indi_drug_seq"))
}
if (!is.null(drug_indication_pattern) & is.null(drug_pattern)) {
if (primary_suspect == TRUE) {
drug_indi_info <- drug_info %>%
inner_join(indi_info %>% group_by(primaryid, caseid, indi_drug_seq) %>%
summarise(indi_pt = paste(sort(unique(indi_pt)), collapse = "/"), .groups = "keep") %>%
ungroup(), by = c("primaryid", "caseid", "drug_seq" = "indi_drug_seq"))
} else {
drug_indi_info <- indi_info
}
}
if (is.null(drug_indication_pattern) & !is.null(drug_pattern)) {
drug_indi_info <- drug_info %>%
filter(!caseid %in% duplicated_caseids)
}
if (is.null(drug_indication_pattern) & is.null(drug_pattern)) {
drug_indi_info <- drug_info %>%
full_join(indi_info %>% group_by(primaryid, caseid, indi_drug_seq) %>%
summarise(indi_pt = paste(sort(unique(indi_pt)), collapse = "/"), .groups = "keep") %>%
ungroup(), by = c("primaryid", "caseid", "drug_seq" = "indi_drug_seq"))
}
#filter drug_info & indi_info
drug_info <- drug_info %>% filter(primaryid %in% drug_indi_info$primaryid) %>%
mutate_at("drugname", ~stringr::str_replace_all(., pattern = "AND", replacement = "\\\\")) %>%
mutate_at("drugname", ~stringr::str_squish(.))
indi_info <- indi_info %>% filter(primaryid %in% drug_indi_info$primaryid)
# cache objects to be filled
cache <- list()
cache_files <- c()
# files to be filled
demo_info <- tibble::tibble()
outc_info <- tibble::tibble()
reac_info <- tibble::tibble()
rpsr_info <- tibble::tibble()
ther_info <- tibble::tibble()
#read the files and remove the duplicated cases, impute date, and filter by caseIDs with specified drug and drug indication patterns
for (ascii_file in ascii_files) {
message(paste("binding", ascii_file, which(ascii_file == ascii_files), "out of", length(ascii_files)))
#garbage collection
gc()
if (stringr::str_detect(ascii_file, "demo|DEMO")) {
demo_info <- demo_info %>% bind_rows(data.table::fread(ascii_file, sep = "$", quote = "", fill = FALSE)%>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.))) %>%
{
if ("nda_num" %in% names(.)) {
.$nda_num <- as.character(.$nda_num)
.
} else {
.
}
} %>%
mutate_at(c("age", "wt"), ~ suppressWarnings(as.numeric(.))) %>%
filter(primaryid %in% drug_indi_info$primaryid))
}
if (stringr::str_detect(ascii_file, "outc|OUTC")) {
outc_info <- outc_info %>% bind_rows(data.table::fread(ascii_file, sep = "$", quote = "", fill = FALSE) %>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
{
if ("outc_cod" %in% names(.)) {
dplyr::rename(., "outc_code" = "outc_cod")
} else {
.
}
} %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.))) %>%
filter(primaryid %in% drug_indi_info$primaryid))
}
if (stringr::str_detect(ascii_file, "reac|REAC")) {
reac_info <- reac_info %>% bind_rows(data.table::fread(ascii_file, sep = "$", quote = "", fill = FALSE) %>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.))) %>%
filter(primaryid %in% drug_indi_info$primaryid))
}
if (stringr::str_detect(ascii_file, "rpsr|RPSR")) {
rpsr_info <- rpsr_info %>% bind_rows(data.table::fread(ascii_file, sep = "$", quote = "", fill = FALSE)%>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.))) %>%
filter(primaryid %in% drug_indi_info$primaryid))
}
if (stringr::str_detect(ascii_file, "ther|THER")) {
ther_info <- ther_info %>% bind_rows(data.table::fread(ascii_file, sep = "$", quote = "", fill = FALSE)%>%
mutate_at(c("primaryid", "caseid"), ~as.numeric(.)) %>%
filter(!caseid %in% duplicated_caseids) %>%
mutate_at(vars(ends_with("_dt")), list(~ arrange_date(.))) %>%
{
if ("dur" %in% names(.)) {
.$dur <- as.character(.$dur)
.
} else {
.
}
} %>%
filter(primaryid %in% drug_indi_info$primaryid))
}
if (!is.null(cache_path)) {
message(paste("saving", ascii_file, "into cache:", cache_path))
cache_files <- c(cache_files, ascii_file)
ascii_list <- list(
demographics = demo_info,
drug = drug_info,
indication = indi_info,
outcome = outc_info,
reaction = reac_info,
report_source = rpsr_info,
therapy = ther_info
)
cache$files <- cache_files
save(cache, file = cache_path)
}
}
return(list(
demographics = demo_info,
drug = drug_info,
indication = indi_info,
outcome = outc_info,
reaction = reac_info,
report_source = rpsr_info,
therapy = ther_info
))
}
#' Unify the list to a tabular format
#'
#' Turn the list elements returned from \link{retrieve_faersascii} into a tabular format
#'
#' @param ascii_list list from \link{retrieve_faersascii}
#' @examples
#' unify_tabular_ascii(ascii_list = als_faers_data)
#'
#'
#' @return A data frame representing FAERS data, with all components from the list joined.
#' @export
unify_tabular_ascii <- function(ascii_list) {
#setting global vars
primaryid <- caseid <- indi_drug_seq <- indi_pt <- dsg_drug_seq <- drugname <-
role_cod <- start_dt <- reporter_country <- sex <- event_dt <- age <- age_YR <-
indi_pt_all <- drugname_all <- start_dt_ps <- caseversion <- NULL
drug_indi_info <- ascii_list$drug %>%
mutate_at("drugname", ~stringr::str_squish(.)) %>%
left_join(ascii_list$indication %>% group_by(primaryid, caseid, indi_drug_seq) %>%
summarise(indi_pt = paste(sort(unique(indi_pt)), collapse = "/"), .groups = "keep") %>%
ungroup(), by = c("primaryid", "caseid", "drug_seq" = "indi_drug_seq")) %>%
left_join(ascii_list$therapy %>%
distinct(primaryid, caseid, dsg_drug_seq, .keep_all = TRUE),
by = c("primaryid", "caseid", "drug_seq" = "dsg_drug_seq"))
outcome_info <- ascii_list$outcome %>%
tidyr::pivot_wider(
id_cols = c("primaryid", "caseid"), names_from = "outc_code",
values_fill = "0", values_from = "outc_code", values_fn = ~ ifelse(. == "0", "0", "1"),
names_prefix = "outcome_"
)
report_source_info <- ascii_list$report_source %>%
tidyr::pivot_wider(
id_cols = c("primaryid", "caseid"), names_from = "rpsr_cod",
values_fill = "0", values_from = "rpsr_cod", values_fn = ~ ifelse(. == "0", "0", "1"),
names_prefix = "report_source_"
)
unified_faers <- ascii_list$reaction %>%
left_join(distinct(ascii_list$demographics, primaryid, caseid, .keep_all = TRUE),
by = c("primaryid", "caseid"), relationship = "many-to-many") %>%
mutate(patient_drug = lapply(as.integer(primaryid), FUN = function(x) {
drug_indi_info %>%
filter(primaryid == x) %>%
mutate_at(c("prod_ai", "drugname", "indi_pt"), ~stringr::str_squish(.)) %>%
suppressMessages()
})) %>%
left_join(outcome_info, by = c("primaryid", "caseid")) %>%
left_join(report_source_info, by = c("primaryid", "caseid")) %>%
left_join(drug_indi_info %>%
group_by(primaryid, caseid) %>%
summarise(
indi_pt_all = paste(sort(unique(indi_pt)), collapse = " + "),
drugname_all = paste(sort(unique(drugname)), collapse = " + "),
.groups = "keep"
) %>%
ungroup(), by = c("primaryid", "caseid")) %>%
left_join(drug_indi_info %>%
filter(role_cod == "PS") %>%
group_by(primaryid, caseid) %>%
summarise(
indi_pt_ps = sort(unique(indi_pt)),
drugname_ps = paste(sort(unique(drugname)), collapse = " + "),
start_dt_ps = unique(start_dt),
.groups = "keep"
) %>%
ungroup() %>% suppressWarnings(), by = c("primaryid", "caseid")) %>%
mutate(age_YR = case_when(age_cod == "YR" ~ as.numeric(age),
age_cod == "DEC" ~ as.numeric(age)*10,
age_cod == "MON" ~ as.numeric(age)/12,
age_cod == "WK" ~ as.numeric(age)/(365/7),
age_cod == "DY" ~ as.numeric(age)/365,
age_cod == "HR" ~ as.numeric(age)/(365*24),
TRUE ~ as.numeric(age))) %>%
relocate(age_YR, .after = "age")
#de-duplication
unified_faers <- unified_faers %>%
mutate_at("pt", ~stringr::str_squish(.))
#latest case version
unified_faers <- unified_faers %>%
group_by(caseid) %>%
filter(caseversion == max(caseversion)) %>%
ungroup()
#
# #remove suspected duplicated cases
index_to_remove <- which(duplicated(unified_faers [,c( "reporter_country",
"sex", "event_dt",
"age", "pt",
"indi_pt_all",
"drugname_all",
"start_dt_ps")]) &
!is.na(unified_faers$reporter_country) &
!is.na(unified_faers$sex) &
!is.na(unified_faers$event_dt) &
!is.na(unified_faers$age) &
!is.na(unified_faers$pt) &
!is.na(unified_faers$indi_pt_all) &
!is.na(unified_faers$drugname_all) &
!is.na(unified_faers$start_dt_ps)
)
if(length(index_to_remove) >=1){unified_faers <- unified_faers[-index_to_remove,]}
#trim
message("unification and de-duplication applied.")
return(unified_faers)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.