#' compute_bio_phewas
#' computes the PheWAS analysis on numeric biological test results
#'
#' @param bio bio data.frame from the `get_data_from_num` function
#' @param patients patients data.frame from the `get_data_from_num` function
#' @param direction one of c('SUP', 'INF') to perform the analysis on results above the normal range (SUP) or below the normal range(INF)
#' @param bio_min_occur integer. minimum number of occurrences of the biological test result in the dataset to be analyzed
#' @param zero_controls_add integer. number of simulated patients to add to the control group for a given if there is no occurrence of this concept in the control group. This trick allows to compute a pvalue in this situation.
#' @param patients_min_concept integer. minimum number of unique concept for a patient to be included in the analysis
#' @param with_boot logical. perform a bootstrap on the results
#' @param perm integer. number of permutations for the bootstrap
#' @param boot_all logical. perform a bootstrap on all the results or only on the results with FDR corrected p.value < fdr_threshold
#' @param fdr_threshold maximum FDR corrected p.value to perform the bootstrap
#' @param analysis_type one of c('poisson', 'binomial') for the type of analysis to perform
#' @param progress progress bar object
#' @export
compute_bio_phewas <- function(bio,
patients,
direction = "SUP",
bio_min_occur = 1,
zero_controls_add = 2,
patients_min_concept = NULL,
with_boot = FALSE,
perm = 1000,
boot_all = FALSE,
fdr_threshold = 0.05,
analysis_type = 'poisson',
progress= NULL,
cores = 1) {
require(dplyr, quietly = TRUE)
#require(parallel, quietly = TRUE)
require(tidyr, quietly = TRUE)
require(broom, quietly = TRUE)
require(future)
if(!is.null(progress)) {
progress$set(1, detail = 'Computing phewas')
} else {
cat('Computing phewas on bio ...')
}
dir_to_suppress <- ifelse(direction == "SUP", "INF", "SUP")
n_patients <- bio %>% dplyr::group_by(PATIENT_NUM,group) %>%
dplyr::summarise(bla = n()) %>% dplyr::group_by(group) %>% dplyr::summarise(n = n())
#n_patients <- patients %>% dplyr::group_by(group) %>% dplyr::summarise(n = n())
n_cases <- n_patients %>% dplyr::filter(group == 'cases') %>% dplyr::select(n)
n_control <- n_patients %>% dplyr::filter(group == 'control') %>% dplyr::select(n)
bio <- dplyr::rename_(bio, VALUE = direction)
#bio$VALUE = bio[,direction]
bio$count = 1
bio_list <- bio %>%
distinct(CODE,CODE_LABEL,PARENT_LABEL, PATIENT_NUM, group) %>%
group_by(CODE, CODE_LABEL,PARENT_LABEL, group)
if (analysis_type == 'poisson') {
bio_list <- summarize(bio_list, count = n())
} else if (analysis_type == 'binomial') {
bio_list <- summarize(bio_list, count = 1)
}
bio_list <- tidyr::spread(bio_list, group,count) %>%
ungroup() %>%
mutate(cases = ifelse(is.na(cases), 0, cases),
control = ifelse(is.na(control), 0, control),
cases0 = as.numeric(n_cases) - cases,
control0 = as.numeric(n_control) - control,
total = cases + control)
bio_keep <- bio_list %>%
dplyr::filter(cases > 0, total >= bio_min_occur)
bio_with_0 <- bio_list %>%
dplyr::filter(control == 0 , total >= bio_min_occur) %>%
dplyr::select(CODE)
if (nrow(bio_with_0)>0) {
adjust_facts <- bio %>% group_by(CODE) %>%
dplyr::filter(CODE %in% bio_with_0$CODE) %>%
slice(zero_controls_add) %>%
mutate(group = "control", PATIENT_NUM = 0) %>% data.frame()
bio <- rbind(bio %>% ungroup(), adjust_facts)
}
bio_wide <- bio %>%
dplyr::filter(CODE %in% bio_keep$CODE) %>%
dplyr::select(PATIENT_NUM, CODE,group, VALUE, SEX) %>%
group_by(PATIENT_NUM, CODE, group, SEX) %>%
summarise(VALUE = sum(VALUE)) %>%
ungroup()
if (analysis_type == 'binomial') {
bio_wide$VALUE = 1
}
bio_wide <- spread(bio_wide, key =CODE,value = VALUE, fill = 0) %>%
mutate(group = factor(group, levels =c('control','cases')))
result = NULL
#cores <- availableCores()
if (cores > 1 & Sys.info()[['sysname']] != 'Windows') {
plan(multiprocess)
} else {
plan(sequential)
}
cstart = 4
clist <- cstart:ncol(bio_wide)
seq_along(clist)
chunks <- split(clist, ceiling(seq_along(clist)/ceiling(length(clist)/cores)))
f <- list()
for (i in 1:cores) {
f[[i]] <- future({
res_temp <- lapply(chunks[[i]], FUN= function(x, bio_wide, family) {
concept = names(bio_wide[x])
form <- as.formula(stringr::str_interp("`${concept}`~ group",
list(concept = concept)))
fit <- glm(form, data=bio_wide, family= family)
p <- fit %>%
tidy() %>%
dplyr::filter(term == 'groupcases') %>%
dplyr::select(p.value) %>%
as.numeric()
res <- exp(cbind(coef(fit), suppressMessages(confint.default(fit, trace = TRUE)))) %>%
tidy() %>% as.data.frame() %>%
rename(term = .rownames, OR = V1, CI_inf = X2.5.., CI_sup = X97.5..) %>%
dplyr::filter(term == 'groupcases') %>%
mutate(CODE = concept, p.value = p, fit = list(tidy((fit)))) %>% dplyr::select(-term)
res
},bio_wide = bio_wide, family = ifelse(analysis_type == 'poisson', poisson, binomial))
lapply(res_temp, FUN=rbind) %>% data.table::rbindlist()
})
}
res_temp <- lapply(f, value)
result <- lapply(res_temp, FUN=rbind) %>% data.table::rbindlist()
result <- result %>%
mutate( p_fdr = p.adjust(p.value, method = 'fdr'),
p_bonf =p.adjust(p.value, method = 'bonferroni'),
direction = direction) %>%
left_join(bio_list, by = 'CODE') %>%
dplyr::select(CODE, CODE_LABEL, cases, control, cases0,control0, OR, CI_inf, CI_sup, p.value, p_fdr, p_bonf, PARENT_LABEL,fit, direction) %>%
arrange(p_fdr)
result <- result[!duplicated(result[,c('CODE', 'CODE_LABEL')]),]
if (with_boot) {
if(!is.null(progress)) {
progress$set(2, detail = 'Bootstrapping')
} else {
cat('Bootstrapping on ICD ...')
}
result <- bootstrap_results(result, fdr_threshold = fdr_threshold, concepts_wide = bio_wide, family = analysis_type,perm = perm, boot_all = boot_all, cores = cores)
}
result <- rename(result, CODE_LABEL = CODE_LABEL, CAT = PARENT_LABEL)
list(result = result, boot = with_boot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.