R/bootstrap_results.R

#' bootstrap_results
#' 
#' @param result result data.frame produced by one of the compute_phewas functions
#' @param fdr_threshold all the results with a p_fdr value under this threshold will be bootsrapped 
#' @param concepts_wide wide data.frame with concepts as columns and patients as rows
#' @param perm number of permutations to perform
#' @param family family of the glm model c('poisson', 'binomial')
#' @param boot_all bootstrap all the results (time consuming)
#' @return result with bootstrapped 95%CI for OR and for pvalue

bootstrap_results <- function( result, fdr_threshold, concepts_wide, perm, family, boot_all = FALSE, cores = 1) {
  
  require(future)
  
  if (boot_all) {
    fdr_list = result$CODE
  } else {
    fdr_list <- result$CODE[result$p_fdr < fdr_threshold]
  }
  
  if (length(fdr_list) ==0) {
    result$CI_OR_inf = NA
    result$CI_OR_sup = NA
    result$term = NA
    result$p_inf = NA
    result$p_sup = NA
    return(result)
  }
  
  print('bootstraping')
  
  #cores <- 4
  
  print(stringr::str_interp("number of cores available: ${cores}"))
  
  clist <- fdr_list
  
  clist_length <- length(clist)
  
  print(stringr::str_interp("number of concepts to bootstrap: ${clist_length}"))
  
  cores <- min(clist_length, cores)
  print(stringr::str_interp("number of cores used: ${cores}"))
  
  chunks <- split(clist, ceiling(seq_along(clist)/cores))
  print(paste("chunk length", length(chunks)))
  
  if (cores > 1 & Sys.info()[['sysname']] != 'Windows') {
    plan(multiprocess)
  } else {
    plan(sequential)
  }
  
  f <- list()
  for (i in 1:length(chunks)) {
    
    f[[i]] <- future({
      
      res_temp <- lapply(chunks[[i]], FUN=function(x, concepts_wide, perm, family) {
        
        concept = x
        form <- as.formula(stringr::str_interp("`${concept}` ~ group"))
        
        alpha = .05
        boot <- concepts_wide %>%
          group_by(group) %>%
          broom::bootstrap(perm, by_group = TRUE) %>%
          dplyr::do(broom::tidy(glm(form, data = ., family = family)))  %>% 
          dplyr::filter(term == 'groupcases') %>% 
          dplyr::group_by (term) %>% 
          dplyr::summarize(CI_OR_inf = exp(quantile(estimate, alpha /2, na.rm = T)),
                           CI_OR_sup = exp(quantile(estimate, 1 - alpha/2, na.rm = T)),
                           p_inf = (quantile(p.value, alpha /2, na.rm = T)),
                           p_sup = (quantile(p.value, 1 - alpha/2, na.rm = T)))
        
        boot$CODE = x
        boot
        
      }, concepts_wide = concepts_wide, perm = perm, family = family)
      
      lapply(res_temp, FUN=rbind) %>% data.table::rbindlist()
      
    } )
  }

  res_temp <- lapply(f, value)
  result_boot <- lapply(res_temp, FUN=rbind) %>% data.table::rbindlist()
  # 
  # res_temp <- parLapply(cl, fdr_list, function(x, concepts_wide, perm, family) {
  #   
  #   concept = x
  #   form <- as.formula(stringr::str_interp("`${concept}` ~ group"))
  #   
  #   alpha = .05
  #   boot <- concepts_wide %>%
  #     group_by(group) %>%
  #     broom::bootstrap(perm, by_group = TRUE) %>%
  #     dplyr::do(broom::tidy(glm(form, data = ., family = family)))  %>% 
  #     dplyr::filter(term == 'groupcases') %>% 
  #     dplyr::group_by (term) %>% 
  #     dplyr::summarize(CI_OR_inf = exp(quantile(estimate, alpha /2, na.rm = T)),
  #                      CI_OR_sup = exp(quantile(estimate, 1 - alpha/2, na.rm = T)),
  #                      p_inf = (quantile(p.value, alpha /2, na.rm = T)),
  #                      p_sup = (quantile(p.value, 1 - alpha/2, na.rm = T)))
  #   
  #   boot$CODE = x
  #   boot
  #   
  # }, concepts_wide = concepts_wide, perm = perm, family = family)
  # 
  # result_boot <- lapply(res_temp, FUN=rbind) %>% data.table::rbindlist()
  
  result <- result_boot %>%
    #    dplyr::select(-term) %>%
    dplyr::right_join(result, by = 'CODE')
  
  result
}
aneuraz/multiWAS documentation built on May 14, 2019, 2:37 p.m.