R/extract_gains.R

Defines functions bd_extract_gain

Documented in bd_extract_gain

#' Extract and parse gain/loss information by the selected best models
#'
#' Reads every `*.gains.txt` file found in `results_dir`, stacks the parsed
#' tables and left-joins them onto `best_models` by orthogroup (`og`), `model`
#' and `replicates`.
#'
#' The join keys are derived as follows:
#' * `model` is the first two characters of the gains file name;
#' * `replicates` is characters 3-4 of the gains file name, converted to
#'   numeric;
#' * `og` is the 9-character substring running from the 21st-to-last to the
#'   13th-to-last character of the first (file name) column stored inside
#'   each gains file.
#'
#' @param best_models Best models data frame generated by `bd_model_select()`.
#'   Must contain the columns `og`, `model` and `replicates`.
#' @param results_dir Directory where results were saved with `bd_collect()`.
#'
#' @return `best_models` with the columns `branch_code`, `gains` and `losses`
#'   added. Each row of `best_models` is repeated once per matching branch
#'   record; rows without a match are kept with `NA` in the added columns.
#'   An error is raised if `results_dir` contains no `.gains.txt` file.
#'
#' @import dplyr
#' @importFrom stringr str_sub
#' @importFrom readr read_delim
#' @importFrom purrr map
#' @importFrom tidyr unnest
#'
#' @export
#'
bd_extract_gain <- function(best_models, results_dir = "./results") {

  # Escape the dots and anchor at the end so that only files carrying the
  # literal ".gains.txt" suffix are read (the unescaped pattern would also
  # match e.g. "GD01xgainsxtxt" or "GD01.gains.txt.bak").
  gain_files <- list.files(path = results_dir, pattern = "\\.gains\\.txt$")

  if (length(gain_files) == 0) {
    stop(
      "No '.gains.txt' files found in '", results_dir, "'.",
      call. = FALSE
    )
  }

  # Column specification shared by all gains files; the second column is
  # dropped while reading. Everything is namespace-qualified so the function
  # does not rely on readr being attached.
  gain_col_types <- readr::cols(
    filename = readr::col_character(),
    nas = readr::col_skip(),
    branch_code = readr::col_character(),
    gains = readr::col_double(),
    losses = readr::col_double()
  )

  read_gain_file <- function(file_name) {
    readr::read_delim(
      file.path(results_dir, file_name),
      col_names = c("filename", "nas", "branch_code", "gains", "losses"),
      delim = "\t",
      trim_ws = TRUE,
      col_types = gain_col_types
    )
  }

  # One row per gains file, with the parsed table stored as a list-column.
  gains <- dplyr::tibble(
    file_path = gain_files,
    model = stringr::str_sub(gain_files, 1, 2),
    replicates = as.numeric(stringr::str_sub(gain_files, 3, 4)),
    data = purrr::map(gain_files, read_gain_file)
  )

  # Expand to one row per record; naming the column explicitly avoids the
  # deprecation warning emitted when `cols` is omitted.
  gains <- tidyr::unnest(gains, cols = "data")
  gains <- dplyr::mutate(gains, og = stringr::str_sub(filename, -21, -13))

  joined <- dplyr::left_join(
    best_models,
    gains,
    by = c("og", "model", "replicates")
  )

  # Drop the helper columns that were only needed to build the join keys.
  dplyr::select(joined, -file_path, -filename)
}
palfalvi/badirater documentation built on June 26, 2022, 2:11 a.m.