# R/readGliphTable.R
#
# Defines functions: getGliphTable, readGliph
#
# Documented in: getGliphTable, readGliph

#' Read GLIPH files
#'
#' \code{readGliph} imports tab-separated value (.tsv) files generated by GLIPH.
#'
#' @details Each file contains three columns: the GLIPH convergence group
#' count, the specificity group sequence and a space-separated list of
#' junction_aa sequences. The function reads these files into a single tibble,
#' expanding the junction_aa column such that each row in the tibble
#' corresponds to one junction_aa sequence.
#'
#' @param gliph_path Path to the directory containing GLIPH convergence group
#' files. Only files whose names end in ".tsv" are read. The file name will be
#' used as repertoire_id in the output, so it is advisable to rename the GLIPH
#' files to match the input TRB file names so that the GLIPH table and the
#' repertoire table can be merged.
#'
#' @return Tibble with four columns: gliph_count, spec_group, junction_aa and
#' repertoire_id. The GLIPH file name (without directory and extension) is used
#' as the repertoire_id. If no ".tsv" files are found, a warning is raised and
#' an empty tibble is returned.
#'
#' @examples
#' file_path <- base::system.file("extdata", "TCRB_gliph", package = "LymphoSeq2")
#' gliph_table <- LymphoSeq2::readGliph(file_path)
#'
#' @export
readGliph <- function(gliph_path) {
  # `pattern` is a regular expression, not a shell glob: anchor on the literal
  # ".tsv" extension so that names merely containing "tsv" are not picked up.
  gliph_files <- base::list.files(path = gliph_path,
                                  pattern = "\\.tsv$",
                                  full.names = TRUE)
  # Nothing to read: skip building a zero-length progress bar and hand back an
  # empty tibble, which is what binding zero tables produces.
  if (base::length(gliph_files) == 0) {
    base::warning("No .tsv files found in ", gliph_path, call. = FALSE)
    return(dplyr::bind_rows())
  }
  progress_bar <- progress::progress_bar$new(format = "Reading GLIPH files [:bar] :percent eta: :eta",
                                             total = base::length(gliph_files), clear = FALSE, width = 60)
  # Draw the empty bar before the first file is processed.
  progress_bar$tick(0)
  # Read every file into its own tibble, then stack them row-wise.
  gliph_table <- gliph_files %>%
                 purrr::map(~getGliphTable(.x, progress_bar)) %>%
                 dplyr::bind_rows()
  return(gliph_table)
}

#' Read a single GLIPH file
#'
#' Reads one GLIPH convergence group file and expands the space-separated
#' junction_aa column so that each row holds a single junction_aa sequence.
#'
#' @param gliph_path Path to individual GLIPH file
#' @param progress_bar Progress bar variable; its \code{tick()} method is
#' called once for the file being read
#'
#' @return Tibble with the columns gliph_count, spec_group, junction_aa and
#' repertoire_id, where repertoire_id is the file name without its directory
#' and extension
getGliphTable <- function(gliph_path, progress_bar) {
  progress_bar$tick()
  repertoire_name <- tools::file_path_sans_ext(base::basename(gliph_path))
  # Silence readr's own progress display for this read only; the caller's
  # option value is put back when the function exits, even on error.
  old_options <- base::options(readr.show_progress = FALSE)
  base::on.exit(base::options(old_options), add = TRUE)
  # Pin the two text columns to character so that every file yields the same
  # column types regardless of content (type guessing on an all-empty column
  # could otherwise differ between files); gliph_count is still guessed.
  gliph_col_types <- readr::cols(spec_group = readr::col_character(),
                                 junction_aa = readr::col_character())
  gliph_table <- readr::read_tsv(gliph_path,
                                 col_names = c("gliph_count", "spec_group", "junction_aa"),
                                 col_types = gliph_col_types) %>%
                 dplyr::mutate(repertoire_id = repertoire_name) %>%
                 # One row per junction_aa sequence in the space-separated list.
                 tidyr::separate_rows(junction_aa, sep = " ")
  return(gliph_table)
}
# elulu3/LymphoSeqTest documentation built on Aug. 27, 2022, 5:47 a.m.