R/readGliphTable.R

Defines functions getGliphTable readGliph

Documented in getGliphTable readGliph

#' Read GLIPH files
#'
#' [readGliph()] imports the tab-separated value (.tsv) files generated by
#' GLIPH that are found in a directory and row-binds them into a single tibble.
#'
#' @param gliph_path Path to a directory containing GLIPH convergence group
#' files. Only files whose name ends in `.tsv` are read. The file name (without
#' its extension) is used as repertoire_id in the output, so it is advisable to
#' rename the GLIPH2 files to match the input TRB file names so that the GLIPH2
#' table and the repertoire table can be merged
#' @return Tibble with four columns: gliph_count, spec_group, junction_aa and
#' repertoire_id. The GLIPH file name is used as the repertoire_id
#' @details Each file is read as three header-less columns: the GLIPH2
#' convergence group count, the specificity group sequence and a space
#' separated list of junction_aa sequences. The junction_aa column is expanded
#' such that each row in the returned tibble corresponds to one junction_aa,
#' and a repertoire_id column identifying the source file is added
#' @examples
#' file_path <- base::system.file("extdata", "TCRB_gliph",
#'  package = "LymphoSeq2")
#' gliph_table <- LymphoSeq2::readGliph(file_path)
#' @export
readGliph <- function(gliph_path) {
  # `pattern` is a regular expression, not a shell glob. Escape the dot and
  # anchor at the end of the name so that only real ".tsv" files are listed
  # (an unanchored pattern would also pick up e.g. "data.tsv.bak").
  gliph_files <- base::list.files(
    path = gliph_path,
    pattern = "\\.tsv$",
    full.names = TRUE
  )
  progress_bar <- progress::progress_bar$new(
    format = "Reading GLIPH files [:bar] :percent eta: :eta",
    total = length(gliph_files), clear = FALSE, width = 60
  )
  # Draw the bar at 0% before the first file is read.
  progress_bar$tick(0)
  # One tibble per file, stacked into a single table.
  gliph_files |>
    purrr::map(\(gliph_file) getGliphTable(gliph_file, progress_bar)) |>
    dplyr::bind_rows()
}

#' Read a single GLIPH file
#'
#' Reads one header-less, tab-separated GLIPH file, tags every row with the
#' file name (without extension) as repertoire_id and expands the space
#' separated junction_aa column to one sequence per row.
#'
#' @param gliph_path Path to individual GLIPH file
#' @param progress_bar Progress bar variable; its `tick()` method is called
#' once per file
#' @return Tibble with the columns gliph_count, spec_group, junction_aa and
#' repertoire_id, one row per junction_aa sequence
getGliphTable <- function(gliph_path, progress_bar) {
  progress_bar$tick()
  repertoire_name <- tools::file_path_sans_ext(base::basename(gliph_path))
  # Silence readr's own progress output while reading, but put the caller's
  # setting back on exit so the session's global options are left unchanged.
  previous_options <- base::options(readr.show_progress = FALSE)
  base::on.exit(base::options(previous_options), add = TRUE)
  readr::read_tsv(gliph_path,
      col_names = c("gliph_count", "spec_group", "junction_aa"),
      col_types = readr::cols()) |>
    dplyr::mutate(repertoire_id = repertoire_name) |>
    # One row per junction_aa sequence in the space separated list.
    tidyr::separate_rows(junction_aa, sep = " ")
}
shashidhar22/LymphoSeq2 documentation built on Jan. 16, 2024, 4:29 a.m.