R/cloneTrack.R

Defines functions cloneTrack

Documented in cloneTrack

#' Clone tracking table
#' 
#' Builds a table tracking amino acid sequences across multiple samples. The
#' input is restricted to the requested repertoires and sequences, and each
#' remaining row is annotated with how many rows share its amino acid sequence.
#' 
#' @param study_table A tibble of productive amino acid sequences 
#' generated by LymphoSeq function productiveSeq where the aggregate parameter 
#' was set to "junction_aa". It must contain the columns \code{repertoire_id}
#' and \code{junction_aa}.
#' @param sample_list A character vector of one or more repertoire_ids to track.
#' If set to NULL (default), all repertoire_ids in the sequence matrix will be
#' tracked.
#' @param sequence_track An optional character vector of one or more amino acid 
#' sequences to track. If set to NULL (default), all junction_aa sequences
#' in the sequence matrix will be tracked.
#' @param unassigned A Boolean value (default TRUE). This argument is accepted
#' so that existing calls keep working, but the current implementation does not
#' reference it, so it has no effect on the result.
#' @return Returns a tibble holding the rows of \code{study_table} whose
#' \code{repertoire_id} is in \code{sample_list} and whose \code{junction_aa}
#' is in \code{sequence_track}, with one additional column, \code{seen}, giving
#' the number of returned rows that share the row's \code{junction_aa}.
#' @details When \code{study_table} holds one row per amino acid sequence per
#' repertoire, \code{seen} is the number of selected repertoires in which the
#' sequence occurs. The returned tibble can be passed on to plotting code such
#' as ggplot2 to draw sequence frequencies across samples.
#' @seealso Examples on how to format a ggplot can be found in the R Graphics
#' Cookbook online (\url{http://www.cookbook-r.com/Graphs/}).
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq2")
#' 
#' stable <- readImmunoSeq(path = file_path)
#' 
#' atable <- productiveSeq(study_table = stable, aggregate = "junction_aa")
#' 
#' top_freq <- topFreq(atable, frequency = 0.001)
#' 
#' # Track clones without mapping or tracking specific sequences
#' cloneTrack(atable)
#' 
#' # Track top 20 clones mapping to the CD4 and CD8 samples
#' cloneTrack(atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
#'    sequence_track = top_freq$junction_aa[1:20], unassigned = TRUE) 
#' 
#' # Track the top 10 clones from top_freq
#' cloneTrack(study_table = atable, sequence_track = top_freq$junction_aa[1:10], 
#'            unassigned = FALSE) 
#' 
#' # Track clones mapping to the CD4 and CD8 samples while ignoring all others
#' cloneTrack(study_table = atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
#'    unassigned = FALSE) 
#' 
#' # Track clones mapping to the CD4 and CD8 samples and track 2 specific sequences
#' cloneTrack(study_table = atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
#'    sequence_track = c("CASSPPTGERDTQYF", "CASSQDRTGQYGYTF"), unassigned = FALSE)
#' 
#' @export
cloneTrack <- function(study_table, sample_list = NULL, sequence_track = NULL, 
                       unassigned = TRUE) {
  # NOTE(review): `unassigned` is part of the signature but is not used below;
  # confirm with the package authors whether it should alter the result.
  tracker_table <- dtplyr::lazy_dt(study_table)
  # A NULL selector means "track everything". Filtering the table against the
  # complete set of its own values keeps every row, so the filter is skipped
  # instead of materialising that set and testing membership row by row.
  if (!base::is.null(sample_list)) {
    tracker_table <- tracker_table %>%
      dplyr::filter(repertoire_id %in% sample_list)
  }
  if (!base::is.null(sequence_track)) {
    tracker_table <- tracker_table %>%
      dplyr::filter(junction_aa %in% sequence_track)
  }
  # `seen` counts, per amino acid sequence, the rows that survived filtering.
  tracker_table %>%
    dplyr::group_by(junction_aa) %>%
    dplyr::mutate(seen = dplyr::n()) %>%
    dplyr::ungroup() %>%
    dplyr::as_tibble()
}
elulu3/LymphoSeqTest documentation built on Aug. 27, 2022, 5:47 a.m.