R/seqMatrix.R

Defines functions seqMatrix

Documented in seqMatrix

#' Sequence matrix
#' 
#' Creates a data frame with unique, productive amino acid sequences as rows and 
#' repertoire_id names as headers.  Each value in the data frame represents the 
#' frequency that the sequence appeared in the repertoire_id.
#' 
#' @param productive_aa A tibble of productive amino acid sequences 
#' generated by LymphoSeq function productiveSeq where the aggregate parameter 
#' was set to "junction_aa". 
#' @param sequences A character vector of amino acid sequences of interest.  It 
#' is useful to specify the output from the LymphoSeq functions uniqueSeqs or 
#' topSeqs and subsetting the "junction_aa" column.  See examples below.
#' @param by Available options are "duplicate_frequency" and "duplicate_count".
#' Default is "duplicate_frequency".
#' @return Returns a data frame of unique, productive amino acid sequences as 
#' rows and the \% frequency (or the count, when \code{by} is 
#' "duplicate_count") with which each appears in each repertoire_id as columns.
#' @seealso \code{\link{topSeqs}} and \code{\link{uniqueSeqs}}
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq2")
#' stable <- readImmunoSeq(path = file_path)
#' atable <- productiveSeq(stable,
#'                         aggregate = "junction_aa")
#' top_seqs <- topSeqs(atable,
#'                     top = 1)
#' sequence_matrix <- seqMatrix(atable,
#'                              sequences = top_seqs$junction_aa)
#' unique_seqs <- uniqueSeqs(atable)
#' sequence_matrix <- seqMatrix(atable, 
#'                              sequences = unique_seqs$junction_aa)
#' # It can be helpful to combine the output of topFreq and seqMatrix
#' top_freq <- topFreq(atable, frequency = 0.001)
#' sequence_matrix <- seqMatrix(atable, sequences = top_freq$junction_aa)
#' top_freq_matrix <- merge(top_freq, sequence_matrix)
#' @export
#' @import tidyverse
seqMatrix <- function(productive_aa, sequences = NULL, by = "duplicate_frequency") {
    # Validate `by` before doing any work. An unsupported value previously
    # skipped both branches and surfaced as the unrelated error
    # "object 'sequence_matrix' not found".
    by <- match.arg(by, choices = c("duplicate_frequency", "duplicate_count"))

    # With no sequences of interest supplied, keep every distinct junction_aa
    # present in the input table.
    if (is.null(sequences)) {
        sequences <- base::unique(dplyr::pull(productive_aa, junction_aa))
    }

    # Sequence/repertoire combinations missing from the input are filled with
    # a zero whose type matches the selected value column (integer for
    # counts, double for frequencies).
    fill_value <- if (by == "duplicate_count") 0L else 0.0
    fill_spec <- list(fill_value)
    names(fill_spec) <- by

    # One row per junction_aa, one column per repertoire_id, cells taken from
    # the column named by `by`.
    sequence_matrix <- tidyr::pivot_wider(productive_aa,
                                          id_cols = junction_aa,
                                          names_from = repertoire_id,
                                          values_from = dplyr::all_of(by),
                                          values_fill = fill_spec)

    # Filter after pivoting so every repertoire_id column is retained even
    # when none of the requested sequences occur in that repertoire. `.env`
    # pins `sequences` to the function argument rather than a same-named
    # column of the table.
    dplyr::filter(sequence_matrix, .data$junction_aa %in% .env$sequences)
}
elulu3/LymphoSeqTest documentation built on Aug. 27, 2022, 5:47 a.m.