R/seqMatrix.R

Defines functions seqMatrix

Documented in seqMatrix

#' Sequence matrix
#'
#' Creates a data frame with unique, productive amino acid sequences as rows and
#' repertoire_id names as headers.  Each value in the data frame represents the
#' frequency (or count, depending on `by`) with which the sequence appeared in
#' the repertoire_id.
#'
#' @param amino_table A tibble of productive amino acid sequences
#' generated by LymphoSeq2 function [productiveSeq()] where the aggregate
#' parameter was set to "junction_aa".
#' @param sequences A character vector of amino acid sequences of interest.  It
#' is useful to specify the output from the LymphoSeq2 functions [uniqueSeqs()]
#' or [topSeqs()] and sub-setting the "junction_aa" column.  If `NULL` (the
#' default), every unique "junction_aa" value in `amino_table` is used.  See
#' examples below.
#' @param by Available options are "duplicate_frequency" and "duplicate_count".
#' Default is "duplicate_frequency".  Any other value raises an error.
#' @return Returns a data frame with one row per unique, productive amino acid
#' sequence ("junction_aa") and one column per "repertoire_id" holding the
#' value selected with `by`.  Sequences that are absent from a repertoire are
#' filled with 0.
#' @seealso [LymphoSeq2::topSeqs()] and [LymphoSeq2::uniqueSeqs()]
#' @examples
#' file_path <- system.file("extdata", "TCRB_sequencing",
#'  package = "LymphoSeq2")
#' study_table <- LymphoSeq2::readImmunoSeq(path = file_path, threads = 1)
#' study_table <- LymphoSeq2::topSeqs(study_table, top = 100)
#' amino_table <- LymphoSeq2::productiveSeq(study_table,
#'   aggregate = "junction_aa"
#' )
#' top_seqs <- LymphoSeq2::topSeqs(amino_table,
#'   top = 1
#' )
#' sequence_matrix <- LymphoSeq2::seqMatrix(amino_table,
#'   sequences = top_seqs$junction_aa, by = "duplicate_frequency"
#' )
#' unique_seqs <- LymphoSeq2::uniqueSeqs(amino_table)
#' sequence_matrix <- LymphoSeq2::seqMatrix(amino_table,
#'   sequences = unique_seqs$junction_aa, by = "duplicate_frequency"
#' )
#' # It can be helpful to combine the output of topFreq() and seqMatrix()
#' top_freq <- LymphoSeq2::topFreq(amino_table, frequency = 0.001)
#' sequence_matrix <- LymphoSeq2::seqMatrix(amino_table,
#'  sequences = top_freq$junction_aa)
#' top_freq_matrix <- merge(top_freq, sequence_matrix)
#' @export
seqMatrix <- function(amino_table,
                      sequences = NULL,
                      by = "duplicate_frequency") {
  # Validate first: an unsupported `by` used to fall through both branches and
  # fail later with "object 'sequence_matrix' not found".
  by <- match.arg(by, choices = c("duplicate_frequency", "duplicate_count"))

  if (is.null(sequences)) {
    sequences <- amino_table |>
      dplyr::pull(junction_aa) |>
      base::unique()
  }

  # Keep the fill type aligned with the value column: integer zero for counts,
  # double zero for frequencies.
  fill_value <- if (by == "duplicate_count") 0L else 0.0

  # Pivot before filtering so every repertoire_id keeps its column even when
  # none of the requested sequences occur in it.
  amino_table |>
    tidyr::pivot_wider(
      id_cols = junction_aa,
      names_from = repertoire_id,
      values_from = dplyr::all_of(by),
      values_fill = stats::setNames(list(fill_value), by)
    ) |>
    dplyr::filter(junction_aa %in% sequences)
}
shashidhar22/LymphoSeq2 documentation built on Jan. 16, 2024, 4:29 a.m.