# R/trinucs.R

# Defines functions count_trinucs

# Documented in count_trinucs

#' Count trinucleotide mutations
#'
#' Tabulate how often each variant base occurs in each trinucleotide context
#' of a dataset. All 32 contexts and all 4 bases are present in the result,
#' with an explicit zero where a combination was not observed.
#'
#' Rows whose `variant_pyrbased` is `NA` are counted under their reference
#' base (`nucl_pyrbased`). The three columns are converted to character
#' before counting, so factor columns are handled as well.
#'
#' @param dataset A dataset generated from `extend_positions`; it must contain
#'   the columns `trinuc_pyrbased`, `variant_pyrbased` and `nucl_pyrbased`.
#' @export
#' @return A 32 x 4 numeric array of counts with dimensions `trinuc` (the
#'   contexts in `trinucs.c` followed by those in `trinucs.t`) and `varbase`
#'   (`"A"`, `"C"`, `"G"`, `"T"`).
count_trinucs <- function(dataset) {
  required <- c("trinuc_pyrbased", "variant_pyrbased", "nucl_pyrbased")
  absent <- setdiff(required, names(dataset))
  if (length(absent) > 0) {
    stop(
      "`dataset` is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Work on character copies: assigning a reference base into a factor that
  # lacks that level would turn it into `NA` (with only a warning), and the
  # row would then be dropped from the counts.
  trinuc <- as.character(dataset[["trinuc_pyrbased"]])
  variant <- as.character(dataset[["variant_pyrbased"]])
  refbase <- as.character(dataset[["nucl_pyrbased"]])

  # Transform `NA` variants to refbases
  no_variant <- is.na(variant)
  variant[no_variant] <- refbase[no_variant]

  # Start from an all-zero grid so unobserved combinations stay at zero
  mut_counts <- array(
    0,
    dim = c(32, 4),
    dimnames = list(
      trinuc = c(trinucs.c, trinucs.t),
      varbase = c("A", "C", "G", "T")
    )
  )

  observed <- table(trinuc, variant)

  # Fail with a readable message rather than "subscript out of bounds"
  unexpected <- c(
    setdiff(rownames(observed), rownames(mut_counts)),
    setdiff(colnames(observed), colnames(mut_counts))
  )
  if (length(unexpected) > 0) {
    stop(
      "unexpected trinucleotide(s) or variant base(s) in `dataset`: ",
      paste(unexpected, collapse = ", "),
      call. = FALSE
    )
  }

  mut_counts[rownames(observed), colnames(observed)] <- observed
  mut_counts
}


# The 16 three-letter contexts with "C" in the middle: each first letter
# (A, C, G, T) paired with each last letter, the last letter varying fastest.
# These are the first 16 row names of the array built by `count_trinucs`.
trinucs.c <- paste0(
  rep(c("A", "C", "G", "T"), each = 4),
  "C",
  rep(c("A", "C", "G", "T"), times = 4)
)

# The 16 three-letter contexts with "T" in the middle: each first letter
# (A, C, G, T) paired with each last letter, the last letter varying fastest.
# These are the last 16 row names of the array built by `count_trinucs`.
trinucs.t <- paste0(
  rep(c("A", "C", "G", "T"), each = 4),
  "T",
  rep(c("A", "C", "G", "T"), times = 4)
)
# lindberg-m/contextendR documentation built on Jan. 8, 2022, 3:16 a.m.