# Source file: R/RcppExports.R
#
# Defines functions: hamming_shift_distance hamming_distance levenshtein_distance mergeValues digestFastqsCpp groupSimilarSequences findClosestRefSeqEarlyStop findClosestRefSeq test_mergeReadPairPartial test_decomposeRead test_makeAAHGVS makeBaseHGVS translateString compareCodonPositions calcNearestStringDist
#
# Documented in: calcNearestStringDist groupSimilarSequences

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Calculate distances to the nearest string
#'
#' For every element of a character vector, compute the distance to its
#' nearest neighbor among all the other elements of that vector.
#'
#' @param x A character vector.
#' @param metric A character scalar selecting the string distance metric;
#'   one of \code{"hamming"} (default), \code{"hamming_shift"} or
#'   \code{"levenshtein"}.
#' @param nThreads numeric(1), number of threads to use for parallel
#'   processing.
#'
#' @return An integer vector of the same length as \code{x}.
#'
#' @examples
#' calcNearestStringDist(c("lazy", "hazy", "crazy"))
#' calcNearestStringDist(c("lazy", "hazy", "crazy"), metric = "hamming_shift")
#' calcNearestStringDist(c("lazy", "hazy", "crazy"), metric = "levenshtein")
#'
#' @export
calcNearestStringDist <- function(x, metric = "hamming", nThreads = 1L) {
    # The body only dispatches to the native symbol; the arguments are
    # forwarded positionally and unmodified.
    .Call(
        `_mutscan_calcNearestStringDist`,
        x, metric, nThreads
    )
}

# Internal wrapper: forwards `a`, `b` and `mutNameDelimiter`, in that order,
# to the native symbol `_mutscan_compareCodonPositions` and returns its result.
compareCodonPositions <- function(a, b, mutNameDelimiter) {
    .Call(
        `_mutscan_compareCodonPositions`,
        a, b, mutNameDelimiter
    )
}

# Internal wrapper: hands `s` to the native symbol `_mutscan_translateString`
# and returns whatever that routine produces.
translateString <- function(s) {
    .Call(
        `_mutscan_translateString`,
        s
    )
}

# Internal wrapper: forwards its four arguments positionally to the native
# symbol `_mutscan_makeBaseHGVS` and returns the result.
makeBaseHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq, varSeq) {
    .Call(
        `_mutscan_makeBaseHGVS`,
        mutationsSorted, mutNameDelimiter,
        wtSeq, varSeq
    )
}

# Internal wrapper around the native symbol `_mutscan_test_makeAAHGVS`;
# arguments are forwarded positionally and unmodified.
# NOTE(review): the `test_` prefix suggests this exists to exercise the
# compiled routine from the test suite -- confirm.
test_makeAAHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq) {
    .Call(
        `_mutscan_test_makeAAHGVS`,
        mutationsSorted, mutNameDelimiter, wtSeq
    )
}

# Internal wrapper around the native symbol `_mutscan_test_decomposeRead`;
# all thirteen arguments are forwarded positionally, in signature order.
# NOTE(review): the `test_` prefix suggests this exists to exercise the
# compiled routine from the test suite -- confirm.
test_decomposeRead <- function(sseq, squal, elements, elementLengths,
                               primerSeqs, umiSeq, varSeq, varQual,
                               varLengths, constSeq, constQual, nNoPrimer,
                               nReadWrongLength) {
    .Call(
        `_mutscan_test_decomposeRead`,
        sseq, squal, elements, elementLengths,
        primerSeqs, umiSeq, varSeq, varQual,
        varLengths, constSeq, constQual, nNoPrimer,
        nReadWrongLength
    )
}

# Internal wrapper around the native symbol
# `_mutscan_test_mergeReadPairPartial`; arguments (including the defaults
# declared below) are forwarded positionally, in signature order.
# NOTE(review): the `test_` prefix suggests this exists to exercise the
# compiled routine from the test suite -- confirm.
test_mergeReadPairPartial <- function(seqF, qualF, seqR, qualR, lenF, lenR,
                                      minOverlap = 0L,
                                      maxOverlap = 0L,
                                      minMergedLength = 0L,
                                      maxMergedLength = 0L,
                                      maxFracMismatchOverlap = 0,
                                      greedy = TRUE) {
    .Call(
        `_mutscan_test_mergeReadPairPartial`,
        seqF, qualF, seqR, qualR, lenF, lenR,
        minOverlap, maxOverlap,
        minMergedLength, maxMergedLength,
        maxFracMismatchOverlap, greedy
    )
}

# Internal wrapper: forwards `varSeq`, `wtSeq`, `upperBoundMismatch` and
# `sim`, in that order, to the native symbol `_mutscan_findClosestRefSeq`.
findClosestRefSeq <- function(varSeq, wtSeq, upperBoundMismatch, sim) {
    .Call(
        `_mutscan_findClosestRefSeq`,
        varSeq, wtSeq,
        upperBoundMismatch, sim
    )
}

# Internal wrapper: forwards `varSeq`, `wtSeq`, `upperBoundMismatch` and
# `sim`, in that order, to the native symbol
# `_mutscan_findClosestRefSeqEarlyStop`.
findClosestRefSeqEarlyStop <- function(varSeq, wtSeq, upperBoundMismatch, sim) {
    .Call(
        `_mutscan_findClosestRefSeqEarlyStop`,
        varSeq, wtSeq,
        upperBoundMismatch, sim
    )
}

#' Create a conversion table for collapsing similar sequences
#'
#' @param seqs Character vector of nucleotide sequences (or pairs of
#' sequences concatenated with "_") that should be collapsed. All
#' sequences must have the same length.
#' @param scores Numeric vector of "scores" for the sequences, typically
#' the total read/UMI count. When choosing the representative for a group
#' of collapsed sequences, a higher score is preferred.
#' @param collapseMaxDist Numeric scalar giving the tolerance for collapsing
#' similar sequences. A value in [0, 1) is interpreted as the maximal
#' Hamming distance expressed as a fraction of the sequence length:
#' (\code{round(collapseMaxDist * nchar(sequence))}).
#' A value greater or equal to 1 is rounded and used directly as the maximum
#' allowed Hamming distance. Note that sequences can only be
#' collapsed if they are all of the same length.
#' @param collapseMinScore Numeric scalar giving the minimum score a
#' sequence needs in order to be considered as a representative for a
#' group of similar sequences (i.e., to allow other sequences to be
#' collapsed into it).
#' @param collapseMinRatio Numeric scalar. While collapsing similar
#' sequences, a low-frequency sequence is collapsed with a
#' higher-frequency sequence only if the ratio between the
#' high-frequency and the low-frequency scores is at least this
#' high. A value of 0 indicates that no such check is performed.
#' @param verbose Logical scalar, whether to print progress messages.
#'
#' @return A data.frame with two columns, containing the input sequences
#' and the representatives for the groups resulting from grouping similar
#' sequences, respectively.
#'
#' @examples
#' seqs <- c("AACGTAGCA", "ACCGTAGCA", "AACGGAGCA", "ATCGGAGCA", "TGAGGCATA")
#' scores <- c(5, 1, 3, 1, 8)
#' groupSimilarSequences(seqs = seqs, scores = scores,
#'                       collapseMaxDist = 1, collapseMinScore = 0,
#'                       collapseMinRatio = 0, verbose = FALSE)
#'
#' @export
#' @author Michael Stadler, Charlotte Soneson
groupSimilarSequences <- function(seqs, scores, collapseMaxDist,
                                  collapseMinScore, collapseMinRatio,
                                  verbose) {
    # The body only dispatches to the native symbol; the arguments are
    # forwarded positionally and unmodified.
    .Call(
        `_mutscan_groupSimilarSequences`,
        seqs, scores, collapseMaxDist,
        collapseMinScore, collapseMinRatio,
        verbose
    )
}

# Internal wrapper around the native symbol `_mutscan_digestFastqsCpp`.
# The first 25 parameters are required; the remaining 27 carry the defaults
# declared below. Every argument is forwarded positionally, in signature
# order, so the two lists must be kept in lockstep.
digestFastqsCpp <- function(fastqForwardVect,
                            fastqReverseVect,
                            mergeForwardReverse,
                            minOverlap,
                            maxOverlap,
                            minMergedLength,
                            maxMergedLength,
                            maxFracMismatchOverlap,
                            greedyOverlap,
                            revComplForward,
                            revComplReverse,
                            elementsForward,
                            elementLengthsForward,
                            elementsReverse,
                            elementLengthsReverse,
                            adapterForward,
                            adapterReverse,
                            primerForward,
                            primerReverse,
                            wildTypeForward,
                            wildTypeForwardNames,
                            wildTypeReverse,
                            wildTypeReverseNames,
                            constantForward,
                            constantReverse,
                            avePhredMinForward = 20.0,
                            avePhredMinReverse = 20.0,
                            variableNMaxForward = 0L,
                            variableNMaxReverse = 0L,
                            umiNMax = 0L,
                            nbrMutatedCodonsMaxForward = 1L,
                            nbrMutatedCodonsMaxReverse = 1L,
                            nbrMutatedBasesMaxForward = -1L,
                            nbrMutatedBasesMaxReverse = -1L,
                            forbiddenMutatedCodonsForward = "NNW",
                            forbiddenMutatedCodonsReverse = "NNW",
                            useTreeWTmatch = FALSE,
                            collapseToWTForward = FALSE,
                            collapseToWTReverse = FALSE,
                            mutatedPhredMinForward = 0.0,
                            mutatedPhredMinReverse = 0.0,
                            mutNameDelimiter = ".",
                            constantMaxDistForward = -1L,
                            constantMaxDistReverse = -1L,
                            umiCollapseMaxDist = 0.0,
                            filteredReadsFastqForward = "",
                            filteredReadsFastqReverse = "",
                            maxNReads = -1L,
                            verbose = FALSE,
                            nThreads = 1L,
                            chunkSize = 100000L,
                            maxReadLength = 1024L) {
    .Call(
        `_mutscan_digestFastqsCpp`,
        fastqForwardVect,
        fastqReverseVect,
        mergeForwardReverse,
        minOverlap,
        maxOverlap,
        minMergedLength,
        maxMergedLength,
        maxFracMismatchOverlap,
        greedyOverlap,
        revComplForward,
        revComplReverse,
        elementsForward,
        elementLengthsForward,
        elementsReverse,
        elementLengthsReverse,
        adapterForward,
        adapterReverse,
        primerForward,
        primerReverse,
        wildTypeForward,
        wildTypeForwardNames,
        wildTypeReverse,
        wildTypeReverseNames,
        constantForward,
        constantReverse,
        avePhredMinForward,
        avePhredMinReverse,
        variableNMaxForward,
        variableNMaxReverse,
        umiNMax,
        nbrMutatedCodonsMaxForward,
        nbrMutatedCodonsMaxReverse,
        nbrMutatedBasesMaxForward,
        nbrMutatedBasesMaxReverse,
        forbiddenMutatedCodonsForward,
        forbiddenMutatedCodonsReverse,
        useTreeWTmatch,
        collapseToWTForward,
        collapseToWTReverse,
        mutatedPhredMinForward,
        mutatedPhredMinReverse,
        mutNameDelimiter,
        constantMaxDistForward,
        constantMaxDistReverse,
        umiCollapseMaxDist,
        filteredReadsFastqForward,
        filteredReadsFastqReverse,
        maxNReads,
        verbose,
        nThreads,
        chunkSize,
        maxReadLength
    )
}

# Internal wrapper: forwards `mutNamesIn`, `valuesIn` and `delimiter`
# (a comma unless overridden) to the native symbol `_mutscan_mergeValues`.
mergeValues <- function(mutNamesIn, valuesIn, delimiter = ",") {
    .Call(
        `_mutscan_mergeValues`,
        mutNamesIn, valuesIn, delimiter
    )
}

# Internal wrapper: forwards `str1`, `str2` and `ignored_variable` (default
# -1L) to the native symbol `_mutscan_levenshtein_distance`.
# NOTE(review): the third parameter's name suggests the native routine does
# not use it and it only keeps the signature parallel to
# hamming_shift_distance() -- confirm.
levenshtein_distance <- function(str1, str2, ignored_variable = -1L) {
    .Call(
        `_mutscan_levenshtein_distance`,
        str1, str2, ignored_variable
    )
}

# Internal wrapper: forwards `str1`, `str2` and `ignored_variable` (default
# -1L) to the native symbol `_mutscan_hamming_distance`.
# NOTE(review): the third parameter's name suggests the native routine does
# not use it and it only keeps the signature parallel to
# hamming_shift_distance() -- confirm.
hamming_distance <- function(str1, str2, ignored_variable = -1L) {
    .Call(
        `_mutscan_hamming_distance`,
        str1, str2, ignored_variable
    )
}

# Internal wrapper: forwards `str1`, `str2` and `max_abs_shift` (default -1L)
# to the native symbol `_mutscan_hamming_shift_distance`.
# NOTE(review): the meaning of a negative `max_abs_shift` is defined on the
# native side and is not visible here -- confirm before relying on it.
hamming_shift_distance <- function(str1, str2, max_abs_shift = -1L) {
    .Call(
        `_mutscan_hamming_shift_distance`,
        str1, str2, max_abs_shift
    )
}
# fmicompbio/mutscan documentation built on March 30, 2024, 9:13 a.m.