R/RcppExports.R

Defines functions doAffineAlignmentCpp doAlignmentCpp alignChromatogramsCpp areaIntegrator getBaseGapPenaltyCpp constrainSimCpp getGlobalAlignMaskCpp getChromSimMatCpp getSeqSimMatCpp

Documented in alignChromatogramsCpp areaIntegrator constrainSimCpp doAffineAlignmentCpp doAlignmentCpp getBaseGapPenaltyCpp getChromSimMatCpp getGlobalAlignMaskCpp getSeqSimMatCpp

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Calculates similarity matrix for two sequences
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-05
#' @param seq1 (char) A single string.
#' @param seq2 (char) A single string.
#' @param match (double) Score for character match.
#' @param misMatch (double) score for character mismatch.
#' @return s (matrix) Numeric similarity matrix. Rows and columns expresses seq1 and seq2, respectively.
#' @examples
#' # Get sequence similarity of two DNA strings
#' Match=10; MisMatch=-2
#' seq1 = "GCAT"; seq2 = "CAGTG"
#' getSeqSimMatCpp(seq1, seq2, Match, MisMatch)
#' matrix(c(-2, 10, -2, -2, -2, -2, 10, -2, 10, -2, -2, -2, -2, -2, -2, 10, 10, -2, -2, -2),
#'  4, 5, byrow = FALSE)
#' @export
getSeqSimMatCpp <- function(seq1, seq2, match, misMatch) {
    .Call(`_DIAlignR_getSeqSimMatCpp`, seq1, seq2, match, misMatch)
}

#' Calculates similarity matrix of two fragment-ion chromatogram groups or extracted-ion chromatograms(XICs)
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-05
#' @param l1 (list) A list of vectors. Length should be same as of l2.
#' @param l2 (list) A list of vectors. Length should be same as of l1.
#' @param normalization (char) A character string. Normalization must be selected from (L2, mean or none).
#' @param simType (char) A character string. Similarity type must be selected from (dotProductMasked, dotProduct, cosineAngle, cosine2Angle, euclideanDist, covariance, correlation).\cr
#' Mask = s > quantile(s, dotProdThresh)\cr
#' AllowDotProd= [Mask × cosine2Angle + (1 - Mask)] > cosAngleThresh\cr
#' s_new= s × AllowDotProd
#' @param cosAngleThresh (numeric) In simType = dotProductMasked mode, angular similarity should be higher than cosAngleThresh otherwise similarity is forced to zero.
#' @param dotProdThresh (numeric) In simType = dotProductMasked mode, values in similarity matrix higher than dotProdThresh quantile are checked for angular similarity.
#' @return s (matrix) Numeric similarity matrix. Rows and columns expresses seq1 and seq2, respectively.
#' @examples
#' # Get similarity matrix of dummy chromatograms
#' r1 <- list(c(1.0,3.0,2.0,4.0), c(0.0,0.0,0.0,1.0), c(4.0,4.0,4.0,5.0))
#' r2 <- list(c(1.4,2.0,1.5,4.0), c(0.0,0.5,0.0,0.0), c(2.0,3.0,4.0,0.9))
#' round(getChromSimMatCpp(r1, r2, "L2", "dotProductMasked"), 3)
#' matrix(c(0.125, 0.162, 0.144, 0.208, 0.186, 0.240,
#' 0.213, 0.313, 0.233, 0.273, 0.253, 0.346, 0.101, 0.208, 0.154, 0.273), 4, 4, byrow = FALSE)
#'
#' round(getChromSimMatCpp(r1, r2, "L2", "dotProduct"), 3)
#' matrix(c(0.125, 0.162, 0.144, 0.208, 0.186,0.240, 0.213, 0.313, 0.233,
#' 0.273, 0.253, 0.346, 0.101, 0.208, 0.154, 0.273), 4, 4, byrow = FALSE)
#'
#' round(getChromSimMatCpp(r1, r2, "L2", "cosineAngle"), 3)
#' matrix(c(0.934, 0.999, 0.989, 0.986, 0.933, 0.989,
#'  0.983, 0.996, 0.994, 0.960, 0.995, 0.939, 0.450,
#'  0.761, 0.633, 0.772), 4, 4, byrow = FALSE)
#'
#' round(getChromSimMatCpp(r1, r2, "L2", "cosine2Angle"), 3)
#' matrix(c(0.744, 0.998, 0.957, 0.944, 0.740, 0.956, 0.932,
#' 0.985, 0.974, 0.842, 0.978, 0.764, -0.596, 0.158,
#' -0.200, 0.190), 4, 4, byrow = FALSE)
#'
#' round(getChromSimMatCpp(r1, r2, "mean", "euclideanDist"), 3)
#' matrix(c(0.608, 0.614, 0.680, 0.434, 0.530, 0.742,
#' 0.659, 0.641, 0.520, 0.541, 0.563, 0.511, 0.298,
#' 0.375, 0.334, 0.355), 4, 4, byrow = FALSE)
#'
#' round(getChromSimMatCpp(r1, r2, "L2", "covariance"), 3)
#' matrix(c(0.025, 0.028, 0.027, 0.028, 0.032, 0.034,
#' 0.033, 0.034, 0.055, 0.051, 0.053, 0.051,
#' -0.004, 0.028, 0.012, 0.028), 4, 4, byrow = FALSE)
#'
#' round(getChromSimMatCpp(r1, r2, "L2", "correlation"), 3)
#' matrix(c(0.874, 0.999, 0.974, 0.999, 0.923, 0.986, 0.993,
#' 0.986, 0.991, 0.911, 0.990, 0.911, -0.065, 0.477,
#' 0.214, 0.477), 4, 4, byrow = FALSE)
#' @export
getChromSimMatCpp <- function(l1, l2, normalization, simType, cosAngleThresh = 0.3, dotProdThresh = 0.96) {
    .Call(`_DIAlignR_getChromSimMatCpp`, l1, l2, normalization, simType, cosAngleThresh, dotProdThresh)
}

#' Outputs a mask for constraining similarity matrix
#'
#' This function takes in timeVectors from both runs, global-fit mapped values
#' of end-points of first time vector and sample-length of window of no constraining.
#' Outside of window, all elements of matrix are either equally weighted or weighted
#' proportional to distance from window-boundry.
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param tA (numeric) A numeric vector. This vector has equally spaced timepoints of XIC A.
#' @param tB (numeric) A numeric vector. This vector has equally spaced timepoints of XIC B.
#' @param B1p (numeric) Timepoint mapped by global fit for tA[1].
#' @param B2p (numeric) Timepoint mapped by global fit for tA[length(tA)].
#' @param noBeef (integer) It defines the distance from the global fit, upto which no penalization is performed.\cr
#' The window length = 2*noBeef.
#' @param hardConstrain (logical) if false; indices farther from noBeef distance are filled with distance from linear fit line.
#' @return mask (matrix) A numeric matrix.
#' @examples
#' tA <- c(3353.2, 3356.6, 3360.0, 3363.5)
#' tB <- c(3325.9, 3329.3, 3332.7, 3336.1)
#' B1p <- 3325.751; B2p <- 3336.119
#' noBeef <- 1
#' mask <- getGlobalAlignMaskCpp(tA, tB, B1p, B2p, noBeef, FALSE)
#' round(mask, 3)
#' matrix(c(0.000, 0.000, 0.707, 1.414, 0.000, 0.000, 0.000, 0.707, 0.707, 0.000,
#' 0.000, 0.000, 1.414, 0.707, 0.000, 0.000), 4, 4, byrow = FALSE)
#' @export
getGlobalAlignMaskCpp <- function(tA, tB, B1p, B2p, noBeef = 50L, hardConstrain = FALSE) {
    .Call(`_DIAlignR_getGlobalAlignMaskCpp`, tA, tB, B1p, B2p, noBeef, hardConstrain)
}

#' Constrain similarity matrix with a mask
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param sim (matrix) A numeric matrix. Input similarity matrix.
#' @param MASK (matrix) A numeric matrix. Masked indices have non-zero values.
#' @param samples4gradient (numeric) This paarameter modulates penalization of masked indices.
#' @return s_new (matrix) A constrained similarity matrix.
#' @examples
#' sim <- matrix(c(-2, 10, -2, -2, -2, -2, 10, -2, 10, -2, -2, -2, -2, -2, -2, 10, 10, -2,-2, -2),
#'  4, 5, byrow = FALSE)
#' MASK <- matrix(c(0.000, 0.000, 0.707, 1.414, 0.000, 0.000, 0.000, 0.707, 0.707, 0.000,
#' 0.000, 0.000, 1.414, 0.707, 0, 0, 2.121, 1.414, 0, 0), 4, 5, byrow = FALSE)
#' constrainSimCpp(sim, MASK, 10)
#' matrix(c(-2, 10, -3.414, -4.828, -2, -2, 10, -3.414, 8.586, -2, -2, -2, -4.828,
#' -3.414, -2, 10, 5.758, -4.828, -2, -2), 4, 5, byrow = FALSE)
#' @export
constrainSimCpp <- function(sim, MASK, samples4gradient = 100.0) {
    .Call(`_DIAlignR_constrainSimCpp`, sim, MASK, samples4gradient)
}

#' Calculates gap penalty for dynamic programming based alignment.
#'
#' This function outputs base gap-penalty depending on SimType used. In case of getting base gap-penalty
#' from similarity matrix distribution, gapQuantile will be used to pick the value.
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param sim (matrix) A numeric matrix. Input similarity matrix.
#' @param SimType (char) A character string. Similarity type must be selected from (dotProductMasked, dotProduct, cosineAngle, cosine2Angle, euclideanDist, covariance, correlation).
#' @param gapQuantile (numeric) Must be between 0 and 1.
#' @return baseGapPenalty (numeric).
#' @examples
#' sim <- matrix(c(-12, 1.0, 12, -2.3, -2, -2, 1.07, -2, 1.80, 2, 22, 42, -2, -1.5, -2, 10), 4, 4,
#'  byrow = FALSE)
#' getBaseGapPenaltyCpp(sim, "dotProductMasked", 0.5) # -0.25
#' @export
getBaseGapPenaltyCpp <- function(sim, SimType, gapQuantile = 0.5) {
    .Call(`_DIAlignR_getBaseGapPenaltyCpp`, sim, SimType, gapQuantile)
}

#' Calculates area between signal-boundaries.
#'
#' This function sums all the intensities between left-index and right-index.
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param l1 (list) A list of time vectors.
#' @param l2 (list) A list of intensity vectors.
#' @param left (numeric) left boundary of the peak.
#' @param right (numeric) right boundary of the peak.
#' @param integrationType (string) method to ompute the area of a peak contained in XICs. Must be
#'  from "intensity_sum", "trapezoid", "simpson".
#' @param baselineType (string) method to estimate the background of a peak contained in XICs. Must be
#'  from "base_to_base", "vertical_division_min", "vertical_division_max".
#' @param fitEMG (logical) enable/disable exponentially modified gaussian peak model fitting.
#' @return area (numeric).
#' @examples
#' data("XIC_QFNNTDIVLLEDFQK_3_DIAlignR", package = "DIAlignR")
#' XICs <- XIC_QFNNTDIVLLEDFQK_3_DIAlignR[["run1"]][["14299_QFNNTDIVLLEDFQK/3"]]
#' l1 <- lapply(XICs, `[[`, 1)
#' l2 <- lapply(XICs, `[[`, 2)
#' areaIntegrator(l1, l2, left = 5203.7, right = 5268.5, "intensity_sum", "base_to_base", FALSE)
#' # 224.9373
#' @export
areaIntegrator <- function(l1, l2, left, right, integrationType, baselineType, fitEMG) {
    .Call(`_DIAlignR_areaIntegrator`, l1, l2, left, right, integrationType, baselineType, fitEMG)
}

#' Aligns MS2 extracted-ion chromatograms(XICs) pair.
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param l1 (list) A list of numeric vectors. l1 and l2 should have same length.
#' @param l2 (list) A list of numeric vectors. l1 and l2 should have same length.
#' @param alignType (char) A character string. Available alignment methods are "global", "local" and "hybrid".
#' @param tA (numeric) A numeric vector. This vector has equally spaced timepoints of XIC A.
#' @param tB (numeric) A numeric vector. This vector has equally spaced timepoints of XIC B.
#' @param normalization (char) A character string. Normalization must be selected from (L2, mean or none).
#' @param simType (char) A character string. Similarity type must be selected from (dotProductMasked, dotProduct, cosineAngle, cosine2Angle, euclideanDist, covariance, correlation).\cr
#' Mask = s > quantile(s, dotProdThresh)\cr
#' AllowDotProd= [Mask × cosine2Angle + (1 - Mask)] > cosAngleThresh\cr
#' s_new= s × AllowDotProd
#' @param B1p (numeric) Timepoint mapped by global fit for tA[1].
#' @param B2p (numeric) Timepoint mapped by global fit for tA[length(tA)].
#' @param noBeef (integer) It defines the distance from the global fit, upto which no penalization is performed.\cr
#' The window length = 2*noBeef.
#' @param goFactor (numeric) Penalty for introducing first gap in alignment. This value is multiplied by base gap-penalty.
#' @param geFactor (numeric) Penalty for introducing subsequent gaps in alignment. This value is multiplied by base gap-penalty.
#' @param cosAngleThresh (numeric) In simType = dotProductMasked mode, angular similarity should be higher than cosAngleThresh otherwise similarity is forced to zero.
#' @param OverlapAlignment (logical) An input for alignment with free end-gaps. False: Global alignment, True: overlap alignment.
#' @param dotProdThresh (numeric) In simType = dotProductMasked mode, values in similarity matrix higher than dotProdThresh quantile are checked for angular similarity.
#' @param gapQuantile (numeric) Must be between 0 and 1. This is used to calculate base gap-penalty from similarity distribution.
#' @param hardConstrain (logical) if false; indices farther from noBeef distance are filled with distance from linear fit line.
#' @param samples4gradient (numeric) This parameter modulates penalization of masked indices.
#' @param objType (char) A character string. Must be either light, medium or heavy.
#' @return affineAlignObj (S4class) A S4class object from C++ AffineAlignObj struct.
#' @examples
#' data(XIC_QFNNTDIVLLEDFQK_3_DIAlignR, package="DIAlignR")
#' XICs <- XIC_QFNNTDIVLLEDFQK_3_DIAlignR
#' data(oswFiles_DIAlignR, package="DIAlignR")
#' oswFiles <- oswFiles_DIAlignR
#' XICs.ref <- XICs[["run1"]][["14299_QFNNTDIVLLEDFQK/3"]]
#' XICs.eXp <- XICs[["run2"]][["14299_QFNNTDIVLLEDFQK/3"]]
#' tVec.ref <- XICs.ref[[1]][["time"]] # Extracting time component
#' tVec.eXp <- XICs.eXp[[1]][["time"]] # Extracting time component
#' B1p <- 4964.752
#' B2p <- 5565.462
#' noBeef <- 77.82315/3.414
#' l1 <- lapply(XICs.ref, `[[`, 2)
#' l2 <- lapply(XICs.eXp, `[[`, 2)
#' AlignObj <- alignChromatogramsCpp(l1, l2, alignType = "hybrid", tA = tVec.ref, tB = tVec.eXp,
#'  normalization = "mean", simType = "dotProductMasked", B1p = B1p, B2p = B2p, noBeef = noBeef,
#'  goFactor = 0.125, geFactor = 40, cosAngleThresh = 0.3, OverlapAlignment = TRUE,
#'  dotProdThresh = 0.96, gapQuantile = 0.5, hardConstrain = FALSE, samples4gradient = 100,
#'  objType = "light")
#' @export
alignChromatogramsCpp <- function(l1, l2, alignType, tA, tB, normalization, simType, B1p = 0.0, B2p = 0.0, noBeef = 0L, goFactor = 0.125, geFactor = 40, cosAngleThresh = 0.3, OverlapAlignment = TRUE, dotProdThresh = 0.96, gapQuantile = 0.5, hardConstrain = FALSE, samples4gradient = 100.0, objType = "heavy") {
    .Call(`_DIAlignR_alignChromatogramsCpp`, l1, l2, alignType, tA, tB, normalization, simType, B1p, B2p, noBeef, goFactor, geFactor, cosAngleThresh, OverlapAlignment, dotProdThresh, gapQuantile, hardConstrain, samples4gradient, objType)
}

#' Perform non-affine global and overlap alignment on a similarity matrix
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param sim (NumericMatrix) A numeric matrix with similarity values of two sequences or signals.
#' @param gap (double) Penalty for introducing gaps in alignment.
#' @param OverlapAlignment (logical) An input for alignment with free end-gaps. False: Global alignment, True: overlap alignment.
#' @return AlignObj (S4class) An object from C++ class of AlignObj.
#' @examples
#' # Get sequence similarity of two DNA strings
#' Match=10; MisMatch=-2
#' seq1 = "GCAT"; seq2 = "CAGTG"
#' s <- getSeqSimMatCpp(seq1, seq2, Match, MisMatch)
#' obj_Global <- doAlignmentCpp(s, 22, FALSE)
#' slot(obj_Global, "score") # -2 -4 -6 4 -18
#' obj_Olap <- doAlignmentCpp(s, 22, TRUE)
#' slot(obj_Olap, "score") # 0 10 20 18 18 18
#'
#' Match=1; MisMatch=-1
#' seq1 = "TTTC"; seq2 = "TGC"
#' s <- getSeqSimMatCpp(seq1, seq2, Match, MisMatch)
#' obj_Global <- doAlignmentCpp(s, 2, FALSE)
#' slot(obj_Global, "optionalPaths")
#' matrix(data = c(1,1,1,1,1,1,1,1,1,2,1,2,1,3,3,1,1,3,6,3), nrow = 5, ncol =4, byrow = TRUE)
#' slot(obj_Global, "M_forw")
#' matrix(data = c(0,-2,-4,-6,-2,-7,-22,-45,-4,-20,-72,-184,-6,-41,-178,-547,-8,-72,-366,-1274),
#'  nrow = 5, ncol =4, byrow = TRUE)
#' @export
doAlignmentCpp <- function(sim, gap, OverlapAlignment) {
    .Call(`_DIAlignR_doAlignmentCpp`, sim, gap, OverlapAlignment)
}

#' Perform affine global and overlap alignment on a similarity matrix
#'
#' @author Shubham Gupta, \email{shubh.gupta@mail.utoronto.ca}
#' ORCID: 0000-0003-3500-8152
#' License: (c) Author (2019) + MIT
#' Date: 2019-03-08
#' @param sim (NumericMatrix) A numeric matrix with similarity values of two sequences or signals.
#' @param go (numeric) Penalty for introducing first gap in alignment.
#' @param ge (numeric) Penalty for introducing subsequent gaps in alignment.
#' @param OverlapAlignment (logical) An input for alignment with free end-gaps. False: Global alignment, True: overlap alignment.
#' @return affineAlignObj (S4class) An object from C++ class of AffineAlignObj.
#' @examples
#' # Get sequence similarity of two DNA strings
#' Match=10; MisMatch=-2
#' seq1 = "GCAT"; seq2 = "CAGTG"
#' s <- getSeqSimMatCpp(seq1, seq2, Match, MisMatch)
#' objAffine_Global <- doAffineAlignmentCpp(s, 22, 7, FALSE)
#' slot(objAffine_Global, "score") # -2  -4  -6  4 -18
#' objAffine_Olap <- doAffineAlignmentCpp(s, 22, 7, TRUE)
#' slot(objAffine_Olap, "score") # 0 10 20 18 18 18
#'
#' Match=10; MisMatch=-2
#' seq1 = "CAT"; seq2 = "CAGTG"
#' s <- getSeqSimMatCpp(seq1, seq2, Match, MisMatch)
#' objAffine_Global <- doAffineAlignmentCpp(s, 22, 7, FALSE)
#' slot(objAffine_Global, "score") # 10  20  -2  -9 -11
#' objAffine_Olap <- doAffineAlignmentCpp(s, 22, 7, TRUE)
#' slot(objAffine_Olap, "score") # 10 20 18 18 18
#'
#' Match=10; MisMatch=-2
#' seq1 = "CA"; seq2 = "AG"
#' s <- getSeqSimMatCpp(seq1, seq2, Match, MisMatch)
#' objAffine_Global <- doAffineAlignmentCpp(s, 22, 7, FALSE)
#' slot(objAffine_Global, "simScore_forw") # -4
#' @export
doAffineAlignmentCpp <- function(sim, go, ge, OverlapAlignment) {
    .Call(`_DIAlignR_doAffineAlignmentCpp`, sim, go, ge, OverlapAlignment)
}

Try the DIAlignR package in your browser

Any scripts or data that you put into this service are public.

DIAlignR documentation built on Nov. 8, 2020, 8:22 p.m.