RNAs and Their Interactions

Documented in calcParProtSeqSim calcTwoProtSeqSim

.calcSeqPairSim = function (twoid, protlist = protlist, type = type, submat = submat) {

    id1 = twoid[1]
    id2 = twoid[2]

    if ( protlist[[id1]] == '' | protlist[[id2]] == '' ) {

        sim = 0L

        } else {

            s1  = try(Biostrings::AAString(protlist[[id1]]), silent = TRUE)
            s2  = try(Biostrings::AAString(protlist[[id2]]), silent = TRUE)
            s12 = try(Biostrings::pairwiseAlignment(s1, s2, type = type, substitutionMatrix = submat, scoreOnly = TRUE), silent = TRUE)
            s11 = try(Biostrings::pairwiseAlignment(s1, s1, type = type, substitutionMatrix = submat, scoreOnly = TRUE), silent = TRUE)
            s22 = try(Biostrings::pairwiseAlignment(s2, s2, type = type, substitutionMatrix = submat, scoreOnly = TRUE), silent = TRUE)

            if ( is.numeric(s12) == FALSE | is.numeric(s11) == FALSE | is.numeric(s22) == FALSE ) {
                sim = 0L
                } else if ( abs(s11) < .Machine$double.eps | abs(s22) < .Machine$double.eps ) {
                    sim = 0L
                    } else {
                        sim = s12/sqrt(s11 * s22)
                    }

        }

    return(sim)

}

#' Parallellized Protein Sequence Similarity Calculation based on Sequence Alignment
#'
#' Parallellized Protein Sequence Similarity Calculation based on Sequence Alignment
#'
#' This function implemented the parallellized version for calculating 
#' protein sequence similarity based on sequence alignment.
#' 
#' @param protlist A length \code{n} list containing \code{n} protein sequences, 
#' each component of the list is a character string, 
#' storing one protein sequence. Unknown sequences should be represented as 
#' \code{''}.
#' @param cores Integer. The number of CPU cores to use for parallel execution, 
#'        default is \code{2}. Users could use the \code{detectCores()} function
#'        in the \code{parallel} package to see how many cores they could use.
#' @param type Type of alignment, default is \code{'local'}, 
#' could be \code{'global'} or \code{'local'}, 
#' where \code{'global'} represents Needleman-Wunsch global alignment; 
#' \code{'local'} represents Smith-Waterman local alignment.
#' @param submat Substitution matrix, default is \code{'BLOSUM62'}, 
#' could be one of \code{'BLOSUM45'}, \code{'BLOSUM50'}, \code{'BLOSUM62'}, 
#' \code{'BLOSUM80'}, \code{'BLOSUM100'}, \code{'PAM30'}, \code{'PAM40'}, 
#' \code{'PAM70'}, \code{'PAM120'}, \code{'PAM250'}.
#' 
#' @return A \code{n} x \code{n} similarity matrix.
#' 
#' @keywords Needleman-Wunsch Smith-Waterman local global 
#' sequence alignment parallel similarity calcParProtSeqSim
#'
#' @aliases calcParProtSeqSim
#' 
#' @author Min-feng Zhu <\email{wind2zhu@@163.com}>, 
#'         Nan Xiao <\url{http://r2s.name}>
#' 
#' @seealso See \code{calcTwoProtSeqSim} for protein sequence alignment 
#' for two protein sequences. See \code{\link{calcParProtGOSim}} for 
#' protein similarity calculation based on 
#' Gene Ontology (GO) semantic similarity.
#' 
#' @export calcParProtSeqSim
#' 
#' @examples
#' s1 = readFASTA(system.file('protseq/P00750.fasta', package = 'BioMedR'))[[1]]
#' s2 = readFASTA(system.file('protseq/P08218.fasta', package = 'BioMedR'))[[1]]
#' s3 = readFASTA(system.file('protseq/P10323.fasta', package = 'BioMedR'))[[1]]
#' s4 = readFASTA(system.file('protseq/P20160.fasta', package = 'BioMedR'))[[1]]
#' s5 = readFASTA(system.file('protseq/Q9NZP8.fasta', package = 'BioMedR'))[[1]]
#' plist = list(s1, s2, s3, s4, s5)
#' psimmat = calcParProtSeqSim(plist, cores = 2, type = 'local', submat = 'BLOSUM62')
#' 

calcParProtSeqSim = function (protlist, cores = 2, 
                              type = 'local', submat = 'BLOSUM62') {

    doParallel::registerDoParallel(cores)

    # generate lower matrix index
    idx = combn(1:length(protlist), 2)

    # then use foreach parallelization
    # input is all pair combination

    seqsimlist = vector('list', ncol(idx))

    seqsimlist <- foreach (i = 1:length(seqsimlist), .errorhandling = 'pass') %dopar% {
        tmp <- .calcSeqPairSim(rev(idx[, i]), protlist = protlist, type = type, submat = submat)
    }

    # convert list to matrix
    seqsimmat = matrix(0, length(protlist), length(protlist))
    for (i in 1:length(seqsimlist)) seqsimmat[idx[2, i], idx[1, i]] = seqsimlist[[i]]
    seqsimmat[upper.tri(seqsimmat)] = t(seqsimmat)[upper.tri(t(seqsimmat))]
    diag(seqsimmat) = 1

    return(seqsimmat)

}

#' Protein Sequence Alignment for Two Protein Sequences
#'
#' Protein Sequence Alignment for Two Protein Sequences
#'
#' This function implements the sequence alignment between two protein sequences.
#' 
#' @param seq1 A character string, containing one protein sequence.
#' @param seq2 A character string, containing another protein sequence.
#' @param type Type of alignment, default is \code{'local'}, 
#' could be \code{'global'} or \code{'local'}, 
#' where \code{'global'} represents Needleman-Wunsch global alignment; 
#' \code{'local'} represents Smith-Waterman local alignment.
#' @param submat Substitution matrix, default is \code{'BLOSUM62'}, 
#' could be one of \code{'BLOSUM45'}, \code{'BLOSUM50'}, \code{'BLOSUM62'}, 
#' \code{'BLOSUM80'}, \code{'BLOSUM100'}, \code{'PAM30'}, \code{'PAM40'}, 
#' \code{'PAM70'}, \code{'PAM120'}, \code{'PAM250'}.
#' 
#' @return An Biostrings object containing the scores and other 
#' alignment information.
#' 
#' @keywords Needleman-Wunsch Smith-Waterman local global 
#' sequence alignment parallel similarity calcTwoProtSeqSim
#'
#' @aliases calcTwoProtSeqSim
#' 
#' @author Min-feng Zhu <\email{wind2zhu@@163.com}>, 
#'         Nan Xiao <\url{http://r2s.name}>
#' 
#' @seealso See \code{\link{calcParProtSeqSim}} for paralleled pairwise 
#' protein similarity calculation based on sequence alignment.
#' See \code{\link{calcTwoProtGOSim}} for calculating the 
#' GO semantic similarity between two groups of GO terms or two Entrez gene IDs.
#' 
#' @export calcTwoProtSeqSim
#' 
#' @examples
#' s1 = readFASTA(system.file('protseq/P00750.fasta', package = 'BioMedR'))[[1]]
#' s2 = readFASTA(system.file('protseq/P10323.fasta', package = 'BioMedR'))[[1]]
#' seqalign = calcTwoProtSeqSim(s1, s2)
#' summary(seqalign)
#' 

calcTwoProtSeqSim = function (seq1, seq2, type = 'local', submat = 'BLOSUM62') {

    # sequence alignment for two protein sequences
    s1  = try(Biostrings::AAString(seq1), silent = TRUE)
    s2  = try(Biostrings::AAString(seq2), silent = TRUE)
    s12 = try(Biostrings::pairwiseAlignment(s1, s2, type = type, 
                                            substitutionMatrix = submat), 
              silent = TRUE)

    return(s12)

}

Any scripts or data that you put into this service are public.

BioMedR documentation built on Nov. 17, 2017, 10:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BioMedR
Generating Various Molecular Representations for Chemicals, Proteins, DNAs/RNAs and Their Interactions

R/904-calcProtSeqSim.R
In BioMedR: Generating Various Molecular Representations for Chemicals, Proteins, DNAs/RNAs and Their Interactions

Defines functions calcSeqPairSim calcParProtSeqSim calcTwoProtSeqSim

Documented in calcParProtSeqSim calcTwoProtSeqSim

Try the BioMedR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BioMedR Generating Various Molecular Representations for Chemicals, Proteins, DNAs/RNAs and Their Interactions

R/904-calcProtSeqSim.R In BioMedR: Generating Various Molecular Representations for Chemicals, Proteins, DNAs/RNAs and Their Interactions

Defines functions calcSeqPairSim calcParProtSeqSim calcTwoProtSeqSim

Documented in calcParProtSeqSim calcTwoProtSeqSim

Try the BioMedR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BioMedR
Generating Various Molecular Representations for Chemicals, Proteins, DNAs/RNAs and Their Interactions

R/904-calcProtSeqSim.R
In BioMedR: Generating Various Molecular Representations for Chemicals, Proteins, DNAs/RNAs and Their Interactions