#' Get f
#'
#' @description Obtain f - probability of any configuration satisfying the hypothesis of
#' convergent evolution under the constraint of the given tree
#'
#' @import Biostrings
#'
#' @param tree a phylogenetic tree
#' @param phydat a phyDat object containing the relevant alignment
#' @param spe1 The name of species 1
#' @param spe2 The name of species 2
#' @param p Probability of occurence for any AA (default 1/20)
#' @param type type of analysis: absolute ("abs") or by score ("score")
#' @param threshold minimum threshold for score option
#'
#' @return probability (f)
#'
getf <- function(tree, phydat, spe1, spe2, p=(1/20), type=c("abs", "score"), threshold=NA){
alph <- Biostrings::AA_ALPHABET[1:26]
ancNode = getMostRecentCommonAncestor(tree, spe1, spe2)
b1 = getBranchLength(tree, spe1, ancNode)
b2 = getBranchLength(tree, spe2, ancNode)
total = 0
count = 0
if (type=="abs"){
for (aa in alph){
x = aa
y = aa
# Sum over all conditions where x = y and neither are equal to an ancestral value
# Formula given by:
# J. Zhang and S. Kumar, \Detection of convergent and parallel evolution at the amino acid
# sequence level,"Mol. Biol. Evol., vol. 14, no. 5, pp. 527{536, 1997.
for (anc in alph[which(alph != aa)]){
total = total + probOfSiteConfig(tree, phydat, spe1, spe2, anc=anc, x = x, y=y)
}
}
} else if (type=="score"){
for (x in alph){
for (y in alph){
for (anc in alph){
# We count the total number of possibilities
count = count + 1
print(x)
print(y)
print(anc)
if(x%in%colnames(BLOSUM62) && y%in%colnames(BLOSUM62) && anc%in%colnames(BLOSUM62)){
score = BLOSUM62[which(colnames(BLOSUM62) == x), which(rownames(BLOSUM62) == y)]
- BLOSUM62[which(colnames(BLOSUM62) == x), which(rownames(BLOSUM62) == anc)]
} else {
warning("Warning: Amino Acid outside BLOSUM62 matrix.")
score = -100000
}
if (is.na(threshold)){
print("Please supply threshold.")
stop()
}
print(score)
# We count the # of times that a given configuration satisfies our threshold
if (score > threshold){
total = total + 1
}
}
}
}
# Total probability
return (total/count)
}
return (total)
}
#' probOfSiteConfig
#'
#' Calculate the probability of the given configuration
#'
#' @param tree A phylogenetic tree
#' @param phydat An object of class phydat.
#' @param spe1 The name of species 1
#' @param spe2 The name of species 2
#' @param pos The position at which to evaluate if conditions are satisfied
#' @param p The fraction of the amino acid at the target position in the evaluated genome. (Default is 1/20)
#' @param anc ancestral AA value (for testing or internal input)
#' @param x AA value of spe1 (for testing or internal input)
#' @param y AA value of spe2 (for testing or internal input)
#'
#' @return Probability of given configuration
#' @examples
#' probOfSiteConfig(tree, primates, "Human", "Chimp", pos=1)
#' @export
probOfSiteConfig <- function(tree, phydat, spe1, spe2, pos, p=(1/20), anc, x, y){
if (spe1 == spe2){
stop("Species cannot be the same.")
}
# x,y,z arguments provided for testing purposes. Normally we will always calculate them
# using the alignment.
if(missing(x)){
ancM <- getAlignment(tree, phydat, spe1, spe2)
# x = position at pos in the sequence for spe1
x = toString(ancM[1][[1]][pos])
}
if(missing(y)){
ancM <- getAlignment(tree, phydat, spe1, spe2)
# y = position at pos in the sequence for spe2
y = toString(ancM[2][[1]][pos])
}
if (missing(anc)){
# anc gets reconstructed.
ancM <- getAlignment(tree, phydat, spe1, spe2)
# anc = position at pos in the sequence for ancestrally reconstructed
anc = toString(ancM[3][[1]][pos])
}
ancNode <- getMostRecentCommonAncestor(tree, spe1, spe2)
b1 <- getBranchLength(tree, spe1, ancNode)
b2 <- getBranchLength(tree, spe2, ancNode)
prob1 <- probOfChange(pam, x, anc, b1)
prob2 <- probOfChange(pam, y, anc, b2)
# Total probability of: anc occuring in the first place (p)
# AND anc changing to x over a branch length of b1
# AND anc changing to y over a branch length of b2
totalProb = p * prob1 * prob2
return (totalProb)
}
#' Calculate probability of N chance 'convergent' sites
#'
#' Calculate the probability that n sites between the two species will satisfy the conditions of convergent evolution by chance.
#'
#' @importFrom ape is.binary
#'
#' @param tree A phylogenetic tree
#' @param phydat PhyDat object containing corresponding alignment information
#' @param spe1 The name of species 1
#' @param spe2 The name of species 2
#' @param m The length of the genes. (Must of equal length)
#' @param n The number of potential convergent sites
#' @param p The fraction of the amino acid at the target position in the evaluated genome.
#' (Default is 1/20)
#' @param t threshold for score option
#' @param type Type of analysis: 'abs' for basic model or 'score' for by convergence score model
#'
#' @return Scaled likelihood that the amino acids satisfying the conditions of convergent evolution is by chance.
#'
#' @examples
#' \dontrun{
#' probOfNSitesByChance(tree, "Human", "Chimp", 20, 2)
#' }
#' @export
probOfNSitesByChance <- function(tree, phydat, spe1, spe2, m, n, p=(1/20), t=NA, type=c("abs", "score")){
#If we have 0 sites, obviously return 0.
if (n == 0){
return (0)
}
if (type == "abs"){
f = getf(tree, phydat, spe1, spe2, p, type)
prob = 0
#Formula for probability of n convergent sites in m from:
# J. Zhang and S. Kumar, \Detection of convergent and parallel evolution at the amino acid
# sequence level,"Mol. Biol. Evol., vol. 14, no. 5, pp. 527{536, 1997.
for (i in 1:n-1){
prob = prob + ( factorial(m) / factorial(i)*factorial(m-i) ) * f**i * (1-f)**(m-i)
}
return (1 - prob)
} else if (type == "score"){
f = getf(tree, phydat, spe1, spe2, p, type="score", threshold=t)
#If by score:
# Probability equals the probability of
prob = (m - n)*(1 - f) + f*n
return (prob)
}
print("Please select type: either abs or score.")
stop()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.