Nothing
# Maximum parsimony ansestral state reconstruction for discrete traits.
# Modification of Sankoff algorithm for reconstructing discrete ancestral states (Weighted Small Parsimony Problem)
# Sankoff's algorithm allows the inclusion of a cost matrix:
# transition_costs[i,j] is the cost of transitioning i-->j (ignoring edge length)
# If transition_costs is "all_equal", then all transitions are penalized equally (same as if transition_costs[i,j] = 1-delta_{ij})
# If transition_costs is "sequential", then only single-step transitions (i-->i+1) are allowed, and all are penalized equally
# If transition_costs is "proportional", then all transition are allowed but they are penalized proportionally to the number of steps.
# The modification of this function is that optionally, edge lengths can be used to weight the transition costs:
# Longer edges imply smaller transition costs between states
# Specifically, the cost of transitioning is transition_cost[i,j]/(edge_length^edge_exponent)
# where edge_exponent can be e.g. 0 (Sankoff's original algorithm), 1 (linear weighting) or 0.5 (square-root weighting, corresponding to a Brownian motion)
# Requirements:
# Tree can be multifurcating, and can also include nodes with a single child
# Tree must be rooted.
# If (edge_exponent>0) then: All edges must have length > 0
# For a description of the original Sankoff algorithm, see:
# http://telliott99.blogspot.ca/2010/03/fitch-and-sankoff-algorithms-for.html
# (page 11) https://cs.brown.edu/courses/csci1950-z/slides/CSCI1950ZFall09_Lecture2.pdf
# The function returns ancestral state probabilities as a (non-flattened) NumericMatrix of size Nnodes x Nstates.
asr_max_parsimony = function( tree,
tip_states, # integer vector of size Ntips
Nstates = NULL,
transition_costs = "all_equal",
edge_exponent = 0,
weight_by_scenarios = TRUE,
check_input = TRUE){
Ntips = length(tree$tip.label)
Nedges = nrow(tree$edge)
# basic error checking
if(length(tip_states)!=Ntips) stop(sprintf("ERROR: Length of tip_states (%d) is not the same as the number of tips in the tree (%d)",length(tip_states),Ntips));
if(!is.numeric(tip_states)) stop(sprintf("ERROR: tip_states must be integers"))
if(is.null(Nstates)) Nstates = max(tip_states);
if(check_input){
min_tip_state = min(tip_states)
max_tip_state = max(tip_states)
if((min_tip_state<1) || (max_tip_state>Nstates)) stop(sprintf("ERROR: tip_states must be integers between 1 and %d, but found values between %d and %d",Nstates,min_tip_state,max_tip_state))
if((!is.null(names(tip_states))) && any(names(tip_states)!=tree$tip.label)) stop("ERROR: Names in tip_states and tip labels in tree don't match (must be in the same order).")
}
# construct transition_costs matrix if needed
if(is.character(transition_costs)){
if(transition_costs=="all_equal"){
# all transitions penalized equally
transition_costs = matrix(1, nrow=Nstates, ncol=Nstates)
}else if(transition_costs=="sequential"){
# only single-step transitions are allowed, and all are penalized equally
transition_costs = matrix(Inf, nrow=Nstates, ncol=Nstates)
for(i in 1:Nstates){
if(i<Nstates) transition_costs[i,i+1] = 1
if(i>1) transition_costs[i,i-1] = 1
}
}else if(transition_costs=="proportional"){
# all transitions are allowed, but penalized proportional to the number of steps
transition_costs = matrix(0, nrow=Nstates, ncol=Nstates)
for(i in 1:Nstates){
transition_costs[i,] = sapply(1:Nstates, function(j) abs(j-i))
}
}else if(transition_costs=="exponential"){
# all transitions are allowed, but penalized exponentially to the number of steps
transition_costs = matrix(0, nrow=Nstates, ncol=Nstates)
for(i in 1:Nstates){
transition_costs[i,] = sapply(1:Nstates, function(j) exp(abs(j-i)))
}
}else{
stop(sprintf("ERROR: Uknown transition_costs '%s'",transition_costs));
}
diag(transition_costs) = 0; # no cost for staying in the same state
}else{
if(nrow(transition_costs)!=Nstates || ncol(transition_costs)!=Nstates) stop(sprintf("ERROR: Transition costs has wrong size (%d x %d), expected %d x %d",nrow(transition_costs),ncol(transition_costs),Nstates,Nstates));
if(check_input){
if(any(transition_costs<0)) stop(sprintf("ERROR: Some transition costs are negative (found value %g)",min(transition_costs)))
if(any(diag(transition_costs)!=0)) stop(sprintf("ERROR: The diagonal of transition_costs includes non-zero values, which makes no sense in a maximum-parsimony model"))
if((edge_exponent!=0) && (!is.null(tree$edge.length)) && (any(tree$edge.length==0))) stop(sprintf("ERROR: edge_exponent is non-zero, but some edges in the tree have zero length"))
}
}
results = WMPR_ASR_CPP( Ntips = Ntips,
Nnodes = tree$Nnode,
Nedges = Nedges,
Nstates = Nstates,
tree_edge = as.vector(t(tree$edge)) - 1, # flatten in row-major format and adjust clade indices to 0-based
edge_length = (if(is.null(tree$edge.length)) numeric() else tree$edge.length),
tip_states = tip_states-1,
transition_costs = as.vector(t(transition_costs)),
branch_length_exponent = edge_exponent,
weight_posteriors_by_scenario_counts = weight_by_scenarios, # (INPUT) if true, then the posterior_probability of a state (in a specific node) is proportional to the number of scenarios in which the trait is at that state
verbose = FALSE,
verbose_prefix = "");
return(list(success = TRUE,
ancestral_likelihoods = results$posterior_probabilities,
scenario_counts = matrix(results$scenario_counts, ncol=Nstates, byrow=TRUE),
ancestral_states = sapply(seq_len(tree$Nnode), FUN=function(node) which.max(results$posterior_probabilities[node,])), # maximum-likelihood ancestral states
total_cost = results$best_root_cost));
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.