Nothing
AICc<-function(n,k,LogLik){
return(2*n*k/(n-k-1)+2*LogLik)
}
AkaikeWeight<-function(Delta.AICc.Array){
return(exp(-Delta.AICc.Array/2) /sum(exp(-Delta.AICc.Array/2) ))
}
# se.function<-function(cov.matrix,var.name){
# name.Index<-which(rownames(cov.matrix)==var.name)
# if( length(name.Index)==1){
# return( cov.matrix[name.Index,name.Index])
# }else{return(0)}
# }
#
# var.model.Index.function<-
# function(cov.matrix,var.name){
# name.Index<-which(rownames(cov.matrix)==var.name)
# if( length(name.Index)==1){
# return( cov.matrix[name.Index,name.Index])
# }else{return(0)}
# }
#
# weight.para.value<-
# function(para.vect,weight){
# return(sum(para.vect*weight))
# }
#
####################################################################
###################### MAIN PROGRAM ################################
####################################################################
#data is vector with names() matching taxon names
#phy is an ape phylo object
#flow is a data.frame with four columns
#donor = the taxon that is the gene flow source
#recipient = the taxon that is the gene flow recipient
#m = the fraction of the recipient trait that comes from the source. In the case of an equal hybridization between the recipient's sister on the tree and the donor, this is 0.5. In other cases where only, say, 10% of the recipient's quantitative trait
# loci come from the donor, it would be 0.1
#time.from.root.donor = the time, counting forward FROM THE ROOT, when the gene flow happened from the donor. It may not be the same as time.from.root.recipient, as it may have spent time in a now extinct ghost lineage first (though time.from.root.donor <= time.from.root.recipient). It's treated as a one time event, which makes sense in the case of a single allopolyploid speciation event, probably less so in the case
# of ongoing gene flow. Too bad.
#time.from.root.recipient = the time, counting forward FROM THE ROOT, when the gene flow happened from the donor
#If the gene flow happened to or from a lineage with multiple descendant species, use one row for each pair. For example, if lineage (A,B) had 20% of their genes coming in from lineage (C,D,E) at 14.5 MY since the root (not back in time), you would have
# a flow data.frame of
#donor recipient m time.from.root.donor time.from.root.recipient
#C A 0.2 14.5 14.5
#D A 0.2 14.5 14.5
#E A 0.2 14.5 14.5
#C B 0.2 14.5 14.5
#D B 0.2 14.5 14.5
#E B 0.2 14.5 14.5
#We may write a utility function for dealing with this case in the future.
#Note the use of all updates of V.modified based on V.original; we don't want to add v_h to A three different times, for example, for one migration event (so we replace the variance three times based on transformations of the original variance)
#Note that we do not assume an ultrametric tree
BMhyd <- function(data, phy, flow, opt.method="Nelder-Mead", models=c(1,2,3,4), verbose=TRUE, get.se=TRUE, plot.se=TRUE, store.sims=FALSE, precision=2, auto.adjust=FALSE, likelihood.precision=0.001, allow.extrapolation=FALSE) {
if(min(flow$m)<0) {
stop("Min value of flow is too low; should be between zero and one")
}
if(max(flow$m)>1) {
stop("Max value of flow is too high; should be between zero and one")
}
results<-list()
#hessians <- list()
results.summary <-data.frame()
phy.geiger.friendly <- phy #geiger can't handle branch lengths near zero. Let's lengthen them if needed
if(min(phy.geiger.friendly$edge.length)<0.00001) {
phy.geiger.friendly$edge.length <- phy.geiger.friendly$edge.length + 0.00001
}
if(auto.adjust) {
phy <- AdjustForDet(phy)
}
all.sims<-list()
if(verbose) {
print("Getting starting values from Geiger")
}
starting.from.geiger<-fitContinuous(phy.geiger.friendly, data, model="BM", SE=data*NA, ncores=1)$opt
starting.values <- c(starting.from.geiger$sigsq, starting.from.geiger$z0, 1, 0, starting.from.geiger$SE) #sigma.sq, mu, beta, vh, SE
if(verbose) {
print("Done getting starting values")
}
for (model.index in sequence(length(models))) {
step.count <- 0
if(verbose) {
print(paste("Starting model", model.index, "of", length(models)))
}
free.parameters<-rep(TRUE, 5)
names(free.parameters) <- c("sigma.sq", "mu", "bt", "vh", "SE")
model <- models[model.index]
if(model==1) {
free.parameters[which(names(free.parameters)=="bt")]<-FALSE
}
if(model==2) {
free.parameters[which(names(free.parameters)=="vh")]<-FALSE
}
if(model==3) {
free.parameters[which(names(free.parameters)=="bt")]<-FALSE
free.parameters[which(names(free.parameters)=="vh")]<-FALSE
}
best.run <- optim(par=starting.values[free.parameters], fn=CalculateLikelihood, method=opt.method, hessian=FALSE, data=data, phy=phy, flow=flow, actual.params=free.parameters[which(free.parameters)], precision=precision, allow.extrapolation=allow.extrapolation)
if(verbose) {
results.vector<-c(step.count, best.run$value, best.run$par)
names(results.vector) <- c("step","negloglik", names(free.parameters[which(free.parameters)]))
print(results.vector)
}
#this is to continue optimizing; we find that optim is too lenient about when it accepts convergence
times.without.improvement <- 0
while(times.without.improvement<10) {
times.without.improvement <- times.without.improvement+1
new.run <- best.run
if(times.without.improvement%%4==0) {
new.run <- optim(par=best.run$par, fn=CalculateLikelihood, method=opt.method, hessian=FALSE, data=data, phy=phy, flow=flow, actual.params=free.parameters[which(free.parameters)], precision=precision, allow.extrapolation=allow.extrapolation)
} else {
new.run <- optim(par=GenerateValues(best.run$par, lower=c(0, -Inf, 0, 0, 0)[which(free.parameters)], upper=rep(Inf, sum(free.parameters)), examined.max=10*best.run$par, examined.min=0.1*best.run$par), fn=CalculateLikelihood, method=opt.method, hessian=FALSE, data=data, phy=phy, flow=flow, actual.params=free.parameters[which(free.parameters)], precision=precision, allow.extrapolation=allow.extrapolation)
}
#print("new.run best.run")
#print(c(new.run$value, best.run$value))
if(new.run$value<best.run$value) {
if(best.run$value - new.run$value > likelihood.precision) {
times.without.improvement <- 0
if(verbose) {
print("New improvement found, resetting step counter")
}
} else {
if(verbose) {
print("New improvement found, but slight; taking the best value, but not resetting the step counter")
}
}
best.run <- new.run
}
if(verbose) {
step.count <- step.count+1
results.vector<-c(step.count, times.without.improvement, best.run$value, best.run$par)
names(results.vector) <- c("step", "steps.without.improvement","negloglik", names(free.parameters[which(free.parameters)]))
print(results.vector)
}
}
results[[model.index]] <- best.run
#try(hessians[[model.index]] <- hessian(func=CalculateLikelihood, x=new.run$par, data=data, phy=phy, flow=flow, actual.params=free.parameters[which(free.parameters)]))
results.vector.full <- c(NA, NA, 1, 0, 0)
names(results.vector.full) <- names(free.parameters)
names(best.run$par) <- names(free.parameters[which(free.parameters)])
for (i in sequence(length(best.run$par))) {
results.vector.full[which(names(results.vector.full)==names(best.run$par)[i])] <- best.run$par[i]
}
#print(hessians[[model.index]])
#try(print(solve(hessians[[model.index]])))
ci.vector<-rep(NA,10)
for(parameter in sequence(5)) {
names(ci.vector)[1+2*(parameter-1)] <- paste(names(free.parameters)[parameter],"lower", sep=".")
names(ci.vector)[2+2*(parameter-1)] <- paste(names(free.parameters)[parameter],"upper", sep=".")
}
if(get.se) {
if(verbose) {
print("Now doing simulation to estimate parameter uncertainty")
}
interval.results <- AdaptiveConfidenceIntervalSampling(best.run$par, fn=CalculateLikelihood, lower=c(0, -Inf, 0, 0, 0)[which(free.parameters)], data=data, phy=phy, flow=flow, actual.params=free.parameters[which(free.parameters)], allow.extrapolation=allow.extrapolation)
interval.results.in <- interval.results[which(interval.results[,1]-min(interval.results[,1])<=2),]
interval.results.out <- interval.results[which(interval.results[,1]-min(interval.results[,1])>2),]
if(plot.se) {
pdf(file=paste("Model",models[model.index], "_uncertainty_plot.pdf", sep=""), height=5, width=5*sum(free.parameters))
par(mfcol=c(1, sum(free.parameters)))
for(parameter in sequence(sum(free.parameters))) {
plot(x=interval.results[,parameter+1], y=interval.results[,1], type="n", xlab=names(free.parameters[which(free.parameters)])[parameter], ylab="NegLnL", bty="n", ylim=c(min(interval.results[,1]), min(interval.results[,1])+10))
points(x=interval.results.in[,parameter+1], y=interval.results.in[,1], pch=16, col="black")
points(x=interval.results.out[,parameter+1], y=interval.results.out[,1], pch=16, col="gray")
points(x= best.run$par[parameter], y= best.run$value, pch=1, col="red", cex=1.5)
}
dev.off()
if(verbose) {
print(paste("Uncertainty plot has been saved in Model",models[model.index], "_uncertainty_plot.pdf in ", getwd(), sep=""))
}
}
if(store.sims) {
colnames(interval.results) <- c("neglnL", names(free.parameters)[which(free.parameters)])
all.sims[[model.index]]<-interval.results
}
free.index=0
for(parameter in sequence(5)) {
if(free.parameters[parameter]) { #is estimated
free.index <- free.index + 1
ci.vector[1+2*(parameter-1)] <- min(interval.results.in[,free.index+1])
ci.vector[2+2*(parameter-1)] <- max(interval.results.in[,free.index+1])
} else {
ci.vector[1+2*(parameter-1)] <- results.vector.full[parameter]
ci.vector[2+2*(parameter-1)] <- results.vector.full[parameter]
}
}
}
local.df <- data.frame(matrix(c(model.index, results.vector.full, AICc(Ntip(phy),k=length(free.parameters[which(free.parameters)]), best.run$value), best.run$value, length(free.parameters[which(free.parameters)]), ci.vector), nrow=1))
colnames(local.df) <- c("Model", names(results.vector.full), "AICc", "NegLogL", "K", names(ci.vector))
print(local.df)
results.summary <- rbind(results.summary, local.df)
}
results.summary <- cbind(results.summary, deltaAICc=results.summary$AICc-min(results.summary$AICc))
results.summary<-cbind(results.summary, AkaikeWeight = AkaikeWeight(results.summary$deltaAICc))
if(store.sims) {
return(list(results=results.summary, sims=all.sims))
}
return(results.summary)
}
DetPass <- function(phy) {
det.pass <- TRUE
vcv.result <- vcv.phylo(phy)
det.tries <- c(det(vcv.result), det(1000*vcv.result), det(0.0001*vcv.result))
if(min(det.tries)<0) {
det.pass <- FALSE
}
if(sum(is.finite(det.tries))!=length(det.tries)) {
det.pass <- FALSE
}
return(det.pass)
}
AdjustForDet <- function(phy, max.attempts=100) {
attempts<-0
if(!DetPass(phy)) {
warning("Determininant of the phylogeny was difficulty to calculate, so the phylogeny needed to be adjusted. Your results may be approximate as a result")
while(!DetPass(phy) && attempts<=max.attempts) {
attempts <- attempts+1
phy$edge.length <- phy$edge.length+0.00001*attempts
}
}
if(attempts>max.attempts) {
stop("Phylogeny could not be adjusted adequately")
}
return(phy)
}
GetVModified <- function(x, phy, flow, actual.params) {
bt <- 1
vh <- 0
sigma.sq <- x[1]
mu <- x[2]
SE <- x[length(x)]
bt.location <- which(names(actual.params)=="bt")
if(length(bt.location)==1) {
bt<-x[bt.location]
}
vh.location <- which(names(actual.params)=="vh")
if(length(vh.location)==1) {
vh<-x[vh.location]
}
times.original <-vcv.phylo(phy, model="Brownian") #is the initial one based on the tree alone, so just time
V.original <- sigma.sq * times.original
V.modified <- V.original
for (flow.index in sequence(dim(flow)[1])) {
recipient.index <- which(rownames(V.modified)==flow$recipient[flow.index])
if(length(recipient.index)!=1) {
stop(paste("Tried to find ", flow$recipient[flow.index], " but instead found ", paste(rownames(V.modified)[recipient.index], sep=" ", collapse= " "), "; make sure the taxon names in the flow dataframe recipient match that of your tree", sep=""))
}
donor.index <- which(rownames(V.modified)==flow$donor[flow.index])
if(length(donor.index)!=1) {
stop(paste("Tried to find ", flow$donor[flow.index], " but instead found ", paste(rownames(V.modified)[donor.index], sep=" ", collapse= " "), "; make sure the taxon names in the flow dataframe donor match that of your tree", sep=""))
}
V.modified[recipient.index, donor.index] <- (1-flow$m[flow.index]) * V.original[recipient.index, donor.index] + (flow$m[flow.index]) * (flow$time.from.root.recipient[flow.index]) * sigma.sq #covariance is the weighted sum of the covariance from evolution along the tree plus evolution along the migration path
V.modified[donor.index, recipient.index] <- V.modified[recipient.index, donor.index]
#covariance managed, now to manage the variance
V.modified[recipient.index, recipient.index] <- V.original[recipient.index, recipient.index] + vh
}
diag(V.modified) <- diag(V.modified)+SE
return(V.modified)
}
GetMeansModified <- function(x, phy, flow, actual.params) {
badval<-(0.5)*.Machine$double.xmax
bt <- 1
vh <- 0
sigma.sq <- x[1]
mu <- x[2]
SE <- x[length(x)]
bt.location <- which(names(actual.params)=="bt")
if(length(bt.location)==1) {
bt<-x[bt.location]
}
vh.location <- which(names(actual.params)=="vh")
if(length(vh.location)==1) {
vh<-x[vh.location]
}
times.original <-vcv.phylo(phy, model="Brownian") #is the initial one based on the tree alone, so just time
V.original <- sigma.sq * times.original
means.original <- rep(mu, Ntip(phy))
names(means.original) <- rownames(V.original)
means.modified <- means.original
means.original <- rep(mu, Ntip(phy))
names(means.original) <- rownames(V.original)
means.modified <- means.original
for (flow.index in sequence(dim(flow)[1])) {
recipient.index <- which(rownames(V.original)==flow$recipient[flow.index])
if(length(recipient.index)!=1) {
stop(paste("Tried to find ", flow$recipient[flow.index], " but instead found ", paste(rownames(V.original)[recipient.index], sep=" ", collapse= " "), "; make sure the taxon names in the flow dataframe recipient match that of your tree", sep=""))
}
donor.index <- which(rownames(V.original)==flow$donor[flow.index])
if(length(donor.index)!=1) {
stop(paste("Tried to find ", flow$donor[flow.index], " but instead found ", paste(rownames(V.original)[donor.index], sep=" ", collapse= " "), "; make sure the taxon names in the flow dataframe donor match that of your tree", sep=""))
}
means.modified[recipient.index] <- means.original[recipient.index] + log(bt)
}
return(means.modified)
}
#precision is the cutoff at which we think the estimates become unreliable due to ill conditioned matrix
CalculateLikelihood <- function(x, data, phy, flow, actual.params, precision=2, proportion.mix.with.diag=0, allow.extrapolation=FALSE) {
badval<-(0.5)*.Machine$double.xmax
bt <- 1
vh <- 0
sigma.sq <- x[1]
mu <- x[2]
SE <- x[length(x)]
bt.location <- which(names(actual.params)=="bt")
if(length(bt.location)==1) {
bt<-x[bt.location]
}
vh.location <- which(names(actual.params)=="vh")
if(length(vh.location)==1) {
vh<-x[vh.location]
}
V.modified <- GetVModified(x, phy, flow, actual.params)
means.modified <- GetMeansModified(x, phy, flow, actual.params)
if(sigma.sq <0 || vh<0 || bt <= 0.0000001 || SE < 0) {
return(badval)
}
data <- data[match(names(means.modified), names(data))]
if(length(data)!=length(means.modified)) {
stop("Mismatch between names of taxa in data vector and on phy")
}
#if(length(which(eigen(V.modified)$values<0))>0) {
# last.bad <- V.modified
# return(badval)
#}
NegLogML <- (Ntip(phy)/2)*log(2*pi)+(1/2)*t(data-means.modified)%*%pseudoinverse(V.modified)%*%(data-means.modified) + (1/2)*log(abs(det(V.modified)))
if(min(V.modified)<0 || sigma.sq <0 || vh<0 || bt <= 0.0000001 || !is.finite(NegLogML) || SE<0) {
NegLogML<-badval
}
matrix.condition <- kappa(V.modified, exact=TRUE)
#print("condition")
#print(kappa(V.modified, exact=TRUE))
#print("log(condition)")
#print(log(kappa(V.modified, exact=TRUE)))
#pretty<-c(NegLogML, log(matrix.condition))
#names(pretty) <- c("NegLogL", "log(matrix.condition")
#print(pretty)
#The ratio of the largest to smallest singular value in the singular value decomposition of a matrix. The base- logarithm of is an estimate of how many base- digits are lost in solving a linear system with that matrix. In other words, it
#estimates worst-case loss of precision. A system is said to be singular if the condition number is infinite, and ill-conditioned if it is too large, where "too large" means roughly the precision of matrix entries.
#if(rcond(V.modified) < .Machine$double.eps^2){
if(log(matrix.condition) > precision) {
proportions <- seq(from=1, to=0, length.out=101)
lnl.vector <- rep(NA, length(proportions))
max.diff <- 0
for(i in sequence(length(proportions))) {
V.modified.by.proportions<-(1-proportions[i]) * V.modified + proportions[i] * diag(dim(V.modified)[1]) * diag(V.modified)
local.lnl <- (Ntip(phy)/2)*log(2*pi)+(1/2)*t(data-means.modified)%*%pseudoinverse(V.modified.by.proportions)%*%(data-means.modified) + (1/2)*log(abs(det(V.modified.by.proportions)))
if(i>6) {
very.local.lnl <- lnl.vector[(i-6):(i-1)]
max.diff <- max(abs(very.local.lnl[-1] - very.local.lnl[-length(very.local.lnl)])) #looking locally for jumps in the likelihood
current.diff <- abs(local.lnl - lnl.vector[i-1])
if(current.diff > 2 * max.diff) {
#print(paste("breaking after ", i))
break() #the modified matrix is still poorly conditioned, so stop here
}
}
lnl.vector[i] <- local.lnl
}
proportions<-proportions[which(!is.na(lnl.vector))]
lnl.vector<-lnl.vector[which(!is.na(lnl.vector))]
NegLogML <- predict(smooth.spline(proportions, lnl.vector), data.frame(proportions =0.000))$y
#plot(c(0, proportions), c(NegLogML, lnl.vector), type="n")
#points(proportions, lnl.vector, pch=20)
#points(0, NegLogML, col="red")
if(abs(NegLogML - lnl.vector[length(lnl.vector)]) >0.0000001) { #means this point was extrapolated b/c the likelihood surface got strange
if(allow.extrapolation) {
warning("VCV matrix was ill-conditioned, so used splines to estimate its likelihood (allow.extrapolation=TRUE). This could lead to very bad estimates of the likelihood")
} else {
NegLogML <- badval
warning("VCV matrix was ill-conditioned at some points in parameter space; treated the likelihood at these points as a bad value rather than estimating it. If you think the MLE is in this region, you could try allow.extrapolation=TRUE")
}
}
#print(paste("Did interpolation, got ", NegLogML))
}
#print("datadiff")
#print(quantile(data-means.modified))
#print("middle")
#print((1/2)*t(data-means.modified)%*%pseudoinverse(V.modified)%*%(data-means.modified))
#print("end")
#print((1/2)*log(abs(det(V.modified))) )
#print(x)
#print(V.modified[1:10,1:10])
#print(means.modified)
if(NegLogML< (-1000)) {
bad<-V.modified
}
return(NegLogML[1])
}
AdaptiveConfidenceIntervalSampling <- function(par, fn, lower=-Inf, upper=Inf, desired.delta = 2, n.points=5000, verbose=TRUE, ...) {
starting<-fn(par, ...)
if(length(lower) < length(par)) {
lower<-rep(lower, length(par))
}
if(length(upper) < length(par)) {
upper<-rep(upper, length(par))
}
min.multipliers <- rep(1, length(par))
max.multipliers <- rep(1, length(par))
results<-data.frame(data.frame(matrix(nrow=n.points+1, ncol=1+length(par))))
results[1,]<-unname(c(starting, par))
for (i in sequence(n.points)) {
sim.points<-NA
while(is.na(sim.points[1])) {
sim.points<-GenerateValues(par, lower, upper, examined.max=max.multipliers*apply(results[which(results[,1]-min(results[,1], na.rm=TRUE)<=desired.delta),-1], 2, max, na.rm=TRUE), examined.min=min.multipliers*apply(results[which(results[,1]-min(results[,1], na.rm=TRUE)<=desired.delta),-1], 2, min, na.rm=TRUE))
}
results[i+1,] <- c(fn(sim.points, ...), sim.points)
if (i%%20==0) {
for (j in sequence(length(par))) {
returned.range <- range(results[which((results[,1]-min(results[,1], na.rm=TRUE))<desired.delta), j+1], na.rm=TRUE)
total.range <- range(results[,j+1], na.rm=TRUE)
width.ratio <- diff(returned.range)/diff(total.range)
if(is.na(width.ratio)) {
width.ratio=1
}
if(width.ratio > 0.5) { #we are not sampling widely enough
min.multipliers[j] <- min.multipliers[j] * 0.9
max.multipliers[j] <- max.multipliers[j] * 1.1 #expand the range
} else {
min.multipliers[j] <- 1
max.multipliers[j] <- 1
}
}
}
if (verbose && i%%100==0) {
print(paste(i, "of", n.points, "done"))
}
}
return(results)
}
GenerateValues <- function(par, lower, upper, max.tries=100, expand.prob=0, examined.max, examined.min) {
pass=FALSE
tries=0
while(!pass && tries<=max.tries) {
tries <- tries+1
pass=TRUE
new.vals <- rep(NA, length(par))
for(i in sequence(length(par))) {
examined.max[i]<-max(0.001, examined.max[i])
new.vals[i]<-runif(1, max(lower[i], 0.9*examined.min[i]), min(upper[i], 1.1*examined.max[i]))
if(new.vals[i]<lower[i]) {
pass=FALSE
}
if(new.vals[i]>upper[i]) {
pass=FALSE
}
}
}
if(tries>max.tries) {
return(NA)
}
return(new.vals)
}
GetClade <- function(phy, clade.size) {
nodes <- phy$edge[,1]
subtrees <- lapply(nodes, extract.clade, phy=phy)
counts <- sapply(subtrees, Ntip)
matches<-subtrees[which(counts==clade.size)]
if(length(matches)==0) {
return(NA)
}
lucky <- matches[sample.int(length(matches),1)][[1]]
return(findMRCA(phy, tips=lucky$tip.label, type="node"))
}
GetAncestor <- function(phy, node) {
return(phy$edge[which(phy$edge[,2]==node),1])
}
#allow.ghost allows ghost lineage: something that persists for awhile, hybridizes, goes extinct. Otherwise, hybridization events must between coeval edges with extant descendants
SimulateNetwork <- function(ntax.nonhybrid=100, ntax.hybrid=10, flow.proportion=0.5, origin.type=c("clade", "individual"), birth = 1, death = 0.5, sample.f = 0.5, tree.height = 1, allow.ghost=FALSE) {
done = FALSE
used.recipients <- c()
available.recipient.ids <- sequence(ntax.nonhybrid + ntax.hybrid)
flow <- data.frame()
phy<-NA
phy <- sim.bd.taxa.age(n=ntax.nonhybrid+ntax.hybrid, numbsim=1, lambda=birth, mu=death, frac = sample.f, age=tree.height, mrca = TRUE)[[1]]
if(origin.type=="clade" && ntax.hybrid==1) {
warning("For ntax.hybrid = 1 and clade sampling, this will do individual sampling instead (which is equivalent in this case)")
origin.type<-"individual"
}
if(origin.type=="clade") {
while(is.na(GetClade(phy, ntax.hybrid))) { #not all trees of a given size have a clade of a given size, so may need to resimulate it
phy <- sim.bd.taxa.age(n=ntax.nonhybrid+ntax.hybrid, numbsim=1, lambda=birth, mu=0.5, frac = sample.f, age=tree.height, mrca = TRUE)[[1]]
}
}
while(!done) {
donors <- c()
recipients <- c()
recipient.ids <- c()
focal.node <- c()
if (origin.type=="clade") {
focal.node <- GetClade(phy, ntax.hybrid)
if(is.na(focal.node)) {
done=FALSE
break()
}
recipients <- phy$tip.label[getDescendants(phy, node=focal.node)]
recipients <- recipients[!is.na(recipients)] #since we want just the tips
recipient.ids <- which(phy$tip.label %in% recipients)
used.recipients <- append(used.recipients, recipients)
} else {
focal.node<-sample(available.recipient.ids, 1, replace=FALSE)
recipient.ids <- focal.node
recipients <- phy$tip.label[focal.node]
used.recipients <- append(used.recipients, recipients)
}
available.recipient.ids <- available.recipient.ids[!available.recipient.ids %in% recipient.ids]
longest.from.root <- nodeheight(phy, node=focal.node)
shortest.from.root <- nodeheight(phy, node=GetAncestor(phy, focal.node))
all.heights <- nodeHeights(phy)
#idea here: take a recipient clade. The gene flow must happen on its stem edge, which starts at shortest.from.root and goes up to longest.from.root. Gene flow can't go back in time
qualifying.lower <- which(all.heights[,1]<longest.from.root) #if this is false, gene flow goes back in time
qualifying.upper <- sequence(dim(all.heights)[1]) #in general, gene flow can go forward in time via ghost lineages
if(!allow.ghost) {
qualifying.upper <- which(all.heights[,2]>shortest.from.root) #if no ghost lineages, then there must be temporal overlap between the donor and recipient lineages. So the tipward end of the donor edge must be later than the rootward end of the recipient edge
}
qualifying.upper <- qualifying.upper[which(phy$edge[qualifying.upper,2]!=focal.node)] #let's not hybridize with ourselves
qualifying.all <- qualifying.upper[qualifying.upper %in% qualifying.lower]
if(length(qualifying.all)==0) {
break()
}
donor.edge <- sample(qualifying.all, 1)
donors <- phy$tip.label[getDescendants(phy, phy$edge[donor.edge,2])]
donors <- donors[!is.na(donors)] #getDescendants includes all descendant nodes, including internal ones. We just want the terminal taxa
time.in <- runif(1, min=max(all.heights[donor.edge,1],shortest.from.root), max=longest.from.root)
time.out <- runif(1, min=all.heights[donor.edge,1], max=min(time.in, all.heights[donor.edge,2]))
if (!allow.ghost) {
time.in <- runif(1, min=max(shortest.from.root, all.heights[donor.edge,1]), max=min(longest.from.root, all.heights[donor.edge,2])) #if no ghost lineages, must move from the overlapping interval
time.out <- time.in
}
pairs <- expand.grid(donors, recipients)
for (pairs.index in sequence(dim(pairs)[1])) {
flow <- rbind(flow, data.frame(donor=pairs[pairs.index,1], recipient=pairs[pairs.index,2], m=flow.proportion, time.from.root.donor=time.out, time.from.root.recipient=time.in, stringsAsFactors=FALSE))
}
if(length(used.recipients)==ntax.hybrid) {
done=TRUE
}
}
flow$donor <- as.character(flow$donor)
flow$recipient <- as.character(flow$recipient)
flow$m <- as.numeric(as.character(flow$m))
flow$time.from.root.donor <-as.numeric(as.character(flow$time.from.root.donor))
flow$time.from.root.recipient <-as.numeric(as.character(flow$time.from.root.recipient))
return(list(phy=phy, flow=flow))
}
PlotNetwork <- function(phy, flow, col.non="black", col.hybrid="red", col.donor="blue", name.padding=1.5, cex=1, xlab="", bty="n", head.length=0.2, edge.width=2, col.tree="darkgray", col.arrow="red", arrow.width=1, ...) {
phy<-reorder(phy, "pruningwise")
phy4 <- as(phy, "phylo4")
xxyy <- phyloXXYY(phy4)
#plot(phy4)
plot(x=c(min(xxyy$xx), name.padding*max(xxyy$xx)), y=range(xxyy$yy), type="n", xaxt="n", xlab=xlab, yaxt="n", ylab="", bty=bty, ...)
arrows(x0=xxyy$segs$v0x, x1=xxyy$segs$v1x, y0=xxyy$segs$v0y, y1=xxyy$segs$v1y, length=0, lwd=edge.width, col=col.tree)
arrows(x0=xxyy$segs$h0x, x1=xxyy$segs$h1x, y0=xxyy$segs$h0y, y1=xxyy$segs$h1y, length=0, lwd=edge.width, col=col.tree)
label.colors <- rep(col.non, Ntip(phy))
for (i in sequence(Ntip(phy))) {
if (names(getNode(phy4, xxyy$torder))[i] %in% flow$donor) {
label.colors[i]<-col.donor
}
if (names(getNode(phy4, xxyy$torder))[i] %in% flow$recipient) {
label.colors[i]<-col.hybrid
}
}
text(x=rep(max(xxyy$xx), Ntip(phy)), y=xxyy$yy[which(edges(phy4)[xxyy$eorder,2] %in% sequence(Ntip(phy)))], names(getNode(phy4, xxyy$torder)), col=label.colors, pos=4, cex=cex)
for (i in sequence(dim(flow)[1])) {
recipient.node <- getNode(phy4, flow$recipient[i])
recipient.path <- c(recipient.node, ancestors(phy4, recipient.node))
recipient.path.heights <- nodeHeight(phy4, recipient.path, from="root")
valid.recipients <- recipient.path[which(recipient.path.heights > flow$time.from.root.recipient[i])]
recipient <- valid.recipients[length(valid.recipients)] #do it from the earliest qualifying tipward node
if(length(recipient.path)>1 && length(which(recipient.path.heights==flow$time.from.root.recipient[i]))>0) { #the latter condition means we're moving to an existing node
recipient <- valid.recipients[length(valid.recipients)-1]
}
y1 <- xxyy$yy[which(edges(phy4)[xxyy$eorder,2] == recipient)]
donor.node <- getNode(phy4, flow$donor[i])
donor.path <- c(donor.node, ancestors(phy4, donor.node))
donor.path.heights <- nodeHeight(phy4, donor.path, from="root")
valid.donors <- donor.path[which(donor.path.heights > flow$time.from.root.donor[i])]
donor <- valid.donors[length(valid.donors)] #do it from the earliest qualifying tipward node
y0 <- xxyy$yy[which(edges(phy4)[xxyy$eorder,2] == donor)]
arrows(x0=flow$time.from.root.donor[i]/max(branching.times(phy)), x1=flow$time.from.root.recipient[i]/max(branching.times(phy)), y1=y1, y0=y0, col=col.arrow, lwd=arrow.width, length=head.length) #rescale since it goes from zero to 1 in height
#grid.arrows(x=c(flow$time.from.root[i],flow$time.from.root[i]), y=c(y0, y1))
}
}
LumpIntoClades <- function(phy, flow) {
flow.string <- paste(flow$time.from.root.donor, flow$time.from.root.recipient, flow$m)
flow.together <- cbind(flow.string=flow.string, flow)
unique.flow.strings <- unique(flow.string)
donor.clades <- c()
recipient.clades <- c()
m.clades <-c()
time.from.root.donor.clades <- c()
time.from.root.recipient.clades <- c()
for (i in sequence(length(unique.flow.strings))) {
flow.local <- flow.together[which(flow.together$flow.string == unique.flow.strings[i]), ]
donor.clades <- append(donor.clades, paste(unique(flow.local$donor), collapse=","))
recipient.clades <- append(recipient.clades, paste(unique(flow.local$recipient), collapse=","))
m.clades <-append(m.clades, flow.local$m[1])
time.from.root.donor.clades <- append(time.from.root.donor.clades, flow.local$time.from.root.donor[1])
time.from.root.recipient.clades <- append(time.from.root.recipient.clades, flow.local$time.from.root.recipient[1])
}
return(data.frame(donor.clades = donor.clades, recipient.clades=recipient.clades, m=m.clades, time.from.root.donor=time.from.root.donor.clades, time.from.root.recipient = time.from.root.recipient.clades, stringsAsFactors=FALSE))
}
AttachHybridsToDonor <- function(phy, flow, suffix="_DUPLICATE") {
flow.clades <- LumpIntoClades(phy, flow)
phy.merged <- phy
for (i in sequence(dim(flow.clades)[1])) {
taxa.to.retain <- strsplit(flow.clades$recipient.clades[i], ",")[[1]]
donor.taxa <- strsplit(flow.clades$donor.clades[i], ",")[[1]]
pulled.clade <- c()
if(length(taxa.to.retain)>1) {
pulled.clade <- drop.tip(phy, phy$tip.label[!phy$tip.label %in% taxa.to.retain])
} else {
pulled.clade <- structure(list(edge = structure(c(3L, 3L, 1L, 2L), .Dim = c(2L,
2L)), edge.length = rep(flow.clades$time.from.root.recipient[i],2),
tip.label = c(taxa.to.retain, "DUMMY"), Nnode = 1L), .Names = c("edge",
"edge.length", "tip.label", "Nnode"), class = "phylo")
}
pulled.clade$tip.label <- paste(pulled.clade$tip.label, suffix, sep="")
attachment.crown.node <- which(phy.merged$tip.label==donor.taxa[1])
if(length(donor.taxa)>1) {
attachment.crown.node <- findMRCA(phy.merged, tips=donor.taxa, type=c("node"))
}
attachment.stem.node <- GetAncestor(phy.merged, attachment.crown.node)
pulled.clade$root.edge<-max(branching.times(phy)) - max(branching.times(pulled.clade)) - flow.clades$time.from.root.donor[i]
phy.merged <- bind.tree(phy.merged, pulled.clade, attachment.crown.node, position=phy.merged$edge.length[which(phy.merged$edge[,2]==attachment.crown.node)] - (flow.clades$time.from.root.donor[i]-nodeheight(phy.merged, attachment.stem.node)))
if(length(taxa.to.retain)==1) {
phy.merged <- drop.tip(phy.merged, paste("DUMMY", suffix, sep=""))
}
}
return(phy.merged)
}
# names(free.parameters) <- c("sigma.sq", "mu", "bt", "vh", "SE")
#GetMeansModified <- function(x, phy, flow, actual.params) {
#params must be named vector
SimulateTipData <- function(phy, flow, params, suffix="_DUPLICATE") {
# flow.clades <- LumpIntoClades(phy, flow)
# recipients <- c()
# for (i in sequence(dim(flow.clades)[1])) {
# recipients<-c(recipients, strsplit(flow.clades$recipient.clades, ",")[[1]])
# }
# if(length(unique(recipients)) != length(recipients)) {
# print(flow)
# print(flow.clades)
# stop("This function only works if each taxon of hybrid origin only appears once as a recipient")
# }
phy.merged <- AttachHybridsToDonor(phy, flow, suffix=suffix)
phy.merged$edge.length <- phy.merged$edge.length*params["sigma.sq"]
phy.merged$edge.length[which(phy.merged$edge[,2] <= Ntip(phy.merged))] <- phy.merged$edge.length[which(phy.merged$edge[,2] <= Ntip(phy.merged))]+params["SE"]
tips <- sim.char(phy.merged, par=matrix(1, nrow=1, ncol=1), nsim=1, model="BM", root=params["mu"])[,,1] #we have already done the scaling and SE
hybrid.name.root <- gsub(suffix, "", phy.merged$tip.label[grepl(suffix, phy.merged$tip.label)])
for (i in sequence(length(hybrid.name.root))) {
focal.tips <- tips[c(hybrid.name.root[i], paste(hybrid.name.root[i], suffix, sep=""))]
focal.m <- flow$m[which(flow$recipient==hybrid.name.root[i])]
focal.tips.bt <- focal.tips + log(params['bt'])
tip.mean <- focal.m*focal.tips.bt[1] + (1-focal.m) * focal.tips.bt[2]
tip.final <- rnorm(1, mean=tip.mean, sd=sqrt(params['vh']))
tips[hybrid.name.root]<-tip.final
}
tips<-tips[!grepl(suffix, names(tips))]
return(tips)
}
#The following short function comes from Ken Takagi at https://chitchatr.wordpress.com/2011/12/30/convex-hull-around-scatter-plot-in-r/
Plot_ConvexHull<-function(xcoord, ycoord, lcolor){
hpts <- chull(x = xcoord, y = ycoord)
hpts <- c(hpts, hpts[1])
lines(xcoord[hpts], ycoord[hpts], col = lcolor)
}
ContourFromAdaptiveSampling<-function(sims, params.of.interest=NULL) {
#sims is a data.frame with the sim results
if(is.null(params.of.interest)) {
params.of.interest <- colnames(sims)[-1]
}
sims$neglnL <- sims$neglnL - min(sims$neglnL)
for (param.1 in sequence(length(params.of.interest)-1)) {
for (param.2 in c((param.1+1) : length(params.of.interest))) {
sims.sub <- sims[,c(params.of.interest[param.1], params.of.interest[param.2], 'neglnL')]
# points.to.fit <- data.frame()
x.range <- range(sims.sub[,1])
y.range <- range(sims.sub[,2])
x.grid <- quantile(sims.sub[,1], seq(from=0, to=1, length.out = floor(length(sims.sub[,1])/50)))
y.grid <- quantile(sims.sub[,2], seq(from=0, to=1, length.out = floor(length(sims.sub[,2])/50)))
colnames(sims.sub)[1:2] <- c("x", "y")
# for (x.index in sequence(length(x.grid)-1)) {
# for (y.index in sequence(length(y.grid)-1)) {
# relevant.points <- subset(sims.sub, x>=x.grid[x.index] & x<x.grid[x.index+1] & y>=y.grid[y.index] & y<y.grid[y.index+1])
# points.to.fit <- rbind(points.to.fit, relevant.points[which.max(relevant.points[,3]),])
# }
# }
plot(x=x.range, y=y.range, xlab=params.of.interest[param.1], ylab=params.of.interest[param.2], type="n", bty="n")
relevant.point.id <- chull(sims.sub[which(sims.sub[,3]<2),1], sims.sub[which(sims.sub[,3]<2),2])
#polygon(sims.sub[relevant.point.id,1], sims.sub[relevant.point.id, 2], col="gray", border=NA, fillOddEven=TRUE)
#polygon(sims.sub[relevant.point.id,1], sims.sub[relevant.point.id, 2], col="gray", border=NA, fillOddEven=FALSE)
Plot_ConvexHull(sims.sub[which(sims.sub[,3]<2),1], sims.sub[which(sims.sub[,3]<2),2], "black")
Plot_ConvexHull(sims.sub[which(sims.sub[,3]<5),1], sims.sub[which(sims.sub[,3]<5),2], "black")
#points(sims.sub[which(sims.sub[,3]<2),1], sims.sub[which(sims.sub[,3]<2),2], col="green", pch="X")
#contour(interp(points.to.fit[,1], points.to.fit[,2], points.to.fit[,3]), xlab=params.of.interest[param.1], ylab=params.of.interest[param.2], levels=c(1, 2, 5, 10))
points(sims.sub[which.min(sims.sub$neglnL),1], sims.sub[which.min(sims.sub$neglnL),2], col="red", pch=20)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.