# library(igraph)
# library(RBGL) ## masks functions degree() edges() from graph and transitivity() from igraph
# ## contains edmondsOptimumBranching()
# library(Matrix)
## choose root
setRoot <- function(W){
sumIn <- apply(W, 2, sum)
root <- which.min(sumIn)
W[,root] <- 0
diag(W) <- 0
return(list(W=W, root=root))
}
setMinInEdgeGraph <- function(W){
n <- dim(W)[1]
diag(W) <- -1/0
selectedInEdges <- max.col(t(W))
newW <- matrix(0,n,n)
for (i in 1:n){
newW[selectedInEdges[i],i] <- W[selectedInEdges[i],i]
}
return(newW)
}
## Scales scores so that all entries of the matrix are >= 0 , where 0 indicates no edge
scaleScore <- function(score) {
score <- score - min(score) + 1
diag(score) <- 0
return(score)
}
## Finds optimum branching, given a weighted adjacency matrix. (Optimum = maximum)
EdmondsOpt <- function(score,ret='graph') {
if (all(score==t(score))) {
stop("only appropriate for directed graphs")
}
if (all(score<=0)) {
score <- scaleScore(score)
}
nv <- nrow(score) ## number of vertices
ne <- prod(dim(score)) - nv
em <- which(!diag(nrow(score)),arr.ind=TRUE)
em <- rbind(from=em[order(em[,1]),1],to=em[order(em[,1]),2])
eW <- score[t(em)]
ans <- .Call("edmondsOptimumBranching", as.integer(nv), as.integer(ne),
as.integer(em - 1), as.double(eW), PACKAGE = "RBGL")
el <- t(ans[[1]]+1) ## start node numbering with 1
eW <- ans[[2]]
if (!is.null(rownames(score))) {
el <- matrix(rownames(score)[el],ncol=2,byrow=FALSE)
}
if (ret=='graph'){
el <- el[order(el[,1]),] ## order nodes to make sure plotting function works
g <- graph.edgelist(el,directed = TRUE)
return(g)
} else if (ret=='adjMat') {
adjMat <- matrix(0,nrow=nv,ncol=nv)
rownames(adjMat) <- colnames(adjMat) <- rownames(score)
adjMat[el] <- 1
return(adjMat)
}
}
plotG <- function(W){
plot(graph.adjacency(W, weighted = T))
}
plotT <- function(W){
root <- which(colSums(W) == 0)
g <- graph.adjacency(W, weighted=T)
l <- layout.reingold.tilford(g,root=root)
plot(g, layout=l)
}
boundProb <- function(x){
return( (1.0-.Machine$double.eps)*(x-0.5)+0.5 )
}
getWeights <- function(parentStates, ratemat, branchLength){
transProb <- expm(ratemat * branchLength)
m1 <- transProb[2,2]
if (m1 == 1.0){
m1 <- boundProb(m1)
}
m2 <- transProb[1,2]
return(parentStates*m1 + (!parentStates)*m2)
}
getLocalLikhd <- function(childSeq, parentSeq, ratemat, t){
prob <- getWeights(parentSeq, ratemat, t)
prob <- childSeq*prob + (!childSeq)*(1-prob)
return( sum(log(prob)) )
}
getMaxLocalLikhd <-function(childSeq, parentSeq, ratemat, tspace=NULL){
if (is.null(tspace)){
tspace <- seq(0,1,length.out=100)
}
ll <- sapply(tspace, function (i) getLocalLikhd(childSeq,
parentSeq, ratemat, i) )
optLL <- max(ll)
optT <- tspace[ which.max(ll) ]
return( list(optLL = optLL, optT = optT) )
}
getPhyloDist <- function(seq, ratemat){
n <- dim(seq)[1]
likhdW <- matrix(0,n,n)
branchW <- matrix(0,n,n)
for (i in 1:n){
for (j in 1:n){
if (i !=j){
maxLocalLikhd <- getMaxLocalLikhd(seq[i,], seq[j,], ratemat)
likhdW[j,i] <- maxLocalLikhd$optLL
branchW[j,i] <- maxLocalLikhd$optT
}
}
}
return(list(likhdW = likhdW, branchW = branchW))
}
getPhyloDist2 <- function(seq, ratemat){
n <- dim(seq)[1]
likhdW <- matrix(0,n,n)
branchW <- matrix(0,n,n)
loopMat <- combn(n, 2)
loopMat <- t(cbind(loopMat, loopMat[c(2,1),]))
res <- foreach(i=1:(n*(n-1)),
.packages = 'Matrix',
.export='getMaxLocalLikhd') %do% {
index <- loopMat[i,]
getMaxLocalLikhd(seq[index[2],], seq[index[1],], ratemat)
}
res <- unlist(res)
likhdW[loopMat] <- res[which(names(res) == 'optLL')]
branchW[loopMat] <- res[which(names(res) == 'optT')]
return(list(likhdW = likhdW, branchW = branchW))
}
getRateMatrix <- function(data, mode='methyl'){
mrate <- mean(rowSums(data, na.rm = TRUE) / rowSums(!is.na(data)))
urate <- 1-mrate
ratemat <- matrix(0, 2, 2)
ratemat[1,] <- c(-mrate, mrate)
if (mode == 'methyl'){
scale <- 2/(3*urate*mrate)
ratemat[2,] <- c(urate/2, -urate/2)
ratemat <- scale*ratemat
}else{
scale <- 1/(urate*mrate)
ratemat <- scale*ratemat
}
return(ratemat)
}
getDMST <- function(data, mode='methyl'){
ratemat <- getRateMatrix(data, mode)
distmats <- getPhyloDist(data, ratemat)
likhdW <- distmats$likhdW
branchW <- distmats$branchW
A <- EdmondsOpt(likhdW, ret ='adjMat')
mstW1 <- distmats$likhdW*A
mstW2 <- distmats$branchW*A
return(list(W1=mstW1, W2 = mstW2, distmats=distmats, ratemat=ratemat))
}
# f1 <- '~/Dropbox/cancer-evolution/tssb/data/full_methy/noisy_full_methy_8_2000_genotype.csv'
# f2 <- '~/Dropbox/cancer-evolution/tssb/data/noisy-full-obs/noisy_fullsyn_8_2000_genotype.csv'
#
# data1 <- t(read.csv(f1, as.is=TRUE))
# data2 <- t(read.csv(f2, as.is=TRUE))
#
# tic <- proc.time()
# mst1 <- getDMST(data1, mode = 'methyl')
# mst2 <- getDMST(data2, mode = 'mut')
# toc <- proc.time()-tic
#
# stopCluster(cl)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.