## fishy = FALSE: ln of true likelihood (mutation happens somewhere at the branch, no presumptions about where exactly)
## of the node_data given exponential model with parameter lambda
## fishy = TRUE: ln of likelihood computed according to eq (1) on page 1,
## Adot taken at the middle of the branch, (talpha1-talpha0) = length of the branch
lnlikelihood_exp <-function(node_data, lambda, fishy = FALSE){
if (class(node_data) == "list"){
node_data <- node_data[[1]]
}
beta_branches = node_data[node_data["event_indicator"]==0,]
alpha_branches = node_data[node_data["event_indicator"]==1,]
if (fishy){
apply_res1 <- apply( alpha_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
talpha1 <- as.numeric(elm["t_branch_end"])
talpha0 <- as.numeric(elm["t_branch_start"])
talpha_middle <- (talpha1+talpha0)/2
log(lambda)+log(talpha1-talpha_middle) #changed
}
else { 0 }
})
}
else {
apply_res1 <- apply( alpha_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
talpha1 <- as.numeric(elm["t_branch_end"])
talpha0 <- as.numeric(elm["t_branch_start"])
survival <- exp(lambda*(talpha0-talpha1))
log(1-survival)
}
else { 0 }
})
}
apply_res2 <- apply( beta_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
tbeta1 <- as.numeric(elm["t_branch_end"])
tbeta0 <- as.numeric(elm["t_branch_start"])
tbeta0 - tbeta1
}
else { 0 }
})
#added
if(fishy){
apply_res3 <- apply( alpha_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
talpha1 <- as.numeric(elm["t_branch_end"])
talpha0 <- as.numeric(elm["t_branch_start"])
talpha_middle <- (talpha1+talpha0)/2
talpha0 - talpha_middle
}
else { 0 }
})
}
if (fishy){
lnL <-sum(apply_res1) + lambda*(sum(apply_res2)+sum(apply_res3))
}
else{
lnL <-sum(apply_res1) + lambda*sum(apply_res2)
}
c(lnL = lnL, AIC = aic(lnL, 1))
}
## fishy = FALSE: ln of true likelihood (mutation happens somewhere at the branch, no presumptions about where exactly)
## of the node_data given weibull model with parameters lambda and p
## fishy = TRUE: ln of likelihood computed according to eq (1) on page 1,
## Adot taken at the middle of the branch, (talpha1-talpha0) = length of the branch
lnlikelihood_weibull <-function(node_data, lambda, p, fishy = FALSE){
if (class(node_data) == "list"){
node_data <- node_data[[1]]
}
beta_branches = node_data[node_data["event_indicator"]==0,]
alpha_branches = node_data[node_data["event_indicator"]==1,]
if (fishy){
apply_res1 <- apply( alpha_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
talpha1 <- as.numeric(elm["t_branch_end"])
talpha0 <- as.numeric(elm["t_branch_start"])
talpha_middle <- (talpha1+talpha0)/2
#log(p) + p*log(lambda)+(p-1)*log( talpha_middle)+log(talpha1-talpha0)
log(p) + p*log(lambda)+(p-1)*log( talpha_middle)+log(talpha1-talpha_middle)
#print ("p to die")
#print (log(p) + p*log(lambda)+(p-1)*log( talpha_middle)+log(talpha1-talpha_middle))
}
else { 0 }
})
}
else {
apply_res1 <- apply( alpha_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
talpha1 <- as.numeric(elm["t_branch_end"])
talpha0 <- as.numeric(elm["t_branch_start"])
survival <- exp((lambda^p)*(talpha1^p)*( ((talpha0/talpha1)^p) - 1 ))
log(1-survival)
}
else { 0 }
})
}
apply_res2 <- apply( beta_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
tbeta1 <- as.numeric(elm["t_branch_end"])
tbeta0 <- as.numeric(elm["t_branch_start"])
if (tbeta1 == 0){0}
else {
(tbeta1^p)*( ((tbeta0/tbeta1)^p) - 1 )
}
#print ("p to survive on beta")
#print ((tbeta1^p)*( ((tbeta0/tbeta1)^p) - 1 ) )
}
else { 0 }
})
## added
if (fishy){
apply_res3 <- apply( alpha_branches, 1,
function(elm){
if(!is.na(elm["event_indicator"])){ #if (nrow(dataframe) == 0), <apply> still tries to apply the function to.. header? the list is not empty: it contains the header
talpha1 <- as.numeric(elm["t_branch_end"])
talpha0 <- as.numeric(elm["t_branch_start"])
talpha_middle <- (talpha1+talpha0)/2
#print ("p to survive on alpha")
#print ((talpha_middle^p)*( ((talpha0/talpha_middle)^p) - 1 ))
(talpha_middle^p)*( ((talpha0/talpha_middle)^p) - 1 )
}
else { 0 }
})
}
if (fishy){
lnL <-sum(apply_res1) + (lambda^p)*(sum(apply_res2)+sum(apply_res3))
}
else {
lnL <-sum(apply_res1) + (lambda^p)*sum(apply_res2)
}
c(lnL = lnL, AIC = aic(lnL, 2))
}
# lnl - loglikelihood, p - number of parameters in the model
aic <- function (lnL, p){
2*(p-lnL)
}
# lnl - loglikelihood, p - number of free parameters in the model, n - sample size
bic <- function (lnL, model = NULL, cluster.number, n){
if (model == "weibull"){
p <- 3*cluster.number -1;
}
else {
p <- 2*cluster.number -1;
}
-2*lnL + p*log(n)
}
# LR LR = 2*(lnL1-lnL2) approximately follows a chi-square distribution with 1 degree of freedom
lrt <- function (node_data, lambda_exp, lambda_weib, p, fishy = FALSE, verbose = FALSE){
weibull <- lnlikelihood_weibull(node_data, lambda = lambda_weib, p = p, fishy)
exponential <- lnlikelihood_exp(node_data, lambda = lambda_exp, fishy)
LR <- 2*(weibull[1]-exponential[1])
if(verbose){
print ("weibull")
print (weibull)
print ("exponential")
print (exponential)
print ("LR")
print (LR)
}
out <- c(exponential[1], weibull[1], LR)
names(out) = c("exponential", "weibull", "lr")
out
}
## Compute LR
## takes an already computed set of parameters (with parameters(..., filter=FALSE))
## OR, if parameters = NULL, computes them
## ! mutation_position and parameters cannot be defined simultaneously
## threshold - significance threshold (default - 3.84, value of chi-square distr with 1 degree of freedom for sign level 0.05)
## Prints parameters and lr for significant nodes
## outputs parameters and lr for all nodes
## files like "h1_single_root_31_03_truelambda_fishy" were printed here
lrt_procedure <-function(data, prot, tag, fishy = FALSE, threshold = 3.84, mutation_position = "end", pack = "rootsolve", params = NULL, all = FALSE){
# if (!is.null(params)){
# warning("Parameters are defined, therefore mutation_position will be ignored")
#}
sink(file = paste(c(getwd(), "/output/" ,prot,"_single_root_", tag), collapse=""), append = FALSE, type = c("output", "message"),
split = FALSE)
lratios <- lapply (names(data), function(elm, mutation_position){
mutation_position <- mutation_position
node_data <- data[elm]
if (is.null(params)){
node_roots <- as.list(find_single_root(node_data, mutation_position, pack=pack))
}
else {
node_roots <- params[params$node == elm,]
}
if(!is.na(node_roots) && all(!is.na(node_roots)) && node_roots$p_precision < 1e-5 ){
lr <-lrt (node_data, lambda_exp = node_roots$lambda_exp_root, lambda_weib = node_roots$lambda_weib_root, p = node_roots$p_root, fishy= TRUE, verbose=FALSE)
row <- c(node = elm, lambda_exp = node_roots$lambda_exp_root, lambda_weib = node_roots$lambda_weib_root, p = node_roots$p_root, p_precision = node_roots$p_precision, exponential =lr["exponential"], weibull=lr["weibull"], lr = lr["lr"])
if(!is.na(lr["lr"]) && lr["lr"] > threshold){
if (all){
print (c(node = elm, lambda_exp = node_roots$lambda_exp_root, lambda_weib = node_roots$lambda_weib_root, p = node_roots$p_root, p_precision = node_roots$p_precision, exponential =lr["exponential"], weibull=lr["weibull"], lr = lr["lr"]))
} else print (c(node = elm, lambda_exp = node_roots$lambda_exp_root, lambda_weib = node_roots$lambda_weib_root, p = node_roots$p_root, p_precision = node_roots$p_precision, lr = lr["lr"]))
}
}
else {
row <- c(node = elm, lambda_exp = node_roots$lambda_exp_root, lambda_weib = node_roots$lambda_weib_root, p = node_roots$p_root, p_precision = node_roots$p_precision, lr = NA)
}
}, mutation_position)
sink()
lratios
}
dumb_wood_likelihood <-function(data, prot, tag, fishy = FALSE, threshold = 0, mutation_position = "end", pack = "rootsolve", params, all = FALSE){
sink(file = paste(c(getwd(), "/output/" ,prot,"_dumb_wood_", tag), collapse=""), append = FALSE, type = c("output", "message"),
split = FALSE)
weib_sum <- 0
exp_sum <- 0
for (elm in names(data)){
node_data <- data[elm]
node_roots <- params[params$node == elm,]
if(nrow(node_roots) > 0 && !is.na(node_roots) && all(!is.na(node_roots)) && node_roots$p_precision < 1e-5 ){
lr <-lrt (node_data, lambda_exp = node_roots$lambda_exp_root, lambda_weib = node_roots$lambda_weib_root, p = node_roots$p_root, fishy= TRUE, verbose=FALSE)
print(lr)
weib_sum <- weib_sum + lr["weibull"]
exp_sum <- exp_sum + lr["exponential"]
}
}
print(exp_sum)
print (weib_sum)
sink()
}
## verbose mode also prints a file with parameters for all nodes
lrt_all <- function(mutation_position = "middle", fishy = TRUE, pack = "rootsolve", tag = "defaulttag", verbose = TRUE, threshold = 3.84, all = FALSE){
prots <- c("h1", "h3", "n1", "n2")
for (pr in prots){
prot <- pr
prot_data <- read.csv(paste(c(getwd(), "/data/" ,prot,"_for_LRT.csv"), collapse=""),stringsAsFactors=FALSE)
splitted <- split(prot_data, list(prot_data$site, prot_data$ancestor_node), drop=TRUE)
if (verbose){
sink(file = paste(c(getwd(), "/output/" ,prot,"_all_parms_", tag), collapse=""))
}
prms <-parameters(splitted, mutation_position = mutation_position, verbose = verbose, pack = pack, filter= FALSE)
if (verbose){sink()}
lrt_procedure(data = splitted, prot = prot, tag = tag, fishy=fishy, params = prms, threshold = threshold, all = all)
sink()
}
}
###
## tests and procedures
#lrt_all(mutation_position = "middle", fishy = TRUE, tag = "middle_search", pack = "rootsolve", verbose = TRUE)
#benchmark(parameters(splitted, jack = FALSE, pack = "nleqslv", filter= FALSE), parameters(splitted, jack = FALSE, pack = "rootsolve", filter= FALSE), replications = 1)
##check that there are no zero-length branches with mutations
no_muts_on_zero_branches <-function(prot){
prot_data <- read.csv(paste(c(getwd(), "/data/" ,prot,"_for_LRT.csv"), collapse=""),stringsAsFactors=FALSE)
nullength <- prot_data[prot_data["event_indicator"] == 1,]
nullength <-nullength[nullength["t_branch_end"]-nullength["t_branch_start"] == 0,]
if (nrow(nullength) == 0){TRUE}
else {FALSE}
}
# how much does it take to compute all jackobians?
test_jack <- function(x, splitted){
ps <- lapply (names(splitted), function(elm){
node_data <- splitted[[elm]]
parms <- list(node_data = node_data, mutation_position = "end")
node_jacks <- p_derivative_jacfunc(x, parms)
})
}
#sink("C:/Users/weidewind/workspace/perlCoevolution/TreeUtils/Phylo/MutMap/likelihood/nsyn/likelihood_games_h1")
#h1_likelihood_games <-parameters(splitted, mutation_position = "middle", filter= FALSE, verbose = TRUE)
#sink()
#lrt_procedure(data = splitted, prot = prot, tag = "likelihood_games", fishy=TRUE, params = h1_likelihood_games)
#sink()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.