# R/hOUwie.internal.R

##### Main internal functions ##### 
hOUwie.dev <- function(p, phy, data, rate.cat, tip.fog,
                       index.disc, index.cont, root.p,
                       edge_liks_list, nSim, all.paths=NULL, 
                       sample_tips=FALSE, sample_nodes=FALSE,
                       adaptive_sampling=FALSE, split.liks=FALSE, 
                       global_liks_mat=global_liks_mat, diagn_msg=FALSE){
  tip.paths <- all.paths[1:length(phy$tip.label)]
  p <- exp(p)
  # check if these parameters exist in the global matrix
  # set(global_liks_mat, i = as.integer(1),  j = 1:4, value=as.list(c(0, p)))
  if(!is.null(global_liks_mat)){
    liks_match_vector <- colSums(abs(t(global_liks_mat[,-1]) - p)) == 0
    llik_houwie <- as.numeric(global_liks_mat[which(liks_match_vector), 1])
    if(!split.liks){
      if(any(liks_match_vector, na.rm = TRUE)){
        # print(llik_houwie)
        # print(p)
        return(-llik_houwie)
      }
    }
  }
  
  k <- max(index.disc, na.rm = TRUE)
  p.mk <- p[1:k]
  p.ou <- p[(k+1):length(p)] 
  Rate.mat <- matrix(1, 3, dim(index.disc)[2])
  alpha.na <- is.na(index.cont[1,])
  index.cont[is.na(index.cont)] <- max(index.cont, na.rm = TRUE) + 1
  Rate.mat[] <- c(p.ou, 1e-10)[index.cont]
  alpha = Rate.mat[1,]
  sigma.sq = Rate.mat[2,]
  theta = Rate.mat[3,]
  rate <- index.disc
  rate[is.na(rate)] <- k + 1
  Q <- matrix(0, dim(rate)[1], dim(rate)[2])
  Q[] <- c(p.mk, 0)[rate]
  diag(Q) <- -rowSums(Q)
  edge_liks_list_init <- edge_liks_list
  # altering the conditional probabilities based on jointly sampled descendant species
  if(sample_nodes){
    edge_liks_list <- try(getCherryConditionals(phy, data, Rate.mat, Q, edge_liks_list_init, tip.paths))
    if(inherits(edge_liks_list, what="try-error")){
      return(1e10)
    }
  }
  # a way to alter the conditional values of the tips based on current parameter values (only meaningful if there are multiple rate categories)
  if(rate.cat > 1 & sample_tips){
    normal.params <- rbind(theta, sigma.sq)
    sample.tip.probs <- apply(normal.params, 2, function(x) dnorm(data[,3], x[1], sqrt(x[2])))
    for(i in 1:length(phy$tip.label)){
      if(all(sample.tip.probs[i,] == 0)){
        sample.tip.probs[i,] <- rep(1, length(sample.tip.probs[i,]))
      }
      sample_tip_i <- edge_liks_list[[i]][1,] * sample.tip.probs[i,]
      edge_liks_list[[i]][1,] <- sample_tip_i/sum(sample_tip_i)
    }
  }
  # get the conditional probabilities based on the discrete values
  for(recon_index in 1:length(edge_liks_list)){
    edge_liks_list[[recon_index]] <- edge_liks_list[[recon_index]] * edge_liks_list_init[[recon_index]]
  }
  conditional_probs <- getConditionalInternodeLik(phy, Q, edge_liks_list)
  root_liks <- getRootLiks(conditional_probs, Q, root.p)
  if(is.null(root_liks)){
    return(1e10)
  }
  # initial sample
  # sample mappings based on the conditional probabilities (also calculating some time-saving probabilities for transitions to and from particular states)
  internode_maps_and_discrete_probs <- getInternodeMap(phy, Q, conditional_probs$edge_liks_list, conditional_probs$root_state, root_liks, nSim, check_vector = NA, max.attempts=nSim*2)
  internode_maps <- internode_maps_and_discrete_probs$maps
  internode_samples <- internode_maps_and_discrete_probs$state_samples
  check_vector <- unlist(lapply(internode_samples, function(x) paste0(unlist(x), collapse="")))
  # if additional samples are needed (didn't reach nSim), they are taken ~randomly
  if(length(internode_samples) < nSim){
    additional_sims <- nSim - length(internode_samples)
    random_internode_maps_and_discrete_probs <- getInternodeMap(phy, Q * 100, edge_liks_list_init, conditional_probs$root_state, root_liks, additional_sims, check_vector = check_vector, max.attempts=nSim*10)
    internode_maps <- c(internode_maps, random_internode_maps_and_discrete_probs$maps)
    internode_samples <- c(internode_samples, random_internode_maps_and_discrete_probs$state_samples)
    check_vector <- unlist(lapply(internode_samples, function(x) paste0(unlist(x), collapse="")))
  }
  # calculate the discrete probabilities based on the given Q matrix (Pij already calculated)
  discrete_probs <- lapply(internode_samples, function(x) getStateSampleProb(state_sample = x, Pij = internode_maps_and_discrete_probs$Pij, root_liks = root_liks, root_edges = internode_maps_and_discrete_probs$root_edges))
  llik_discrete <- unlist(discrete_probs)
  failed_maps <- llik_discrete == -Inf
  llik_discrete <- llik_discrete[!failed_maps]
  if(length(llik_discrete) == 0){
    return(1e10)
  }
  # generate maps
  simmaps <- getMapFromSubstHistory(internode_maps[!failed_maps], phy)
  # simmaps <- lapply(simmaps, correct_map_edges)
  # if there is no character dependence the map has no influence on the continuous likelihood
  character_dependence_check <- all(apply(index.cont, 1, function(x) length(unique(x)) == 1))
  if(character_dependence_check){
    llik_continuous <- OUwie.basic(simmaps[[1]], data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog)
    llik_continuous <- rep(llik_continuous, length(simmaps))
  }else{
    llik_continuous <- unlist(lapply(simmaps, function(x) OUwie.basic(x, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog)))
  }
  # combine probabilities being careful to avoid underflow
  llik_houwies <- llik_discrete + llik_continuous
  llik_houwie <- max(llik_houwies) + log(sum(exp(llik_houwies - max(llik_houwies))))
  if(!is.null(global_liks_mat)){
    if(diff(abs(c(llik_houwie,as.numeric(global_liks_mat[1,1])))) > 1e10){
      # hOUwie sometimes gets stuck optimizing models that have likely failed, so a quick likelihood-difference sanity check is implemented here
      return(1e10)
    }
    if(diff(abs(c(max(llik_discrete), max(llik_continuous))))  > 1e10){
      # another likely optimization failure: the best discrete and continuous map log-likelihoods differ by more than 1e10
      return(1e10)
    }
  }
  # after calculating the likelihoods of an initial set of maps, we sample potentially good maps
  if(adaptive_sampling & !character_dependence_check){
    adaptive_criteria <- FALSE
    adaptive_count <- 0
    best_mapping <- simmaps[[which.max(llik_houwies)]]
    while(!adaptive_criteria){
      adaptive_count <- adaptive_count + 1
      if(adaptive_count > 5){
        adaptive_criteria <- TRUE
      }
      # until the stopping criterion is met:
      # generate a set of expectations based on the best mapping
      current_ou_expectations <- getOUExpectations(best_mapping, Rate.mat, all.paths)
      # generate a new conditional probability based on the new expected values
      edge_liks_list <- try(getAdaptiveConditionals(phy, data, Rate.mat, Q, edge_liks_list_init, tip.paths, current_ou_expectations))
      if(inherits(edge_liks_list, what="try-error")){
        next
      }
      for(recon_index in 1:length(edge_liks_list)){
        edge_liks_list[[recon_index]] <- edge_liks_list[[recon_index]] * edge_liks_list_init[[recon_index]]
      }
      conditional_probs <- getConditionalInternodeLik(phy, Q, edge_liks_list)
      root_liks <- getRootLiks(conditional_probs, Q, root.p)
      if(is.null(root_liks)){
        next
      }
      # generate a new set of unique mappings based on the new conditional probabilities
      internode_maps_and_discrete_probs <- getInternodeMap(phy, Q, conditional_probs$edge_liks_list, conditional_probs$root_state, root_liks, nSim, check_vector = check_vector, max.attempts=nSim*2)
      if(length(internode_maps_and_discrete_probs$maps) == 0){
        # if no new maps were generated: generate the maps randomly
        internode_maps_and_discrete_probs <- getInternodeMap(phy, Q*100, edge_liks_list_init, conditional_probs$root_state, root_liks, nSim, check_vector = check_vector, max.attempts=nSim*10)
      }
      check_vector <- c(check_vector, unlist(lapply(internode_maps_and_discrete_probs$state_samples, function(x) paste0(unlist(x), collapse=""))))
      if(length(internode_maps_and_discrete_probs$maps) > 0){
        new_simmaps <- getMapFromSubstHistory(internode_maps_and_discrete_probs$maps, phy)
        if(length(new_simmaps) == 1){
          simmaps <- c(simmaps, new_simmaps[[1]])
        }else{
          simmaps <- c(simmaps, new_simmaps)
        }
        # evaluate the new mappings' joint likelihood
        discrete_probs <- lapply(internode_maps_and_discrete_probs$state_samples, function(x) getStateSampleProb(state_sample = x, Pij = internode_maps_and_discrete_probs$Pij, root_liks = root_liks, root_edges = internode_maps_and_discrete_probs$root_edges))
        continuous_probs <- lapply(new_simmaps, function(x) OUwie.basic(x, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog))
        new_liks <- unlist(continuous_probs) + unlist(discrete_probs)
        adaptive_criteria <- max(llik_houwies) > max(new_liks)
        llik_continuous <- c(llik_continuous, unlist(continuous_probs))
        llik_discrete <- c(llik_discrete, unlist(discrete_probs))
        llik_houwies <- llik_discrete + llik_continuous
      }else{
        adaptive_criteria <- TRUE
      }
      # return to the initial step if the stopping criterion has not been met; otherwise done.
    }
  }
  # find the best nSim mappings after adaptive sampling
  sorted_likelihoods <- sort(llik_houwies, decreasing = TRUE, index.return = TRUE)
  unsorted_lliks_df <- data.frame(llik_discrete=llik_discrete, llik_continuous=llik_continuous)
  llik_houwies <- llik_houwies[c(na.omit(sorted_likelihoods$ix[1:nSim]))]
  llik_discrete <- llik_discrete[c(na.omit(sorted_likelihoods$ix[1:nSim]))]
  llik_continuous <- llik_continuous[c(na.omit(sorted_likelihoods$ix[1:nSim]))]
  simmaps <- simmaps[c(na.omit(sorted_likelihoods$ix[1:nSim]))]
  # get the summed probabilities using tricks to prevent underflow
  llik_houwie <- max(llik_houwies) + log(sum(exp(llik_houwies - max(llik_houwies))))
  llik_discrete_summed <- max(llik_discrete) + log(sum(exp(llik_discrete - max(llik_discrete))))
  llik_continuous_summed <- max(llik_continuous) + log(sum(exp(llik_continuous - max(llik_continuous))))
  if(!is.null(global_liks_mat)){
    if(diff(abs(c(llik_houwie,as.numeric(global_liks_mat[1,1])))) > 1e10){
      # hOUwie sometimes gets stuck optimizing models that have likely failed, so a quick likelihood-difference sanity check is implemented here
      return(1e10)
    }
  }
  if(split.liks){
    # expected_vals <- lapply(simmaps, function(x) OUwie.basic(x, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog,return.expected.vals=TRUE))
    # expected_vals <- colSums(do.call(rbind, expected_vals) * exp(llik_houwies - max(llik_houwies))/sum(exp(llik_houwies - max(llik_houwies))))
    if(!is.null(global_liks_mat)){
      stored_llik <- as.numeric(global_liks_mat[which(liks_match_vector), 1])
      if(length(stored_llik) == 1 && !is.na(stored_llik)){
        llik_houwie <- stored_llik
      }
    }
    return(list(TotalLik = llik_houwie, DiscLik = llik_discrete_summed, ContLik = llik_continuous_summed, llik_discrete=llik_discrete, llik_continuous=llik_continuous, simmaps=simmaps, unsorted_lliks_df=unsorted_lliks_df))
  }
  if(!is.null(global_liks_mat)){
    new_row <- which(global_liks_mat$X1 == 0)[1]
    set(global_liks_mat, as.integer(new_row), names(global_liks_mat), as.list(c(llik_houwie, p)))
  }
  if(diagn_msg){
    print(c(round(llik_houwie, 2), round(llik_discrete_summed, 2), round(llik_continuous_summed, 2), round(p, 2)))
  }
  # print(p)
  return(-llik_houwie)
}
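
# hOUwie.dev combines per-map joint log-likelihoods with the log-sum-exp
# trick (max(x) + log(sum(exp(x - max(x))))) so that very negative values do
# not underflow to zero. A minimal standalone sketch of that trick
# (illustrative only; not called by the package):
if (FALSE) {
  log_sum_exp <- function(lliks) {
    m <- max(lliks)
    m + log(sum(exp(lliks - m)))
  }
  lliks <- c(-1000, -1001, -1005)  # per-map joint log-likelihoods
  log_sum_exp(lliks)               # finite: about -999.68
  log(sum(exp(lliks)))             # naive version underflows to -Inf
}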

hOUwie.fixed.dev <- function(p, simmaps, data, rate.cat, tip.fog,
                             index.disc, index.cont, root.p, 
                             edge_liks_list, all.paths=NULL, 
                             sample_tips=FALSE, sample_nodes=FALSE,
                             split.liks=FALSE, adaptive_sampling=FALSE,
                             global_liks_mat=global_liks_mat, diagn_msg=FALSE){
  tip.paths <- all.paths[1:length(simmaps[[1]]$tip.label)]
  p <- exp(p)
  # check if these parameters exist in the global matrix
  # set(global_liks_mat, i = as.integer(1),  j = 1:4, value=as.list(c(0, p)))
  if(!is.null(global_liks_mat)){
    liks_match_vector <- colSums(abs(t(global_liks_mat[,-1]) - p)) == 0
    llik_houwie <- as.numeric(global_liks_mat[which(liks_match_vector), 1])
    if(!split.liks){
      if(any(liks_match_vector, na.rm = TRUE)){
        # print(llik_houwie)
        # print(p)
        return(-llik_houwie)
      }
    }
  }
  
  k <- max(index.disc, na.rm = TRUE)
  p.mk <- p[1:k]
  p.ou <- p[(k+1):length(p)] 
  Rate.mat <- matrix(1, 3, dim(index.disc)[2])
  alpha.na <- is.na(index.cont[1,])
  index.cont[is.na(index.cont)] <- max(index.cont, na.rm = TRUE) + 1
  Rate.mat[] <- c(p.ou, 1e-10)[index.cont]
  alpha = Rate.mat[1,]
  sigma.sq = Rate.mat[2,]
  theta = Rate.mat[3,]
  rate <- index.disc
  rate[is.na(rate)] <- k + 1
  Q <- matrix(0, dim(rate)[1], dim(rate)[2])
  Q[] <- c(p.mk, 0)[rate]
  diag(Q) <- -rowSums(Q)
  # calculate the discrete probabilities based on the given Q matrix (Pij already calculated)
  if(inherits(root.p[1], what="character")){
    if(root.p == "yang"){
      root_liks <- c(MASS::Null(Q))
      root_liks <- root_liks/sum(root_liks)
    }
    if(root.p == "flat"){
      root_liks <- rep(1/dim(Q)[1], dim(Q)[1])
    }
  }else{
    root_liks <- root.p/sum(root.p)
  }
  
  discrete_probs <- lapply(simmaps, function(x) getMapProb(x, Q, root_liks))
  llik_discrete <- unlist(discrete_probs)
  failed_maps <- llik_discrete == -Inf
  llik_discrete <- llik_discrete[!failed_maps]
  if(length(llik_discrete) == 0){
    return(1e10)
  }
  # if there is no character dependence the map has no influence on the continuous likelihood
  character_dependence_check <- all(apply(index.cont, 1, function(x) length(unique(x)) == 1))
  if(character_dependence_check){
    llik_continuous <- OUwie.basic(simmaps[[1]], data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog)
    llik_continuous <- rep(llik_continuous, length(simmaps))
  }else{
    llik_continuous <- unlist(lapply(simmaps, function(x) OUwie.basic(x, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog)))
  }
  # combine probabilities being careful to avoid underflow
  llik_houwies <- llik_discrete + llik_continuous
  llik_houwie <- max(llik_houwies) + log(sum(exp(llik_houwies - max(llik_houwies))))
  llik_discrete_summed <- max(llik_discrete) + log(sum(exp(llik_discrete - max(llik_discrete))))
  llik_continuous_summed <- max(llik_continuous) + log(sum(exp(llik_continuous - max(llik_continuous))))
  
  # no adaptive sampling is performed when the maps are fixed
  if(split.liks){
    # expected_vals <- lapply(simmaps, function(x) OUwie.basic(x, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog,return.expected.vals=TRUE))
    # expected_vals <- colSums(do.call(rbind, expected_vals) * exp(llik_houwies - max(llik_houwies))/sum(exp(llik_houwies - max(llik_houwies))))
    if(!is.null(global_liks_mat)){
      stored_llik <- as.numeric(global_liks_mat[which(liks_match_vector), 1])
      if(length(stored_llik) == 1 && !is.na(stored_llik)){
        llik_houwie <- stored_llik
      }
    }
    return(list(TotalLik = llik_houwie, DiscLik = llik_discrete_summed, ContLik = llik_continuous_summed, llik_discrete=llik_discrete, llik_continuous=llik_continuous, simmaps=simmaps))
  }
  if(!is.null(global_liks_mat)){
    new_row <- which(global_liks_mat$X1 == 0)[1]
    set(global_liks_mat, as.integer(new_row), names(global_liks_mat), as.list(c(llik_houwie, p)))
  }
  if(diagn_msg){
    print(c(round(llik_houwie, 2), round(llik_discrete_summed, 2), round(llik_continuous_summed, 2), round(p, 2)))
  }
  # print(p)
  return(-llik_houwie)
}

# internal for houwie.thorough
runSingleThorough <- function(houwie_obj, new_maps, init_pars){
  hOUwie.dat <- houwie_obj$hOUwie.dat
  root.p <- houwie_obj$root.p
  tip.fog <- houwie_obj$tip.fog
  rate.cat <- houwie_obj$rate.cat
  index.disc <- houwie_obj$index.disc
  n_p_trans <- max(index.disc, na.rm = TRUE)
  p_disc <- na.omit(c(init_pars$mod_avg_disc))[1:n_p_trans]
  index.cont <- houwie_obj$index.cont
  n_p_alpha <- length(unique(na.omit(index.cont[1,])))
  n_p_sigma <- length(unique(na.omit(index.cont[2,])))
  n_p_theta <- length(unique(na.omit(index.cont[3,])))
  p_cont <- c(init_pars$mod_avg_cont[1,seq_len(n_p_alpha)],
              init_pars$mod_avg_cont[2,seq_len(n_p_sigma)],
              init_pars$mod_avg_cont[3,seq_len(n_p_theta)])
  ip <- c(p_disc, p_cont)
  res <- hOUwie.fixed(simmaps = new_maps, data = hOUwie.dat$data.ou, rate.cat = rate.cat, discrete_model = index.disc, continuous_model = index.cont, adaptive_sampling = FALSE, make_numeric = FALSE, ip = ip)
  return(res)
}

makeMapEdgesNumeric <- function(simmap, observed_traits){
  simmap$maps <- lapply(simmap$maps, function(x) replaceName(x, observed_traits))
  colnames(simmap$mapped.edge) <- as.numeric(names(observed_traits)[match(observed_traits, colnames(simmap$mapped.edge))])
  return(simmap)
}

replaceName <- function(edge, observed_traits){
  names(edge) <- as.numeric(names(observed_traits)[match(names(edge), observed_traits)])
  return(edge)
}

getEdgeLiks <- function(phy, data, n.traits, rate.cat, time_slice){
  edge_liks_list <- vector("list", dim(phy$edge)[1])
  nTip <- length(phy$tip.label)
  for(edge_i in 1:dim(phy$edge)[1]){
    # +2 because we slice the middle of the branch and need the 2 terminal nodes (ancestor and descendant)
    n_slice <- (phy$edge.length[edge_i] %/% time_slice) + 2
    edge_liks_list[[edge_i]] <- matrix(1, n_slice, n.traits * rate.cat)
    if(phy$edge[edge_i,2] <= nTip){
      tmp <- numeric(n.traits)
      species_i <- phy$tip.label[phy$edge[edge_i,2]]
      if(!species_i %in% data[,1]){
        next
      }
      state_i <- data[data[,1] == species_i, 2]
      state_i_index <- as.numeric(unlist(strsplit(as.character(state_i), "&")))
      tmp[state_i_index] <- 1
      edge_liks_list[[edge_i]][1,] <- rep(tmp, rate.cat)
    }
  }
  return(edge_liks_list)
}
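
# Each edge matrix above holds one row per internode: floor(edge.length /
# time_slice) interior slices plus the two terminal nodes. A quick check of
# that arithmetic (illustrative only):
if (FALSE) {
  edge_length <- 2.5
  time_slice <- 1
  (edge_length %/% time_slice) + 2  # 4 rows of conditional likelihoods
}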

getConditionalInternodeLik <- function(phy, Q, edge_liks_list){
  nTip <- length(phy$tip.label)
  external_index <- which(phy$edge[,2] <= nTip)
  # external edges
  for(edge_i in external_index){
    # move rootward along all tips to their root
    n_edges <- dim(edge_liks_list[[edge_i]])[1] - 1
    time_edge <- phy$edge.length[edge_i]
    p_mat_i <- expm(Q * (time_edge/n_edges), method=c("Ward77"))
    for(inter_edge_i in 2:(n_edges+1)){
      dec_states <- edge_liks_list[[edge_i]][inter_edge_i-1,]
      v <- edge_liks_list[[edge_i]][inter_edge_i,] * c(p_mat_i %*% dec_states )
      edge_liks_list[[edge_i]][inter_edge_i,] <- v/sum(v)
    }
  }
  # internal edges
  anc <- unique(phy$edge[,1])
  # remove the root
  root <- anc[length(anc)]
  anc <- anc[-length(anc)]
  for(anc_i in anc){
    # for the start of an internal node, combine the decs
    edge_i <- which(phy$edge[,2] == anc_i)
    dec_combo_index_i <- which(phy$edge[,1] == anc_i)
    v <- 1
    for(j in dec_combo_index_i){
      liks_j <- edge_liks_list[[j]]
      v <- v * liks_j[dim(liks_j)[1],]
    }
    v <- edge_liks_list[[edge_i]][1,] * v
    edge_liks_list[[edge_i]][1,] <- v/sum(v)
    n_edges <- dim(edge_liks_list[[edge_i]])[1] - 1
    time_edge <- phy$edge.length[edge_i]
    p_mat_i <- expm(Q * (time_edge/n_edges), method=c("Ward77"))
    for(inter_edge_i in 2:(n_edges+1)){
      dec_states <- edge_liks_list[[edge_i]][inter_edge_i-1,]
      v <- edge_liks_list[[edge_i]][inter_edge_i,] * c(p_mat_i %*% dec_states )
      edge_liks_list[[edge_i]][inter_edge_i,] <- v/sum(v)
    }
  }
  # do the root
  dec_combo_index_i <- which(phy$edge[,1] == root)
  v <- 1
  for(j in dec_combo_index_i){
    liks_j <- edge_liks_list[[j]]
    v <- v * liks_j[dim(liks_j)[1],]
  }
  root_state <- v/sum(v)
  return(list(root_state = root_state,
              edge_liks_list = edge_liks_list))
}
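
# Each pass above propagates a descendant's conditional likelihoods through
# the transition matrix P = expm(Q * dt) and renormalizes. A hand-rolled
# sketch of one such update for a hypothetical 2-state Q (illustrative only;
# assumes the expm package is available):
if (FALSE) {
  Q <- matrix(c(-1, 1, 2, -2), 2, 2, byrow = TRUE)
  P <- expm::expm(Q * 0.5)            # transition probabilities over dt = 0.5
  dec_states <- c(1, 0)               # descendant observed in state 1
  v <- c(1, 1) * c(P %*% dec_states)  # slice likelihoods times P %*% dec
  v / sum(v)                          # renormalized conditional likelihoods
}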

getInternodeMap <- function(phy, Q, edge_liks_list, root_state, root_liks, nSim, check_vector=NULL, max.attempts){
  # set-up
  current.attempts <- 0
  nStates <- dim(Q)[1]
  nTip <- length(phy$tip.label)
  # a potential speedup is to calculate all Pij (Bollback eq. 3) for all branches first
  Pij <- array(0, c(dim(Q)[1], dim(Q)[2], length(phy$edge.length)))
  # reduced edge.lengths since we are including internodes
  number_of_nodes_per_edge <- unlist(lapply(edge_liks_list, function(x) dim(x)[1]))
  number_of_edges_per_edge <- number_of_nodes_per_edge - 1
  reduced_edge_length <- phy$edge.length/number_of_edges_per_edge
  for(i in 1:length(phy$edge.length)){
    Pij[,,i] <- expm(Q * reduced_edge_length[i])
  }
  # the probability of a descendant being in state j given the starting state indexing the rows of the Pj matrix
  Pj <- vector("list", length(phy$edge.length))
  for(i in 1:length(Pj)){
    Pj[[i]] <- array(0, c(dim(Q)[1], dim(Q)[2], number_of_nodes_per_edge[i]))
    for(j in 1:number_of_nodes_per_edge[i]){
      Pj[[i]][,,j] <- sweep(Pij[,,i], MARGIN = 2, edge_liks_list[[i]][j,], '*') 
    }
  }
  # simulate nSim substitution histories
  rev.pruning.order <- rev(reorder.phylo(phy, "pruningwise", index.only = TRUE))
  sub_histories <- vector("list", nSim)
  root_edges <- which(phy$edge[,1] == nTip + 1)
  edge_index <- phy$edge
  Map_i <- mapply(function(x, y) rep(x, y), x=reduced_edge_length/2, y=number_of_edges_per_edge*2, SIMPLIFY = FALSE)
  if(!is.null(check_vector)){
    state_samples <- vector("list", nSim)
    sim_counter <- 0
    while(!(sim_counter >= nSim | current.attempts >= max.attempts)){
      state_sample <- try(getInternodeStateSample(Pj, root_state, root_edges, rev.pruning.order, edge_index, nStates, number_of_nodes_per_edge), silent = TRUE)
      if(inherits(state_sample, "try-error")){
        current.attempts <- current.attempts + 1
      }else{
        current_mapping_id <- paste0(unlist(state_sample), collapse="")
        if(!current_mapping_id %in% check_vector){
          sim_counter <- sim_counter + 1
          state_samples[[sim_counter]] <- state_sample
          check_vector <- c(check_vector, current_mapping_id)
        }else{
          current.attempts <- current.attempts + 1
        }
      }
    }
  }else{
    state_samples <- lapply(1:nSim, function(x) try(getInternodeStateSample(Pj, root_state, root_edges, rev.pruning.order, edge_index, nStates, number_of_nodes_per_edge), silent = TRUE))
    state_samples <- state_samples[!unlist(lapply(state_samples, inherits, what = "try-error"))]
  }
  mapping_ids <- unlist(lapply(state_samples, function(x) paste0(unlist(x), collapse="")))
  state_samples <- state_samples[!duplicated(mapping_ids)]
  mapping_ids <- unlist(lapply(state_samples, function(x) paste0(unlist(x), collapse="")))
  state_samples <- state_samples[mapping_ids != ""]
  maps <- lapply(state_samples, function(x) getMapFromStateSample(Map_i, x))
  return(list(state_samples=state_samples, maps = maps, root_edges=root_edges,
              Pij = Pij, Pj = Pj))
}
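
# Pj scales each column of Pij by the conditional likelihoods of the tipward
# slice, so drawing the next internode state (Bollback-style) is just a
# sample proportional to Pj[from, ]. A sketch with made-up numbers
# (illustrative only):
if (FALSE) {
  Pij <- matrix(c(0.7, 0.3, 0.4, 0.6), 2, 2, byrow = TRUE)
  dec_liks <- c(1, 0.5)                        # slice conditional likelihoods
  Pj <- sweep(Pij, MARGIN = 2, dec_liks, '*')  # column j scaled by dec_liks[j]
  sample(1:2, 1, prob = Pj[1, ])               # next state given current state 1
}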

getMapFromStateSample <- function(map, state_sample){
  for(edge_i in 1:length(map)){
    state_transitions <- rep(state_sample[[edge_i]][-c(1, length(state_sample[[edge_i]]))], each = 2)
    state_samples_i <- c(state_sample[[edge_i]][1],state_transitions,state_sample[[edge_i]][length(state_sample[[edge_i]])])
    names(map[[edge_i]]) <- state_samples_i
  }
  return(map)
}

getInternodeStateSample <- function(Pj, root_state, root_edge, rev.pruning.order, edge_index, nStates, number_of_nodes_per_edge){
  # each map will have edges split into equal time portions
  state_samples <- lapply(number_of_nodes_per_edge, function(x) numeric(x))
  root_sample <- sample(1:nStates, 1, prob = root_state)
  for(i in root_edge){
    state_samples[[i]][1] <- root_sample
  }
  # sample the nodes along a branch; the last descendant node seeds the next edge's map
  for(edge_i in rev.pruning.order){
    from <- state_samples[[edge_i]][1]
    count <- 2
    n_inter_nodes <- length(state_samples[[edge_i]])
    for(inter_edge_i in (n_inter_nodes-1):1){
      to <- sample(1:nStates, 1, prob = Pj[[edge_i]][from,,inter_edge_i])
      from <- state_samples[[edge_i]][count] <- to
      count <- count + 1
    }
    ancestor_to_add <- edge_index[edge_i,2]
    anc_edge <- which(ancestor_to_add == edge_index[,1])
    for(i in anc_edge){
      state_samples[[i]][1] <- to
    }
  }
  return(state_samples)
}

# get path probability internal
getPathStateProb <- function(path_states, p_mat){
  P <- vector("numeric", length(path_states)-1)
  for(i in 1:(length(path_states)-1)){
    P[i] <- p_mat[path_states[i], path_states[i+1]]
  }
  return(sum(log(P)))
}
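
# getPathStateProb sums log transition probabilities over consecutive slice
# states. An equivalent vectorized sketch using matrix indexing (illustrative
# only):
if (FALSE) {
  p_mat <- matrix(c(0.9, 0.1, 0.2, 0.8), 2, 2, byrow = TRUE)
  path_states <- c(1, 1, 2)  # slice states along one edge
  from <- path_states[-length(path_states)]
  to <- path_states[-1]
  sum(log(p_mat[cbind(from, to)]))  # log(0.9) + log(0.1)
}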

getStateSampleProb <- function(state_sample, Pij, root_liks, root_edges){
  path_probs <- numeric(length(state_sample))
  root_sample <- state_sample[[root_edges[1]]][1] # the root sample
  for(i in 1:length(state_sample)){
    path_probs[i] <- getPathStateProb(state_sample[[i]], Pij[,,i])
  }
  llik <- sum(path_probs) + log(root_liks[root_sample])
  return(llik)
}

# probability of a particular stochastic map
getPathMapProb <- function(map_edge, Q){
  path_states <- as.numeric(c(names(map_edge)[1], names(map_edge)[length(map_edge)]))
  P <- expm(Q * sum(map_edge))[path_states[1],path_states[2]]
  return(log(P))
}

getMapProb <- function(simmap, Q, root_prior){
  path_probs <- lapply(simmap$maps, function(x) getPathMapProb(x, Q))
  root_state <- as.numeric(names(simmap$maps[[which.min(simmap$edge[,1])]][1]))
  p_vec <- unlist(path_probs)
  llik <- sum(c(p_vec, log(root_prior)[root_state]))
  return(llik)
}
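
# getMapProb treats each edge as a single ancestor-to-descendant transition
# over the edge's total duration and adds the log prior of the root state.
# A sketch for a single hypothetical edge (illustrative only; assumes the
# expm package is available):
if (FALSE) {
  Q <- matrix(c(-1, 1, 2, -2), 2, 2, byrow = TRUE)
  map_edge <- c("1" = 0.3, "2" = 0.2)  # 0.3 time units in state 1, then 0.2 in state 2
  P <- expm::expm(Q * sum(map_edge))
  log(P[1, 2])                         # log-probability of starting in 1 and ending in 2
}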



# take substitution histories and make them simmaps
getMapFromSubstHistory <- function(maps, phy){
  mapped.edge <- lapply(maps, function(x) corHMM:::convertSubHistoryToEdge(phy, x))
  obj <- vector("list", length(maps))
  for (i in 1:length(maps)){
    tree.simmap <- phy
    tree.simmap$maps <- maps[[i]]
    tree.simmap$mapped.edge <- mapped.edge[[i]]
    attr(tree.simmap, "map.order") <- "right-to-left"
    if (!inherits(tree.simmap, "simmap")) 
      class(tree.simmap) <- c("simmap", setdiff(class(tree.simmap), 
                                                "simmap"))
    obj[[i]] <- tree.simmap
  }
  if (length(maps) > 1) {
    class(obj) <- c("multiSimmap", "multiPhylo")
  }
  return(obj)
  
}
# a basic objective function (negative log-likelihood) for optimizing OUwie.basic
OUwie.basic.dev <- function(p, phy, data, tip.fog, index.cont, tip.paths=NULL){
  p <- exp(p)
  index.cont[is.na(index.cont)] <- max(index.cont, na.rm = TRUE) + 1
  Rate.mat <- matrix(1, 3, dim(index.cont)[2])
  Rate.mat[] <- c(p, 1e-10)[index.cont]
  alpha = Rate.mat[1,]
  sigma.sq = Rate.mat[2,]
  theta = Rate.mat[3,]
  llik_continuous <- OUwie.basic(phy, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog=tip.fog)
  return(-llik_continuous)
}
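
# OUwie.basic.dev expects parameters on the log scale: p is exponentiated on
# entry so rates stay positive during unconstrained optimization, and the
# negative log-likelihood is returned for minimization. A sketch of that
# convention with a hypothetical stand-in objective (illustrative only):
if (FALSE) {
  nll <- function(log_p) { p <- exp(log_p); (p - 2)^2 }  # stand-in objective
  optim(log(1), nll, method = "BFGS")$par                # converges to log(2)
}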


# likelihood of the continuous trait given a stochastic map
OUwie.basic<-function(phy, data, simmap.tree=TRUE, root.age=NULL, scaleHeight=FALSE, root.station=FALSE, get.root.theta=FALSE, shift.point=0.5, alpha, sigma.sq, theta, tip.fog="none", algorithm="three.point", tip.paths=NULL, return.expected.vals=FALSE){
  # organize tip states based on what the simmap suggests
  mapping <- unlist(lapply(phy$maps, function(x) names(x[length(x)])))
  nTip <- length(phy$tip.label)
  TipStates <- mapping[match(match(data[,1], phy$tip.label), phy$edge[,2])]
  data[,2] <- TipStates
  
  #Makes sure the data is in the same order as the tip labels
  if(tip.fog=="none"){
    data <- data.frame(data[,2], data[,3], row.names=data[,1])
    data <- data[phy$tip.label,]
  }
  if(tip.fog=="known"){
    # algorithm = "invert"
    if(!dim(data)[2]==4){
      stop("You specified measurement error should be incorporated, but this information is missing.", call. = FALSE)
    }
    else{
      if(is.factor(data[,4]) == TRUE){
        stop("Check the format of the measurement error column. It's reading as a factor.",  call. = FALSE)
      }else{
        data <- data.frame(data[,2], data[,3], data[,4], row.names=data[,1])
        data <- data[phy$tip.label,]
      }
    }
  }
  
  #Values to be used throughout
  n <- max(phy$edge[,1])
  ntips <- length(phy$tip.label)
  
  # setup values when simmap (always simmap for hOUwie)
  k <- length(colnames(phy$mapped.edge))
  tot.states <- factor(colnames(phy$mapped.edge))
  tip.states <- factor(data[,1])
  data[,1] <- as.numeric(tip.states)
  #Obtains the state at the root
  root.edge.index <- which(phy$edge[,1] == ntips+1)
  root.state <- which(colnames(phy$mapped.edge)==names(phy$maps[[root.edge.index[2]]][1]))
  ##Begins the construction of the edges matrix -- similar to the ouch format##
  edges <- cbind(c(1:(n-1)),phy$edge,MakeAgeTable(phy, root.age=root.age))
  if(scaleHeight == TRUE){
    Tmax <- max(MakeAgeTable(phy, root.age=root.age))
    edges[,4:5]<-edges[,4:5]/Tmax
    root.age <-  1
    phy$maps <- lapply(phy$maps, function(x) x/Tmax)
  }
  edges <- edges[sort.list(edges[,3]),]
  #Resort the edge matrix so that it looks like the original matrix order
  edges <- edges[sort.list(edges[,1]),]
  
  if(algorithm == "three.point"){
    x <- data[,2]
    names(x) <- rownames(data)
  }else{
    x <- as.matrix(data[,2])
  }
  
  if(scaleHeight==TRUE){
    phy$edge.length <- phy$edge.length/Tmax
    Tmax <- 1
  }
  
  map <- phy$maps
  
  Rate.mat <- rbind(alpha, sigma.sq, theta)
  pars <- matrix(c(theta, sigma.sq, alpha), length(theta), 3, dimnames = list(1:length(sigma.sq), c("opt", "sig", "alp")))
  # if the simmap did not simulate every possible state in a given HMM
  if(dim(phy$mapped.edge)[2] != dim(Rate.mat)[2]){
    Rate.mat <- Rate.mat[,as.numeric(colnames(phy$mapped.edge))]
    pars <- pars[as.numeric(colnames(phy$mapped.edge)), ]
  }
  
  if(get.root.theta == TRUE){
    W <- weight.mat(phy, edges, Rate.mat, root.state=root.state, simmap.tree=simmap.tree, root.age=root.age, scaleHeight=scaleHeight, assume.station=FALSE, shift.point=shift.point)
  }else{
    W <- weight.mat(phy, edges, Rate.mat, root.state=root.state, simmap.tree=simmap.tree, root.age=root.age, scaleHeight=scaleHeight, assume.station=TRUE, shift.point=shift.point)
  }
  
  #Likelihood function for estimating model parameters
  if(get.root.theta == TRUE){
    root_index <- as.numeric(names(phy$maps[[which.min(phy$edge[,1])]])[1])
    theta0 <- theta[root_index]
    expected.vals <- colSums(t(W) * c(theta0, pars[,1]))
    names(expected.vals) <- phy$tip.label
  }else{
    expected.vals <- colSums(t(W) * pars[,1])
    names(expected.vals) <- phy$tip.label
  }
  if(return.expected.vals){
    return(expected.vals)
  }
  transformed.tree <- transformPhy(phy, map, pars, tip.paths)
  # add known measurement error to the pendant (tip) edge lengths
  if(tip.fog=="known"){
    TIPS <- transformed.tree$tree$edge[,2] <= length(transformed.tree$tree$tip.label)
    transformed.tree$tree$edge.length[TIPS] <- transformed.tree$tree$edge.length[TIPS] + (data[,3]^2/transformed.tree$diag/transformed.tree$diag)
  }
  comp <- NA
  try(comp <- phylolm::three.point.compute(transformed.tree$tree, x, expected.vals, transformed.tree$diag, check.precision = FALSE), silent=TRUE) # for the newest version (github) of phylolm
  if(is.na(comp[1])){
    try(comp <- phylolm::three.point.compute(transformed.tree$tree, x, expected.vals, transformed.tree$diag), silent=TRUE) # for the cran version of phylolm
  }
  if(is.na(comp[1])){
    return(-1e10)
  }else{
    nTips <- length(phy$tip.label)
    logl <- -as.numeric(nTips * log(2 * pi) + comp$logd + (comp$PP - 2 * comp$QP + comp$QQ))/2
    return(logl)
  }
}

# script for generating all the possible underlying mappings and looking at the joint probability. Using this we can look at the bias produced by looking only at the discrete mappings.
fixEdgeLiksLiks <- function(edge_liks_list, combo, phy, n_tips, n_nodes, n_internodes, nStates, rate.cat){
  # fix the externals
  for(j in 1:n_tips){
    tip_index <- n_nodes + n_internodes + which(phy$edge[phy$edge[,2] <= n_tips,2] == j)
    tip_state <- combo[tip_index]
    dec_edges_to_fix <- which(phy$edge[,2] == j)
    fix_vector <- numeric(nStates * rate.cat)
    fix_vector[tip_state] <- 1
    for(k in dec_edges_to_fix){
      edge_liks_list[[k]][1,] <- fix_vector
    }
  }
  # fix the internals
  for(j in 1:n_nodes){
    node_index <- unique(phy$edge[,1])[j]
    node_state <- combo[j]
    anc_edges_to_fix <- which(phy$edge[,1] == node_index)
    dec_edges_to_fix <- which(phy$edge[,2] == node_index)
    fix_vector <- numeric(nStates * rate.cat)
    fix_vector[node_state] <- 1
    for(k in dec_edges_to_fix){
      edge_liks_list[[k]][1,] <- fix_vector
    }
    for(k in anc_edges_to_fix){
      last_row <- dim(edge_liks_list[[k]])[1]
      edge_liks_list[[k]][last_row,] <- fix_vector
    }
  }
  # fix the inter nodes
  if(n_internodes > 0){
    for(j in 1:n_internodes){
      internode_index_list <- which(unlist(lapply(edge_liks_list, function(x) dim(x)[1] - 2)) >= 1)[j]
      internode_state <- combo[j + n_nodes]
      internode_index_edge <- which(apply(edge_liks_list[[internode_index_list]], 1, function(x) sum(x) > 1))[1]
      fix_vector <- numeric(nStates * rate.cat)
      fix_vector[internode_state] <- 1
      edge_liks_list[[internode_index_list]][internode_index_edge,] <- fix_vector
    }
  }
  return(edge_liks_list)
}

getAllJointProbs<- function(phy, data, rate.cat, time_slice, Q, alpha, sigma.sq, theta, quiet=TRUE){
  # prerequisites
  hOUwie.dat <- organizeHOUwieDat(data, "none", TRUE)
  nStates <- as.numeric(max(hOUwie.dat$data.cor[,2]))
  tip.paths <- lapply(1:length(phy$tip.label), function(x) getPathToRoot(phy, x))
  # generate the edge_liks_list
  edge_liks_list <- getEdgeLiks(phy, hOUwie.dat$data.cor, nStates, rate.cat, time_slice)
  # determine all the possible ways to fix the node states
  # how many nodes and internodes are we fixing?
  n_tips <- length(phy$tip.label)
  n_nodes <- n_tips - 1
  n_internodes <- sum(unlist(lapply(edge_liks_list, function(x) dim(x)[1] - 2)))
  # what are the possible internal combinations?
  internal_possibilities <- rep(list(1:(nStates*rate.cat)), n_nodes + n_internodes)
  external_possibilities <- lapply(edge_liks_list[phy$edge[,2] <= n_tips], function(x) which(x[1,] == 1))
  all_combinations <- expand.grid(c(internal_possibilities, external_possibilities))
  # the joint probability table
  joint_probability_table <- matrix(NA, dim(all_combinations)[1], 3, dimnames = list(1:dim(all_combinations)[1], c("disc", "cont", "total")))
  simmap_list <- vector("list", dim(all_combinations)[1])
  if(!quiet){
    cat("Begining to calcualte all possible map combinations...\n")
  }
  # for each possibility, generate an edge_liks_list to match
  for(i in 1:dim(all_combinations)[1]){
    if(!quiet){
      cat("\r", i, "of", dim(all_combinations)[1], "complete.         ")
    }
    combo_i <- as.numeric(all_combinations[i,])
    root_state <- numeric(nStates * rate.cat)
    root_state[combo_i[which(unique(phy$edge[,1]) == n_tips + 1)[1]]] <- 1
    edge_liks_list_i <- fixEdgeLiksLiks(edge_liks_list, combo_i, phy, n_tips, n_nodes, n_internodes, nStates, rate.cat)
    root_liks <- rep(1, nStates * rate.cat)/(nStates * rate.cat)
    # calculate the discrete probability of the edge_liks_list
    tmp <- getInternodeMap(phy, Q, edge_liks_list_i, root_state, root_liks, 1)
    internode_maps <- tmp$maps
    internode_samples <- tmp$state_samples
    llik_discrete <- unlist(lapply(internode_samples, function(x) getStateSampleProb(state_sample = x, Pij = tmp$Pij, root_liks = root_liks, root_edges = tmp$root_edges)))
    # generate a stochastic map
    simmaps <- getMapFromSubstHistory(internode_maps, phy)
    llik_continuous <- unlist(lapply(simmaps, function(x) OUwie.basic(x, data, simmap.tree=TRUE, scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq, theta=theta, algorithm="three.point", tip.paths=tip.paths, tip.fog="none")))
    simmap_list[[i]] <- simmaps[[1]]
    joint_probability_table[i,] <- c(llik_discrete, llik_continuous, llik_discrete + llik_continuous)
  }
  if(!quiet){
    cat("\n")
  }
  simmap_list <- lapply(simmap_list, correct_map_edges)
  class(simmap_list) <- c("multiSimmap", "multiPhylo")
  return(list(joint_probability_table=joint_probability_table, simmap_list=simmap_list, all_combinations=all_combinations))
}

# branchwise joint calculations for conditional probabilities
getOUProbBranch <- function(tip_value, states, pars, bl, init_value, init_var){
  # states are read from tip to root: states[1] = tip state, states[2] = node state
  times <- c(0, bl/2, bl)
  alphas <- c(pars[1,states])
  sigma2s <- c(pars[2,states])
  thetas <- c(pars[3,states])
  if(is.null(init_value)){
    values <- c(thetas[1], thetas)
  }else{
    values <- c(init_value, thetas)
  }
  # finding the expected value
  tip_weight <- exp(-((alphas[1] * (times[2] - times[1])) + (alphas[2] * (times[3] - times[2]))))
  theta_1_weight <- tip_weight * (exp(alphas[1] * times[2]) - exp(alphas[1] * times[1]))
  theta_2_weight <- tip_weight * (exp(alphas[2] * times[3]) - exp(alphas[2] * times[2]))
  weights <- c(tip_weight, theta_1_weight, theta_2_weight)/sum(c(tip_weight, theta_1_weight, theta_2_weight))
  expected_value <- sum(values * weights)
  # finding the variance 
  var_weight <- exp(-((2 * alphas[1] * (times[2] - times[1])) + (2 * alphas[2] * (times[3] - times[2]))))
  var_1 <- sigma2s[1]/(2*alphas[1]) * (exp(2 * alphas[1] * times[2]) - exp(2 * alphas[1] * times[1]))
  var_2 <- sigma2s[2]/(2*alphas[2]) * (exp(2 * alphas[2] * times[3]) - exp(2 * alphas[2] * times[2]))
  variance <- (sum(c(var_1, var_2)) * var_weight) + init_var
  loglik <- dnorm(tip_value, expected_value, sqrt(variance), log = TRUE)
  return(loglik)
}
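
# When both halves of the branch share one regime, the weights above collapse
# to the standard OU moments E[X_t] = x0*exp(-a*t) + theta*(1 - exp(-a*t))
# and Var[X_t] = s2/(2*a) * (1 - exp(-2*a*t)). A quick sanity check of that
# reduction (illustrative only):
if (FALSE) {
  a <- 1; s2 <- 0.5; theta <- 2; x0 <- 0; t <- 1.5
  pars <- rbind(alpha = c(a, a), sigma.sq = c(s2, s2), theta = c(theta, theta))
  ou_mean <- x0 * exp(-a * t) + theta * (1 - exp(-a * t))
  ou_var <- s2 / (2 * a) * (1 - exp(-2 * a * t))
  getOUProbBranch(tip_value = 1, states = c(1, 1), pars = pars, bl = t,
                  init_value = x0, init_var = 0)
  dnorm(1, ou_mean, sqrt(ou_var), log = TRUE)  # should match the line above
}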

getJointProbBranch <- function(states, tip_value, pars, bl, P_mat, init_value, init_var){
  continuous_prob <- getOUProbBranch(tip_value, states, pars, bl, init_value, init_var)
  discrete_prob <- log(P_mat[states[1], states[2]])
  joint_prob <- sum(continuous_prob, discrete_prob)
  return(joint_prob)
}

getJointBranchMatrix <- function(possible_internal, possible_external, tip_value, pars, bl, P_mat, init_value=NULL, init_var = 0){
  cond_matrix <- matrix(NA, dim(P_mat)[1], dim(P_mat)[2])
  for(i in possible_internal){
    for(j in possible_external){
      cond_matrix[i,j] <- getJointProbBranch(c(i,j), tip_value, pars, bl, P_mat, init_value, init_var)
    }
  }
  colnames(cond_matrix) <- rownames(cond_matrix) <- c(1:max(possible_internal))
  return(cond_matrix)
}

# a function for getting OU expectations per node and tip: both variances and means
getOUExpectations <- function(simmap, Rate.mat, tip.paths=NULL){
  Rate.mat[is.na(Rate.mat)] <- 1e-10
  # phy must be of class simmap
  nTip <- length(simmap$tip.label)
  if(is.null(tip.paths)){
    tip.paths <- lapply(1:(nTip*2-1), function(x) getPathToRoot(simmap, x))
  }
  # find the root state and its theta value
  root_state <- as.numeric(names(simmap$maps[[which.min(simmap$edge[,1])]])[1])
  expected_vars <- expected_values <- vector("numeric", length(tip.paths))
  for(i in 1:length(tip.paths)){
    # evaluate the map for this particular edge and calculate the tipward variance
    Map_i <- simmap$maps[tip.paths[[i]]]
    if(length(Map_i) == 0){
      next # we are at the root
    }
    # get the branch specific parameters
    branch_lengths_i <- unlist(rev(Map_i))
    alpha_values <- Rate.mat[1, as.numeric(names(branch_lengths_i))]
    sigma_values <- Rate.mat[2, as.numeric(names(branch_lengths_i))]
    theta_values <- Rate.mat[3, as.numeric(names(branch_lengths_i))]
    # get the ancestral weight value
    ancestral_weight <- exp(-sum(branch_lengths_i * alpha_values))
    # get the ancestral weight variance
    ancestral_var <- exp(-sum(2 * branch_lengths_i * alpha_values))
    # get the weight of each theta per branch
    dist_from_root_i <- c(0, cumsum(branch_lengths_i))
    var_weights <- theta_weights <- vector("numeric", length(dist_from_root_i))
    for(j in 2:length(dist_from_root_i)){
      # doing means
      time_2_j <- exp(dist_from_root_i[j] * alpha_values[j-1])
      time_1_j <- exp(dist_from_root_i[j-1] * alpha_values[j-1])
      weight_j <- ancestral_weight * (time_2_j - time_1_j)
      theta_weights[j] <- weight_j
      # doing variance
      var_2 <- exp(2 * dist_from_root_i[j] * alpha_values[j-1])
      var_1 <- exp(2 * dist_from_root_i[j-1] * alpha_values[j-1])
      var_weights[j] <- (sigma_values[j-1]/(2 * alpha_values[j-1])) * (var_2 - var_1)
    }
    # doing the means
    theta_weights[1] <- ancestral_weight # add the ancestral weight 
    theta_weights <- theta_weights/sum(theta_weights) # standardize the weights to sum to one 
    theta_values <- c(Rate.mat[3, root_state], theta_values) # add the root theta to the branch values
    expected_value <- sum(theta_values * theta_weights)
    expected_values[i] <- expected_value
    # doing the variance
    expected_vars[i] <- sum(var_weights) * ancestral_var
  }
  expected_values[expected_values == 0] <- Rate.mat[3, root_state] # add the root as an expectation
  names(expected_vars) <- names(expected_values) <- 1:(nTip*2-1) # named by node number
  return(list(expected_means=expected_values, expected_variances=expected_vars))
}

combineDesc <- function(P_mat_1, P_mat_2){
  # P_mat_1 <- node_mat[[1]]
  # P_mat_2 <- node_mat[[2]]
  normalized_combined_probs <- vector("list", dim(P_mat_1)[1])
  for(i in 1:dim(P_mat_1)[1]){
    combined_probs_i <- P_mat_1[i,] * t(P_mat_2)
    normalized_combined_probs[[i]] <- t(combined_probs_i)/colSums(combined_probs_i)
  }
  normalized_combined_probs <- do.call(rbind, normalized_combined_probs)
  return(normalized_combined_probs)  
}


# a function for generating alternative conditional probabilities (cherry sampling of the continuous trait)
getCherryConditionals <- function(phy, data, Rate.mat, Q, edge_liks_list,tip.paths){
  node_ages <- branching.times(phy)
  dec_liks <- do.call(rbind, lapply(edge_liks_list, function(x) x[1,]))
  anc <- unique(phy$edge[,1])
  possible_internal <- 1:dim(Q)[1]
  for(i in 1:length(anc)){
    # whatever tip paths are chosen, they must include each of these
    # i = 3
    node_edges <- which(phy$edge[,1] == anc[i])
    bl <- node_ages[names(node_ages) == anc[i]]
    node_mat <- vector("list", length(node_edges))
    P_mat <- expm(Q * bl)
    for(j in 1:length(node_edges)){
      # j = 1
      # tips which contain j are shown here
      tips_from_anc <- which(unlist(lapply(tip.paths, function(x) any(node_edges[j] %in% x))))
      # tip_sampled <- sample(c(tips_from_anc, tips_from_anc), 1)
      tip_sampled <- tips_from_anc
      tip_index <- match(tip_sampled, phy$edge[,2]) # relative to the edge matrix
      possible_external <- matrix(dec_liks[tip_index,], ncol = length(possible_internal))
      # possible_external <- which(dec_liks[tip_index,] == 1)
      # tip_value <- mean(data[data$sp %in% phy$tip.label[tip_sampled],3])
      tip_values <- c(data[data$sp %in% phy$tip.label[tip_sampled],3])
      # branch_matrix <- getJointBranchMatrix(possible_internal, possible_external, tip_value, Rate.mat, bl, P_mat)
      branch_matrices <- vector("list", length(tip_values))
      for(k in 1:length(tip_values)){
        branch_matrices[[k]] <- getJointBranchMatrix(possible_internal, which(possible_external[k,] == 1), tip_values[k], Rate.mat, bl, P_mat)
      }
      # branch_matrix <- getJointBranchMatrix(possible_internal, possible_internal, tip_value, Rate.mat, bl, P_mat)
      # the likelihood that the rootward state led to the known tipward state
      node_state_liks_list <- lapply(branch_matrices, 
                                     function(x) apply(x, 1, sum_lliks))
      node_state_liks_list <- lapply(node_state_liks_list, 
                                     function(x) exp(x - max(x))/sum(exp(x - max(x))))
      node_state_liks <- do.call(rbind, node_state_liks_list)
      node_mat[[j]] <- node_state_liks
    }
    node_mat <- lapply(node_mat, colMeans)
    new_node_mat <- do.call(rbind, node_mat)
    # new_node_mat <- node_mat[[1]]
    # for(j in 2:length(node_mat)){
    #   new_node_mat <- combineDesc(new_node_mat, node_mat[[j]])
    # }
    # node_mat contains two (or more) lists of tip samples; what needs to happen now is combining the two edges as parent-daughters and then combining those pairs
    # node_state_liks <- Reduce("*", node_state_liks_list)
    # node_state_liks <- apply(branch_matrix, 1, sum_lliks)
    # node_state_probs <- exp(node_state_liks - max(node_state_liks))/sum(exp(node_state_liks - max(node_state_liks)))
    # once the node has finished calculating its conditional probability of each state, we combine the two descendant tips
    # node_mat[node_mat == 0] <- 1e-10
    # node_cond_prob <- apply(node_mat, 2, prod)
    node_cond_prob <- colMeans(new_node_mat)
    # node_state_probs[node_state_probs == 0] <- 1e-10
    # node_cond_prob <- node_state_probs
    # this gets placed in the edge matrix
    anc_index <- which(phy$edge[,1] %in% anc[i])
    dec_index <- which(phy$edge[,2] %in% anc[i])
    for(k in anc_index){
      # k = 5
      edge_liks_list[[k]][dim(edge_liks_list[[k]])[1],] <- node_cond_prob/sum(node_cond_prob)
    }
    if(length(dec_index) > 0){
      for(k in dec_index){
        edge_liks_list[[k]][1,] <- node_cond_prob/sum(node_cond_prob)
      }
    }
  }
  return(edge_liks_list)
}

# a function for generating alternative conditional probabilities (adaptive sampling of the continuous trait)
getAdaptiveConditionals <- function(phy, data, Rate.mat,  Q, edge_liks_list, tip.paths, ou_expectations){
  node_ages <- branching.times(phy)
  dec_liks <- do.call(rbind, lapply(edge_liks_list, function(x) x[1,]))
  anc <- unique(phy$edge[,1])
  possible_internal <- 1:dim(Q)[1]
  for(i in 1:length(anc)){
    # whatever tip paths are chosen, they must include each of these
    # i = 3
    anc_value <- ou_expectations$expected_means[names(ou_expectations$expected_means) == anc[i]]
    anc_var <- ou_expectations$expected_variances[names(ou_expectations$expected_variances) == anc[i]]
    node_edges <- which(phy$edge[,1] == anc[i])
    bl <- node_ages[names(node_ages) == anc[i]]
    node_mat <- vector("list", length(node_edges))
    P_mat <- expm(Q * bl)
    for(j in 1:length(node_edges)){
      # j = 1
      # tips which contain j are shown here
      tips_from_anc <- which(unlist(lapply(tip.paths, function(x) node_edges[j] %in% x)))
      # tips_from_anc <- which(unlist(lapply(tip.paths, function(x) any(node_edges %in% x))))
      # tip_sampled <- sample(c(tips_from_anc, tips_from_anc), 1)
      tip_sampled <- tips_from_anc
      tip_index <- match(tip_sampled, phy$edge[,2]) # relative to the edge matrix
      possible_external <- matrix(dec_liks[tip_index,], ncol = length(possible_internal))
      # tip_value <- mean(data[data$sp %in% phy$tip.label[tip_sampled],3])
      tip_values <- c(data[data$sp %in% phy$tip.label[tip_sampled],3])
      # branch_matrix <- getJointBranchMatrix(possible_internal, possible_external, tip_value, Rate.mat, bl, P_mat)
      # branch_matrices <- lapply(tip_values, function(x) getJointBranchMatrix(possible_internal, possible_external, x, Rate.mat, bl, P_mat))
      branch_matrices <- vector("list", length(tip_values))
      for(k in 1:length(tip_values)){
        branch_matrices[[k]] <- getJointBranchMatrix(possible_internal, which(possible_external[k,] == 1), tip_values[k], Rate.mat, bl, P_mat, init_value = anc_value, init_var = anc_var)
      }
      # branch_matrix <- getJointBranchMatrix(possible_internal, possible_internal, tip_value, Rate.mat, bl, P_mat, init_value = anc_value, init_var = anc_var)
      # the likelihood that the rootward state led to the known tipward state
      node_state_liks_list <- lapply(branch_matrices, 
                                     function(x) apply(x, 1, sum_lliks))
      node_state_liks_list <- lapply(node_state_liks_list, 
                                     function(x) exp(x - max(x))/sum(exp(x - max(x))))
      node_state_liks <- do.call(rbind, node_state_liks_list)
      node_mat[[j]] <- node_state_liks
    }
    new_node_mat <- node_mat[[1]]
    for(j in 2:length(node_mat)){
      new_node_mat <- combineDesc(new_node_mat, node_mat[[j]])
    }
    node_cond_prob <- colMeans(new_node_mat)
    # once the node has finished calculating its conditional probability of each state, we combine the two descendant tips
    # node_mat[node_mat == 0] <- 1e-10
    # node_cond_prob <- apply(node_mat, 2, prod)
    # this gets placed in the edge matrix
    anc_index <- which(phy$edge[,1] %in% anc[i])
    dec_index <- which(phy$edge[,2] %in% anc[i])
    for(k in anc_index){
      # k = 5
      edge_liks_list[[k]][dim(edge_liks_list[[k]])[1],] <- node_cond_prob/sum(node_cond_prob)
    }
    if(length(dec_index) > 0){
      for(k in dec_index){
        edge_liks_list[[k]][1,] <- node_cond_prob/sum(node_cond_prob)
      }
    }
  }
  return(edge_liks_list)
}

# get root liks from conditionals
getRootLiks <- function(conditional_probs, Q, root.p){
  if(any(is.na(conditional_probs$root_state))){
    return(NULL)
  }
  if(inherits(root.p[1], what="character")){
    if(root.p == "yang"){
      root_liks <- c(MASS::Null(Q))
      root_liks <- root_liks/sum(root_liks)
    }
    if(root.p == "flat"){
      root_liks <- rep(1/dim(Q)[1], dim(Q)[1])
    }
    if(root.p == "maddfitz"){
      root_liks <- conditional_probs$root_state/sum(conditional_probs$root_state)
    }
  }else{
    root_liks <- root.p/sum(root.p)
  }
  return(root_liks)
}
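
# "yang" uses the stationary distribution of Q (its left null vector,
# renormalized), "flat" is uniform, and "maddfitz" reweights by the
# conditional likelihoods at the root. A sketch of the stationary case for a
# hypothetical 2-state Q (illustrative only; assumes MASS is available):
if (FALSE) {
  Q <- matrix(c(-1, 1, 2, -2), 2, 2, byrow = TRUE)
  pi_hat <- c(MASS::Null(Q))
  pi_hat <- pi_hat / sum(pi_hat)  # c(2/3, 1/3); satisfies pi_hat %*% Q == 0
}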

##### Utility internal functions ##### 
getDiscreteModel <- function(data, model, rate.cat, dual, collapse){
  rate <- getStateMat4Dat(data, model, dual, collapse)$rate.mat
  if (rate.cat > 1) {
    StateMats <- vector("list", rate.cat)
    for (i in 1:rate.cat) {
      StateMats[[i]] <- rate
    }
    rate <- getFullMat(StateMats)
  }
  return(rate)
}


# getAllContinuousModelStructures <- function(k, type = "OU"){
#   # index.mat <- matrix(0, 3, k, dimnames = list(c("alpha", "sigma.sq", "theta"), c(1:k)))
#   # we want all unique combinations of a parameter. then we can add a single all same
#   # how many combinations are there of 1:k numbers?
#   potential_combos <- apply(partitions:::setparts(k), 2, function(x) paste(x, collapse="_"))
#   # this technically isn't all the possible alpha combinations, but for sim purposes we're fine.
#   if(type == "BM"){
#     alpha.combos <- paste(rep(0, k), collapse="_")
#     theta.combos <- paste(rep(1, k), collapse="_")
#   }
#   if(type == "OU"){
#     alpha.combos <- potential_combos
#     theta.combos <- potential_combos
#   }
#   if(type == "BMOU"){
#     if(k > 2){
#       stop("BMOU must be manually created if k > 1 atm. Sorry.")
#     }
#     # needed_numerals <- 1:((2^k)-2)
#     needed_numerals <- 1
#     alpha.combos <- apply(sapply(needed_numerals, function(x) as.numeric(intToBits(x)[1:k])), 2, function(x) paste(x, collapse="_")) # currently doesn't allow for BM mixed with OUA
#     # theta.combos <- potential_combos
#     theta.combos <- paste(rep(1, k), collapse="_")
#   }
#   sigma.sq.combos <- potential_combos
#   all_combos <- expand.grid(list(alpha.combos, sigma.sq.combos, theta.combos))
#   index_mats <- array(NA, c(3, k, dim(all_combos)[1]), dimnames = list(c("alpha", "sigma.sq", "theta"), c(1:k)))
#   for(i in 1:dim(all_combos)[1]){
#     alpha_i <- as.numeric(unlist(strsplit(as.character(all_combos[i,1]), "_")))
#     alpha_i[alpha_i == 0] <- NA
#     sigma_i <- max(c(0, alpha_i), na.rm = TRUE) + as.numeric(unlist(strsplit(as.character(all_combos[i,2]), "_")))
#     theta_i <- max(sigma_i) + as.numeric(unlist(strsplit(as.character(all_combos[i,3]), "_")))
#     index_mats[,,i] <- rbind(alpha_i, sigma_i, theta_i)
#   }
#   return(index_mats)
# }


organizeHOUwieDat <- function(data, tip.fog, collapse = TRUE){
  # return a list of corHMM data and OU data
  if(tip.fog=="known"){
    data.cor <- data[, 1:(dim(data)[2]-2)]
    data.cor <- corHMM:::corProcessData(data.cor, collapse = collapse)
    data.ou <- data.frame(sp = data[,1], 
                          reg = data.cor$corData[,2], 
                          x = data[, dim(data)[2]-1],
                          err = data[, dim(data)[2]])
  }
  if(tip.fog=="none"){
    data.cor <- data[, 1:(dim(data)[2]-1)]
    data.cor <- corHMM:::corProcessData(data.cor)
    data.ou <- data.frame(sp = data[,1], 
                          reg = data.cor$corData[,2], 
                          x = data[, dim(data)[2]])
  }
  return(list(StateMats = data.cor$StateMats, 
              PossibleTraits = data.cor$PossibleTraits,
              ObservedTraits = data.cor$ObservedTraits,
              data.cor = data.cor$corData,
              data.ou = data.ou))
}
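
# organizeHOUwieDat expects species names in column 1, the discrete
# character(s) next, then the continuous trait, and, when tip.fog = "known",
# a final column of measurement error. A hypothetical input (illustrative
# only):
if (FALSE) {
  data <- data.frame(sp = c("t1", "t2", "t3"),
                     reg = c(1, 2, 2),         # discrete regime
                     x = c(0.12, 1.40, 1.05))  # continuous trait
  organizeHOUwieDat(data, tip.fog = "none")
}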

organizeHOUwiePars <- function(pars, index.disc, index.cont){
  k <- max(index.disc, na.rm = TRUE)
  p.mk <- pars[1:k]
  p.ou <- pars[(k+1):length(pars)] 
  # ouwie pars
  Rate.mat <- matrix(1, 3, dim(index.disc)[2])
  index.cont[is.na(index.cont)] <- max(index.cont, na.rm = TRUE) + 1
  Rate.mat[] <- c(p.ou, NA)[index.cont]
  rownames(Rate.mat) <- rownames(index.cont)
  rate <- index.disc
  rate[is.na(rate)] <- k + 1
  Q <- matrix(0, dim(rate)[1], dim(rate)[2])
  Q[] <- c(p.mk, NA)[rate]
  diag(Q) <- -rowSums(Q)
  if(!is.null(colnames(rate))){
    colnames(Rate.mat) <- colnames(rate)
    colnames(Q) <- rownames(Q) <- colnames(rate)
  }
  # corhmm pars 
  return(list(solution.ou = Rate.mat,
              solution.cor = Q))
}

getIP.theta <- function(x, states, index){
  ip.theta <- vector("numeric", length(unique(index)))
  for(i in 1:length(unique(index))){
    state_i <- which(unique(index)[i] == index)
    ip.theta[i] <- mean(x[states %in% state_i])
  }
  ip.theta[is.nan(ip.theta)] <- mean(x)
  return(ip.theta)
}

simCharacterHistory <- function(phy, Q, root.freqs, Q2 = NA, NoI = NA){
  #Randomly choose starting state at root using root.freqs as the probability:
  root.value <- sample.int(dim(Q)[2], 1, FALSE, prob=root.freqs/sum(root.freqs))
  #Reorder the phy:
  phy <- reorder.phylo(phy, "postorder")
  ntips <- length(phy$tip.label)
  N <- dim(phy$edge)[1]
  ROOT <- ntips + 1 #perhaps use an accessor to get the root node id
  
  #Generate vector that contains the simulated states:
  CharacterHistory <- integer(ntips + phy$Nnode)
  CharacterHistory[ROOT] <- as.integer(root.value)
  anc <- phy$edge[, 1]
  des <- phy$edge[, 2]
  edge.length <- phy$edge.length
  diag(Q) = 0
  diag(Q) = -rowSums(Q)
  
  # setting up the alternative Q matrix at the node of interest
  if(!any(is.na(Q2))){
    diag(Q2) = 0
    diag(Q2) = -rowSums(Q2)
  }
  if(!is.na(NoI)){
    NewQDesc <- getDescendants(phy, NoI)
  }
  
  #standard simulation protocol
  if(any(is.na(Q2)) | is.na(NoI)){
    for (i in N:1) {
      p <- expm(Q * edge.length[i], method="Ward77")[CharacterHistory[anc[i]], ]
      CharacterHistory[des[i]] <- sample.int(dim(Q)[2], size = 1, FALSE, prob = p)
    }
  }
  
  # simulating a clade under a different (Q2) evolutionary model
  if(!any(is.na(Q2)) & !is.na(NoI)){
    for (i in N:1) {
      if(anc[i] %in% NewQDesc){
        p <- expm(Q2 * edge.length[i], method="Ward77")[CharacterHistory[anc[i]], ]
        CharacterHistory[des[i]] <- sample.int(dim(Q2)[2], size = 1, FALSE, prob = p)
      }else{
        p <- expm(Q * edge.length[i], method="Ward77")[CharacterHistory[anc[i]], ]
        CharacterHistory[des[i]] <- sample.int(dim(Q)[2], size = 1, FALSE, prob = p)
      }
    }
  }
  
  TipStates <-  CharacterHistory[1:ntips]
  names(TipStates) <- phy$tip.label
  NodeStates <- CharacterHistory[ROOT:(N+1)]
  names(NodeStates) <- ROOT:(N+1)
  
  res <- list(TipStates = TipStates, NodeStates = NodeStates)
  return(res)
  #return(CharacterHistory)
}
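# Example (illustrative, not run; hypothetical objects): simulate a two-state
# history on a random coalescent tree with a flat root prior.
# set.seed(1)
# ex.phy <- ape::rcoal(10)
# ex.Q <- matrix(c(-1, 1, 1, -1), 2, 2)
# sim <- simCharacterHistory(ex.phy, ex.Q, root.freqs = c(1, 1))
# sim$TipStates is named by tip label; sim$NodeStates is named by node id.
# Supplying both Q2 and NoI instead evolves the clade descended from node NoI
# under the alternative matrix Q2.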
# organize the hOUwie output into a fitted model object
getHouwieObj <- function(liks_houwie, pars, phy, data, hOUwie.dat, rate.cat,
                         tip.fog, index.disc, index.cont, root.p, nSim,
                         sample_tips, sample_nodes, adaptive_sampling, nStates,
                         discrete_model, continuous_model, time_slice,
                         root.station, get.root.theta, lb_discrete_model,
                         ub_discrete_model, lb_continuous_model,
                         ub_continuous_model, ip, opts, quiet, negative_values){
  param.count <- max(index.disc, na.rm = TRUE) + max(index.cont, na.rm = TRUE)
  nb.tip <- length(phy$tip.label)
  solution <- organizeHOUwiePars(pars=pars, index.disc=index.disc, index.cont=index.cont)
  if(rate.cat > 1){
    StateNames <- paste("(", rep(1:nStates, rate.cat), rep(LETTERS[1:rate.cat], each = nStates), ")", sep = "")
  }else{
    StateNames <- paste("(", rep(1:nStates, rate.cat), ")", sep = "")
  }
  rownames(solution$solution.cor) <- colnames(solution$solution.cor) <- StateNames
  colnames(solution$solution.ou) <- StateNames
  names(hOUwie.dat$ObservedTraits) <- 1:length(hOUwie.dat$ObservedTraits)
  # undo the +50 shift applied before optimization when negative trait values were detected
  if(negative_values){
    n_theta <- length(unique(index.cont[3,]))
    pars[(length(pars) - n_theta + 1):length(pars)] <- pars[(length(pars) - n_theta + 1):length(pars)] - 50
    if(tip.fog == "none"){
      data[,dim(data)[2]] <- data[,dim(data)[2]] - 50
    }else{
      data[,dim(data)[2]-1] <- data[,dim(data)[2]-1] - 50
    }
    solution$solution.ou[3,] <- solution$solution.ou[3,] - 50
  }
  obj <- list(
    loglik = liks_houwie$TotalLik,
    DiscLik = liks_houwie$DiscLik,
    ContLik = liks_houwie$ContLik,
    AIC = -2*liks_houwie$TotalLik + 2*param.count,
    AICc = -2*liks_houwie$TotalLik + 2*param.count + ((2*(param.count^2) + 2*param.count)/(nb.tip - param.count - 1)),
    BIC = -2*liks_houwie$TotalLik + log(nb.tip) * param.count,
    param.count = param.count,
    solution.disc = solution$solution.cor,
    solution.cont = solution$solution.ou,
    recon = NULL,
    index.disc = index.disc,
    index.cont = index.cont,
    phy = phy,
    legend = hOUwie.dat$ObservedTraits,
    data = data, 
    hOUwie.dat = hOUwie.dat,
    rate.cat = rate.cat, 
    discrete_model=discrete_model, 
    continuous_model=continuous_model, 
    root.p=root.p, 
    time_slice=time_slice,
    root.station=root.station, 
    get.root.theta=get.root.theta, 
    tip.fog = tip.fog, 
    sample_tips = sample_tips,
    sample_nodes = sample_nodes,
    adaptive_sampling = adaptive_sampling,
    lb_discrete_model=lb_discrete_model, 
    ub_discrete_model=ub_discrete_model,
    lb_continuous_model=lb_continuous_model, 
    ub_continuous_model=ub_continuous_model,
    p=pars, 
    ip=ip,
    nSim=nSim, 
    opts=opts,
    quiet=quiet
  )
  class(obj) <- "houwie"
  return(obj)
}
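# Worked example of the information criteria computed above (hypothetical
# numbers): with TotalLik = -100, param.count = 5, and nb.tip = 50,
#   AIC  = -2*(-100) + 2*5                  = 210
#   AICc = 210 + (2*5^2 + 2*5)/(50 - 5 - 1) = 210 + 60/44 ~ 211.36
#   BIC  = -2*(-100) + log(50)*5            ~ 219.56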
# generate random starting values for multiple starts
generateMultiStarting <- function(starts, index.disc, index.cont, n_starts, lower, upper){
  n_p_trans <- max(index.disc, na.rm = TRUE)
  n_p_alpha <- length(unique(na.omit(index.cont[1,])))
  n_p_sigma <- length(unique(na.omit(index.cont[2,])))
  n_p_theta <- length(unique(na.omit(index.cont[3,])))
  multiple_starts <- vector("list", n_starts)
  multiple_starts[[1]] <- checkStartingUBLB(starts, lower, upper)
  if(n_starts > 1){
    for(i in 2:n_starts){
      multiple_starts[[i]] <- numeric(length(starts))
      for(j in 1:length(starts)){
        # if a 10-fold window around the start would cross a bound,
        # sample from just inside the full lower-upper range instead
        if(starts[j]/10 <= lower[j] | starts[j]*10 >= upper[j]){
          lb <- (lower[j]+lower[j]*0.01)*10
          ub <- (upper[j]-upper[j]*0.01)/10
        }else{
          lb <- starts[j]
          ub <- starts[j]
        }
        # draw log-uniformly within a 10-fold window of (lb, ub)
        multiple_starts[[i]][j] <- exp(runif(1, log(lb/10), log(ub*10)))
      }
      multiple_starts[[i]] <- checkStartingUBLB(multiple_starts[[i]], lower, upper)
    }
  }
  return(multiple_starts)
}
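# Example (illustrative, not run): three starting sets for a hypothetical
# five-parameter model (ex.index.disc and ex.index.cont as in the sketch
# after organizeHOUwiePars). The first set is the bound-checked input; the
# rest are drawn log-uniformly, roughly within a 10-fold window of each start.
# generateMultiStarting(starts = c(0.1, 1.5, 0.5, 2, 8),
#                       index.disc = ex.index.disc, index.cont = ex.index.cont,
#                       n_starts = 3, lower = rep(1e-9, 5), upper = rep(100, 5))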

# clamp starting values that fall outside the bounds back inside them
checkStartingUBLB <- function(starts, lower, upper){
  for(i in 1:length(starts)){
    if(starts[i] < lower[i]){
      starts[i] <- lower[i] * 2
    }
    if(starts[i] > upper[i]){
      starts[i] <- upper[i] / 2
    }
  }
  return(starts)
}

# correct the edge maps of a simmap for plotting when using getAllJointProbs
correct_map_edges <- function(simmap){
  simmap$maps <- lapply(simmap$maps, correct_edge)
  return(simmap)
}

correct_edge <- function(edge){
  # merge consecutive map segments that share a state, summing their durations
  if(length(edge) <= 1){
    return(edge)
  }
  edge_names <- names(edge)
  count <- 1
  edge_merge <- numeric(length(edge))
  edge_merge[1] <- count
  for(i in 2:length(edge)){
    if(edge_names[i-1] == edge_names[i]){
      edge_merge[i] <- count
    }else{
      count <- count + 1
      edge_merge[i] <- count
    }
  }
  new_edge_lengths <- vector("numeric", length(unique(edge_merge)))
  new_edge_names <- vector("character", length(unique(edge_merge)))
  for(j in unique(edge_merge)){
    new_edge_lengths[j] <- sum(edge[edge_merge %in% j])
    new_edge_names[j] <- names(edge)[edge_merge %in% j][1]
  }
  names(new_edge_lengths) <- new_edge_names
  return(new_edge_lengths)
}
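# Example (illustrative, not run): for a hypothetical edge map with two
# consecutive segments in state "1",
# correct_edge(c("1" = 0.2, "1" = 0.3, "2" = 0.5))
# returns c("1" = 0.5, "2" = 0.5).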

# log-sum-exp: stably compute log(sum(exp(lliks))) from a vector of log-likelihoods
sum_lliks <- function(lliks){
  lliks[is.na(lliks)] <- -Inf
  out <- max(lliks, na.rm = TRUE) + log(sum(exp(lliks - max(lliks))))
  return(out)
}
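# Example (illustrative): shifting by the maximum avoids underflow. Naively,
# log(sum(exp(c(-1000, -1001)))) underflows to -Inf, whereas
# sum_lliks(c(-1000, -1001)) returns -1000 + log(1 + exp(-1)) ~ -999.69.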

# get weighted tip values from stochastic map results
get_tip_values <- function(model){
  all_tip_states <- lapply(model$simmaps, get_tip_states)
  all_joint_liks <- model$all_disc_liks + model$all_cont_liks
  weights <- exp(all_joint_liks - max(all_joint_liks))/sum(exp(all_joint_liks - max(all_joint_liks)))
  rates <- rowSums(model$solution.disc, na.rm = TRUE)
  continuous_solution <- model$solution.cont
  continuous_solution[is.na(continuous_solution)] <- 0
  parameter_table_list <- lapply(all_tip_states, function(x) index_paramers_from_tip_states(x, rates, continuous_solution))
  for(i in 1:length(parameter_table_list)){
    parameter_table_list[[i]] <- parameter_table_list[[i]] * weights[i]
  }
  tip_value_table <- Reduce("+", parameter_table_list)
  expected_values <- getExpectedValues(model, FALSE)
  expected_values <- expected_values[match(rownames(tip_value_table), expected_values$sp),]
  tip_value_table <- cbind(tip_value_table, expected_values[,c(2,3)])
  return(tip_value_table)
}

# read the state at each tip from the final segment of its terminal edge map
get_tip_states <- function(simmap){
  nTip <- length(simmap$tip.label)
  tip_states <- as.numeric(unlist(lapply(simmap$maps[simmap$edge[,2] <= nTip], function(x) names(x)[length(x)])))
  names(tip_states) <- simmap$tip.label[simmap$edge[,2][simmap$edge[,2] <= nTip]]
  return(tip_states)
}

# build a per-tip table of transition rates and OU parameters implied by a set of tip states
index_paramers_from_tip_states <- function(tip_states, rates, continuous_solution){
  parameter_df <- data.frame(rates = rates[tip_states],
                             alpha = continuous_solution[1,tip_states],
                             sigma.sq = continuous_solution[2,tip_states],
                             theta = continuous_solution[3,tip_states],
                             row.names = names(tip_states))
  return(parameter_df)
}
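# Example (illustrative): the weighting in get_tip_values() above is a model
# average over maps. With two hypothetical maps whose joint log-likelihoods
# are -10 and -12,
# w <- exp(c(-10, -12) - (-10)); w <- w/sum(w)  # ~ c(0.881, 0.119)
# each map's per-tip parameter table is multiplied by its weight and the
# weighted tables are summed with Reduce("+", ...).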

# hOUwie.twostep <- function(phy, data, rate.cat, discrete_model, continuous_model, nSim=1000, root.p="yang", dual = FALSE, collapse = TRUE, root.station=FALSE, get.root.theta=FALSE, tip.fog = "none", lb_discrete_model=NULL, ub_discrete_model=NULL, lb_continuous_model=NULL, ub_continuous_model=NULL, recon=FALSE, nodes="internal", p=NULL, ip="fast", optimizer="nlopt_ln", opts=NULL, quiet=FALSE, sample_tips=FALSE, sample_nodes=TRUE, adaptive_sampling=TRUE, n_starts = 1, ncores = 1){
#   start_time <- Sys.time()
#   # if the data has negative values, shift it right - we will shift it back later
#   negative_values <- FALSE
#   if(tip.fog == "none"){
#     if(any(data[,dim(data)[2]] < 0)){
#       cat("Negative values detected... adding 50 to the trait mean for optimization purposes\n")
#       negative_values <- TRUE
#       data[,dim(data)[2]] <- data[,dim(data)[2]] + 50
#     }
#   }else{
#     if(any(data[,dim(data)[2]-1] < 0)){
#       cat("Negative values detected... adding 50 to the trait mean for optimization purposes\n")
#       negative_values <- TRUE
#       data[,dim(data)[2]-1] <- data[,dim(data)[2]-1] + 50
#     }
#   }
#   # check that tips and data match
#   # check for invariance of tip states, and that any apparent variation is not just ambiguity
#   if(!is.null(phy$node.label)){
#     if(!quiet){
#       cat("Your phylogeny had node labels, these have been removed.\n")
#     }
#     phy$node.label <- NULL
#   }
#   
#   if(ncores > n_starts){
#     cat("You have specified more cores are to be used than the number of starts. Setting ncores to be equal to the number of optimizations.\n")
#     ncores <- n_starts
#   }
#   
#   # organize the data
#   phy <- reorder.phylo(phy, "pruningwise")
#   hOUwie.dat <- organizeHOUwieDat(data, tip.fog, collapse)
#   nStates <- as.numeric(max(hOUwie.dat$data.cor[,2]))
#   nCol <- dim(data)[2] - ifelse(tip.fog == "none", 2, 3)
#   Tmax <- max(branching.times(phy))
#   tip.paths <- lapply(1:Ntip(phy), function(x) OUwie:::getPathToRoot(phy, x))
#   
#   if(class(discrete_model)[1] == "character"){
#     index.disc <- getDiscreteModel(hOUwie.dat$data.cor, discrete_model, rate.cat, dual, collapse)
#     index.disc[index.disc == 0] <- NA
#   }else{
#     index.disc <- discrete_model
#     index.disc[index.disc == 0] <- NA
#   }
#   if(class(continuous_model)[1] == "character"){
#     index.cont <- getOUParamStructure(continuous_model, "three.point", root.station, get.root.theta, nStates * rate.cat)
#   }else{
#     continuous_model[continuous_model == 0] <- NA
#     index.cont <- continuous_model
#   }
#   if(dim(index.disc)[2] > dim(index.cont)[2]){
#     stop("Not all of your discrete states have OU parameters associated with them. Please check that your discrete index matrix matches your continuous index matrix.")
#   }
#   if(dim(index.cont)[2] > dim(index.disc)[2]){
#     stop("You have specified more OU parameters than there are states in the discrete process. Please check that your discrete index matrix matches your continuous index matrix.")
#   }
#   if(class(root.p[1]) != "character"){
#     if(dim(index.disc)[2] != length(root.p)){
#       stop("You have entered a custom root prior whose length does not equal the number of states in your discrete model.")
#     }
#   }
#   
#   if(is.null(lb_continuous_model)){
#     # the lower limits of alpha and sigma are a small positive floor (1e-10)
#     # the lower limit of optim is defined 10 times lower than the minimum observation
#     lb.alpha = 1e-10
#     lb.sigma = 1e-10
#     lb.optim = min(data[, 1+nCol+1])/10 
#     lb_continuous_model=c(lb.alpha,lb.sigma,lb.optim)
#   }
#   if(is.null(ub_continuous_model)){
#     # the upper limit of alpha is defined as a halflife of 1% of the max tree height
#     # the upper limit of sigma is set equal to the upper limit of alpha
#     # the upper limit of optim is defined 10 times more than the maximum observation
#     ub.alpha = log(2)/(0.01 * Tmax)
#     ub.sigma = ub.alpha
#     ub.optim = max(data[, 1+nCol+1])*10 
#     ub_continuous_model=c(ub.alpha,ub.sigma,ub.optim)
#   }
#   if(is.null(lb_discrete_model)){
#     # the minimum dwell time is defined as 100 times the max tree height
#     lb_discrete_model = 1/(Tmax*100)
#   }
#   if(is.null(ub_discrete_model)){
#     ub_discrete_model = 1/(Tmax*0.01)
#   }
#   #Normalize user-supplied root state probabilities so that they sum to 1:
#   if(!is.null(root.p)){
#     if(!is.character(root.p)){
#       root.p <- root.p/sum(root.p)
#     }
#   }
#   time_slice <- Tmax+1
#   # the number of parameters for each process
#   n_p_trans <- max(index.disc, na.rm = TRUE)
#   n_p_alpha <- length(unique(na.omit(index.cont[1,])))
#   n_p_sigma <- length(unique(na.omit(index.cont[2,])))
#   n_p_theta <- length(unique(na.omit(index.cont[3,])))
#   n_p <- n_p_trans + n_p_alpha + n_p_sigma + n_p_theta
#   
#   # an internal data structure (internodes liks matrix) for the dev function
#   edge_liks_list <- getEdgeLiks(phy, hOUwie.dat$data.cor, nStates, rate.cat, time_slice)
#   
#   # default MLE search options
#   if(is.null(opts)){
#     if(optimizer == "nlopt_ln"){
#       opts <- list("algorithm"="NLOPT_LN_SBPLX", "maxeval"="1000", "ftol_rel"=.Machine$double.eps^0.5)
#     }
#     if(optimizer == "nlopt_gn"){
#       opts <- list("algorithm"="NLOPT_GN_DIRECT_L", "maxeval"="1000", "ftol_rel"=.Machine$double.eps^0.5)
#     }
#     if(optimizer == "sann"){
#       opts <- list(max.call=1000, smooth=FALSE)
#     }
#   }
#   # a global matrix to contain likelihoods so that identical parameters return identical likelihoods
#   if(is.null(opts$maxeval) | is.null(opts$max.call)){
#     max.its <- 1000
#   }else{
#     max.its <- as.numeric(opts$maxeval)
#   }
#   setDTthreads(threads=1)
#   tmp.df <- data.frame(matrix(c(0, rep(1e5, n_p)), byrow = TRUE, ncol = n_p+1, nrow = max.its))
#   global_liks_mat <- as.data.table(tmp.df)
#   
#   # p is organized into 2 groups with the first set being corHMM and the second set being OUwie
#   # organized as c(trans.rt, alpha, sigma.sq, theta)
#   # evaluate likelihood
#   if(!is.null(p)){
#     if(!quiet){
#       cat("Calculating likelihood from a set of fixed parameters.\n")
#       print(p)
#     }
#     if(max(index.cont, na.rm = TRUE) + max(index.disc, na.rm = TRUE) != length(p)){
#       message <- paste0("The number of parameters does not match the number required by the model structure. You have supplied ", length(p), ", but the model structure requires ", max(index.cont, na.rm = TRUE) + max(index.disc, na.rm = TRUE), ".")
#       stop(message, call. = FALSE)
#     }
#     out<-NULL
#     pars <- out$solution <- log(p)
#   }else{
#     out<-NULL
#     lower = log(c(rep(lb_discrete_model, n_p_trans), 
#                   rep(lb_continuous_model[1], length(unique(na.omit(index.cont[1,])))), 
#                   rep(lb_continuous_model[2], length(unique(na.omit(index.cont[2,])))), 
#                   rep(lb_continuous_model[3], length(unique(na.omit(index.cont[3,]))))))
#     upper = log(c(rep(ub_discrete_model, n_p_trans), 
#                   rep(ub_continuous_model[1], length(unique(na.omit(index.cont[1,])))), 
#                   rep(ub_continuous_model[2], length(unique(na.omit(index.cont[2,])))), 
#                   rep(ub_continuous_model[3], length(unique(na.omit(index.cont[3,]))))))
#     # cat(c("TotalLnLik", "DiscLnLik", "ContLnLik"), "\n")
#     # check for user input initial parameters 
#     if(is.character(ip)){
#       if(rate.cat > 1){
#         bin_index <- cut(hOUwie.dat$data.ou[,3], rate.cat, labels = FALSE)
#         combos <- expand.grid(1:max(hOUwie.dat$data.cor[,2]), 1:rate.cat)
#         disc_tips <- vector("numeric", length(phy$tip.label))
#         for(i in 1:dim(combos)[1]){
#           disc_tips[hOUwie.dat$data.cor[,2] == combos[i,1] & bin_index == combos[i,2]] <- i
#         }
#       }else{
#         disc_tips <- hOUwie.dat$data.cor[,2]
#       }
#       starts.alpha <- rep(log(2)/Tmax, n_p_alpha)
#       # starts.sigma <- rep(var(hOUwie.dat$data.ou[,3]), n_p_sigma)
#       starts.sigma <- rep(log(2)/Tmax, n_p_sigma)
#       start.theta <- getIP.theta(hOUwie.dat$data.ou[,3], disc_tips, index.cont[3,])
#       start.cor <- rep(10/sum(phy$edge.length), n_p_trans)
#       starts.basic = c(start.cor, starts.alpha, starts.sigma, start.theta)
#       cat("\nFitting the discrete model to discrete data...\n")
#       discrete_fit <- corHMM(phy=phy, data=hOUwie.dat$data.cor, rate.cat=rate.cat, rate.mat=index.disc, node.states="none", opts = opts)
#       cat("\nGenerating", nSim, "simmaps and optimizing the continuous model to each.")
#       simmap_list <- makeSimmap(tree=phy, data=hOUwie.dat$data.cor, model=discrete_fit$solution, rate.cat=1, nSim=nSim, nCores=1)
#       continuous_fit <- mclapply(simmap_list, function(x) nloptr(x0 = log(c(starts.alpha, starts.sigma, start.theta)), eval_f = OUwie.basic.dev, lb=lower[-seq(n_p_trans)], ub=upper[-seq(n_p_trans)], opts=opts, phy = x, data = hOUwie.dat$data.ou, tip.fog = tip.fog, index.cont = index.cont, tip.paths = tip.paths), mc.cores=ncores)
#     }
#   }
#   return(list(discrete_fit, continuous_fit))
# }