R/phylo_pieces.R

Defines functions phylo_pieces

Documented in phylo_pieces

#' Slices a phylogenetic tree into multiple temporal slices
#' @description
#' This function slices a phylogenetic tree into multiple slices, spaced equally either in million years or intervals of phylogenetic diversity (PD).
#'
#' @usage phylo_pieces(tree, n, criterion, method, timeSteps, dropNodes, returnTree)
#'
#' @param tree phylo. An ultrametric phylogenetic tree in the "phylo" format.
#' @param n numeric. A numeric value indicating either the number of temporal slices (method = 1) or the time interval in million years (or phylogenetic diversity) among the tree slices (method = 2). Default is 1.
#' @param criterion character string. The method for slicing the tree. It can be either "my" (million years) or "PD" (accumulated phylogenetic diversity). Default is "my".
#' @param method numerical. A numerical value indicating the method to make the multiple slices. Setting "method = 1" will slice the phylogeny based on an "n" number of slices. If method = 2, the slices will be created based on a temporal interval. Default is 1.
#' @param timeSteps logical. A logical value indicating whether the vector containing the time-steps used for creating the multiple slices should be returned. If "timeSteps = TRUE", then it returns a list containing both the time vector and a list with the multiple tree slices. Default is FALSE.
#' @param dropNodes logical. A logical value indicating whether the nodes that were sliced (void nodes, presenting no branch length) should be preserved in the node matrix. Default is FALSE.
#' @param returnTree logical. A logical value indicating whether the original input tree should be returned with its slices in a list. Default is FALSE.
#'
#' @return The function returns a list containing multiple slices of a phylogenetic tree. The slices list is ordered from roots to tips. Thus, the first object within the outputted list is the root-slice, whereas the last is the tips-slice.
#'
#' @seealso Other slicing methods: [squeeze_root()], [squeeze_tips()], [squeeze_int()], [prune_tips()]
#'
#' @author Matheus Lima de Araujo <matheusaraujolima@live.com>
#'
#' @examples
#' # Generate a random tree
#' tree <- ape::rcoal(20)
#'
#' # Cuts a phylogeny into multiple temporal slices
#' tree <- phylo_pieces(tree, n = 3, criterion = "my", method = 1)
#'
#' # Plotting the three slices of our phylogeny
#' plot(tree[[1]])
#' plot(tree[[2]])
#' plot(tree[[3]])
#'
#' @export

phylo_pieces <- function(tree, n, criterion = "my", method = 1,
                         timeSteps = FALSE, dropNodes = FALSE, returnTree = FALSE){

  # Capturing the nodes and tips informations
  tree <- nodes_config(tree)

  # The choose criterion for making the slices are time
  if(criterion == "my"){

    if(method == 1){  # number of phylogenetic slices
      # If the number of slices provided were decimals...
      if(n < 1){
        stop("The n must be an integer under argument method = 1")
      } else {
        # Saving the number of slices into a new object
        nslices <- n

        # Capturing the time interval that it belongs
        n <- tree$tree_depth/nslices

        # Printing the threshold choose
        message(paste("> The", nslices, "number of pieces inputted equals to intervals of",
                  n, "million of years.\n"))
      }


    }

    if(method == 2){ # desired time interval among slices

      # if the imputed time is bigger than the tree time
      if(n > sum(tree$tree_depth)){
        stop("The time threshold inputted is older than the phylogeny")
      } else {

        ## Find the best time interval that equal the temporal width among slices
        # Finding the expected number of pieces under the imputed time-interval
        exp_pieces <- tree$tree_depth/n

        # Find the smallest rounding value from the expected
        f_piece <- tree$tree_depth/floor(exp_pieces)
        # Find the biggest rounding values from the expected
        cel_piece <- tree$tree_depth/ceiling(exp_pieces)

        # Check which of these are the closest value and select it
        if(abs(n - f_piece) < abs(n - cel_piece) & f_piece != tree$tree_depth){
          n <- f_piece # floor piece
          nslices <- floor(exp_pieces)
        } else {
          n <- cel_piece # ceiling piece
          nslices <- ceiling(exp_pieces)
        }

        # Printing the threshold choose
        message(paste("> The number of pieces was rounded to", tree$tree_depth/n,
                  ", with intervals of", n, "million of years.\n"))
      }
    }

    # Summing the cummulated values of the temporal slices (from roots -> tips)
    age <- cumsum(rep(n, nslices))

    ## Running the slicer algorithm
    cutted_tree <- lapply(age, function(j){ # j <- age[4]

      ## Cutting the phylogeny TIPWARDLY (TIPS -> ROOT):
      # Which nodes ends after j but are SMALLER or bigger than my thresholds?
      smaller <- which((tree$config$YearEnd[which(tree$config$YearEnd > j)] - j) >= tree$edge.length[which(tree$config$YearEnd > j)])
      bigger <- which((tree$config$YearEnd[which(tree$config$YearEnd > j)] - j) < tree$edge.length[which(tree$config$YearEnd > j)])

      # For those smaller, turn their edge.length 0 (ZERO)
      tree$edge.length[which(tree$config$YearEnd > j)][smaller] <- 0

      # Those values bigger than my threshold when subtracting j,
      # remove its remaining difference to reach the threshold
      tree$edge.length[which(tree$config$YearEnd > j)][bigger] <-
        tree$edge.length[which(tree$config$YearEnd > j)][bigger] - (tree$config$YearEnd[which(tree$config$YearEnd > j)][bigger] - j)

      if(j > n){  ## if j is smaller than n, make the slices specifically on nodes position

        ### Cutting the phylogeny ROOTWARDLY (ROOT -> TIPS):
        # The threshold occupy different nodes with different requirements for node slicing?
        # (ex: several nodes starting before and after a given threshold)
        if(length(unique(c(which(sort(unique(tree$config$YearEnd)) >= j)[1], which(sort(unique(tree$config$YearEnd)) >= (j-n))[1]))) > 1){

          ## Correcting the length of those nodes that ends inside the given interval,
          # but start before the threshold established in (j - n):
          tree$edge.length[tree$config$YearEnd >= (j-n) & tree$config$YearEnd <= (j) & tree$config$YearBegin < (j-n)] <-
            (tree$config$YearEnd[tree$config$YearEnd >= (j-n) & tree$config$YearEnd <= (j) & tree$config$YearBegin < (j-n)] - (j-n))

          ## All nodes ending before my threshold (j-n), turn into zero their lengths
          tree$edge.length[tree$config$YearEnd < (j-n)] <- 0

          ## All nodes originating befores my threshold (j - n) and ending after my threshold (j)
          # (an entire branch length within the interval), assign (n) to its edge.length
          tree$edge.length[tree$config$YearBegin < (j-n) & tree$config$YearEnd >= j] <- n
        }

        ## If all remaining nodes had its edges bigger than the interval:
        if(length(unique(c(which(sort(unique(tree$config$YearEnd)) >= j)[1], which(sort(unique(tree$config$YearEnd)) >= (j-n))[1]))) == 1){

          ## Turn into 0 those edges ending before my threshold
          tree$edge.length[which(tree$config$YearEnd < (j))] <- 0

          ## Those remaining nodes, turn into n their edge.lengths
          tree$edge.length[tree$edge.length > 0] <- n
        }
        return(tree)
      }
      ## If the j threshold is smaller than n, return the tree as it is
      if(j <= n){
        return(tree)
      }
    })
  }

  # The choose criterion for making the slices are PD
  if(criterion == "pd"){
    ## How many PD we have at each nodes spliting?
    # Separating only the nodes inital information
    nodes <- tree$config[!duplicated(tree$config$NodeBegin), c(1, 2, 4)]

    ## Making a matrix with these informations
    df <- data.frame(time = sort(unique(nodes$YearBegin)), # Time which init the node
                     nBranch = 2:(length(unique(nodes$YearBegin)) + 1), # Number of branches
                     timeLength = c(sort(unique(nodes$YearBegin)), max(tree$config$YearEnd))[-1] - # time length of the node
                       sort(unique(nodes$YearBegin)))

    # Calculating the cumulative TIME and PD per node split
    df$cumulativeTIME <- cumsum(df$timeLength)
    df$cumulativePD <- cumsum(df$nBranch * df$timeLength)


    if(method == 1){ # number of phylogenetic slices
      # If the number of slices provided were decimals...
      if(n < 1){
        stop("The n must be an integer under the argument method = 1")
      } else {
        # Saving the number of slices into a new object
        nslices <- n

        # Capturing the time interval that it belongs
        n <- sum(tree$edge.length)/nslices

        # Printing the threshold choose
        message(paste("> The", nslices, "number of pieces inputted equals to intervals of",
                  n, "phylogenetic diversity (PD).\n"))
      }
    }

    if(method == 2){ # desired PD interval among slices

      # if the imputed PD is bigger than the tree PD
      if(n > sum(tree$edge.length)){
        stop("The PD threshold inputted is older than the phylogeny")
      } else {

        ## Find the best PD interval that equal the temporal width among slices
        # Finding the expected number of pieces under the imputed PD-interval
        exp_pieces <- sum(tree$edge.length)/n

        # Find the smallest rounding value from the expected
        f_piece <- sum(tree$edge.length)/floor(exp_pieces)
        # Find the biggest rounding values from the expected
        cel_piece <- sum(tree$edge.length)/ceiling(exp_pieces)

        # Check which of these are the closest value and select it
        if(abs(n - f_piece) < abs(n - cel_piece) & f_piece != sum(tree$edge.length)){
          n <- f_piece # floor piece
          nslices <- floor(exp_pieces)
        } else {
          n <- cel_piece # ceiling piece
          nslices <- ceiling(exp_pieces)
        }

        # Printing the threshold choose
        message(paste("> The number of pieces was rounded to", sum(tree$edge.length)/n,
                  ", with intervals of", n, "phylogenetic diversity (PD).\n"))
      }
    }

    # Qual o PD dentro de cada intervalo desejado ?
    pd_slices <- sum(tree$edge.length)/nslices
    # Calculating the cumulative PD
    pd_slices <- cumsum(rep(pd_slices, nslices))

    # Retrieving the temporal slices of equal PD
    # Running the algorithm
    age <- sapply(pd_slices, function(z){
      # Separating the node information
      node_info <- df[which(df$cumulativePD >= z)[1],]
      # What is the PD difference within this node and the expected PD?
      rmv <- (node_info[, 5] - z)
      # Which is the temporal distance of this difference (based on the local node number)
      rmv <- rmv/node_info[, 2]
      # What time this expected PD are located in?
      time_pd <- node_info[, 4] - rmv
      return(time_pd)
    })

    # Configuring the time intervals into a data.frame
    pd_df <- data.frame(ID = 1:length(age), AGE_t1 = age, AGE_t0 = c(0, age[-length(age)]))


    ## Running the slicer algorithm
    cutted_tree <- lapply(pd_df[,2], function(k){ # k <- pd_df[2, 2]

      # Capturing the time of left-side slice
      l <- pd_df[which(pd_df[, 2] == k), 3]

      ## Cutting the phylogeny TIPWARDLY (TIPS -> ROOT):
      # Which nodes ends after j but are SMALLER or bigger than my thresholds?
      smaller <- which((tree$config$YearEnd[which(tree$config$YearEnd > k)] - k) >= tree$edge.length[which(tree$config$YearEnd > k)])
      bigger <- which((tree$config$YearEnd[which(tree$config$YearEnd > k)] - k) < tree$edge.length[which(tree$config$YearEnd > k)])

      # For those smaller, turn their edge.length 0 (ZERO)
      tree$edge.length[which(tree$config$YearEnd > k)][smaller] <- 0

      # Those values bigger than my threshold when subtracting k,
      # remove its remaining difference to reach the threshold
      tree$edge.length[which(tree$config$YearEnd > k)][bigger] <-
        tree$edge.length[which(tree$config$YearEnd > k)][bigger]-(tree$config$YearEnd[which(tree$config$YearEnd > k)][bigger] - k)

      if(l > 0){  ## if l is smaller than 0, make the slices specifically on nodes position

        ### Cutting the phylogeny ROOTWARDLY (ROOT -> TIPS):
        # The threshold occupy different nodes with different requirements for node slicing?
        # (ex: several nodes starting before and after a given threshold)
        if(length(unique(c(which(sort(unique(tree$config$YearEnd)) >= k)[1], which(sort(unique(tree$config$YearEnd)) >= l)[1]))) > 1){ # + de 1 TRUE

          ## Correcting the length of those nodes that ends inside the given interval,
          # but start before the threshold established in (k - l):
          tree$edge.length[tree$config$YearEnd >= l & tree$config$YearEnd <= (k) & tree$config$YearBegin < l] <-
            (tree$config$YearEnd[tree$config$YearEnd >= l & tree$config$YearEnd <= (k) & tree$config$YearBegin < l] - l)

          ## All nodes ending before my threshold (l), turn into zero their lengths
          tree$edge.length[tree$config$YearEnd < l] <- 0

          ## All nodes originating befores my threshold (l) and ending after my threshold (k)
          # (an entire branch length within the interval), assign (k - l) to its edge.length
          tree$edge.length[tree$config$YearBegin < l & tree$config$YearEnd >= k] <- k-l
        }

        ## If all remaining nodes had its edges bigger than the interval:
        if(length(unique(c(which(sort(unique(tree$config$YearEnd)) >= k)[1], which(sort(unique(tree$config$YearEnd)) >= l)[1]))) == 1){ # ! de 0, porque ele n?o foi cortado; o bra?o ?ntegro (sem recorte) que o valor de recorte

          ## Turn into 0 those edges ending before my threshold
          tree$edge.length[which(tree$config$YearEnd < (k))] <- 0

          ## Those remaining nodes, turn into k-l their edge.lengths
          tree$edge.length[tree$edge.length > 0] <- k-l
        }
        return(tree)
      }
      if(l == 0){
        return(tree)
      }
    })
  }

  ## If there are some void nodes/edges inside our tree pieces,
  # and the user want to remove them from each phylo piece
  if(dropNodes == TRUE){

    cutted_tree <- lapply(cutted_tree, function(tree_pieces){
      # Which are our void nodes with 0 length?
      rm_nd <- which(!(tree_pieces$edge[,2] %in% c(1:length(tree_pieces$tip.label))) & tree_pieces$edge.length == 0) # tree <- teste

      if(length(rm_nd) > 0){
        # Correcting their values
        for (i in 1:length(rm_nd)) { # i <- 1
          # which node comes after our node
          val <- tree_pieces$edge[rm_nd[i], 2]
          # all places with this value, need to be turned into its previous node
          tree_pieces$edge[which(tree_pieces$edge[,1] == val), 1] <- tree_pieces$edge[rm_nd[i], 1]
        }

        # Them, removing those edges and edgelenghts that are 0 and are node-to-node
        tree_pieces$edge <- tree_pieces$edge[-c(rm_nd), ]               # Removing from the edge matrix
        tree_pieces$edge.length <- tree_pieces$edge.length[-c(rm_nd)]   # Removing from the edge length vectors
        tree_pieces$Nnode <- length(unique(tree_pieces$edge[,1]))       # Adding the new number of nodes

        # Unique tree nodes
        oldnodes <- sort(unique(tree_pieces$edge[,1]))

        # Renaming the tree nodes
        for (i in 1:length(oldnodes)) { # i <- 1
          tree_pieces$edge[which(tree_pieces$edge[,1] == oldnodes[i]), 1] <- length(tree_pieces$tip.label) + i
          tree_pieces$edge[which(tree_pieces$edge[,2] == oldnodes[i]), 2] <- length(tree_pieces$tip.label) + i
        }
      }

      return(tree_pieces)
    })
  }

  # If the user want the time-steps used to make the slieces
  if(timeSteps == TRUE){

    # If the user want to return the original tree within the list
    if(returnTree == FALSE){
      # Return a list with all pieces and a time-step vector
      return(list(cutted_tree, age))
    } else {
      # Return a list with all pieces, a time-step vector, and the original tree
      return(list(cutted_tree, age, tree))
    }

  } else {

    # If the user want to return the original tree within the list
    if(returnTree == FALSE){
      # return only the phylogenetic pieces
      return(cutted_tree)
    } else {
      # Return a list with all pieces, a time-step vector, and the original tree
      return(list(cutted_tree, tree))
    }
  }
}

Try the treesliceR package in your browser

Any scripts or data that you put into this service are public.

treesliceR documentation built on Sept. 11, 2024, 8:47 p.m.