R/jplace_expand.R

#' Expanded \code{.jplace} files. 
#' 
#' This is an utility function to expand \code{.jplace} files generated by \code{RAxML} and \code{pplacer}.
#' 
#' @param input Input file, absolute path or relative to the current working directory (a jplace file).
#' @param output Ouput file, absolute path or relative to the current working directory  (a jplace file).
#' If set to \code{NULL}, file content is returned in R directly as a character string.
#' @param round.brlen numeric value. Number of digits to round branch lengths of the tree.
#' If \code{NA}, the tree is not modified.
#' @param likw.max numeric value. If not \code{NA}, the replacements with an inferior likelihood weight are dropped.
#' 
#' @details This utility function is used to generate \code{.jplace} files which can be used with the function \code{tog}
#' of \code{guppy} to get phylogenetic trees.
#' 
#' @references Matsen F.A., Hoffman N.G., Gallagher A. & Stamatakis A. (2012) A Format for Phylogenetic Placements. PLoS ONE 7, e31009.
#' @export
jplaceExpand <- function(input, output, round.brlen = 6, likw.max = NA){
  
  options(scipen = 999)
  json <- fromJSON(input, simplifyDataFrame = FALSE)
  tree <- json$tree
  
  ##### Prepare data
  pqueries.values <- lapply(json$placements, function(x) x[["p"]])
  if(!is.na(likw.max)){
    sel.row.likw <- function(x){
      res <- x[x[, 3] > likw.max, , drop = FALSE]
      if(nrow(res) == 0){
        res <- x[1, , drop = FALSE]
      }
      return(res)
    }
    pqueries.values <- lapply(pqueries.values, sel.row.likw)
  }
  pqueries.n <- unlist(lapply(pqueries.values, nrow))
  pqueries.names <- lapply(json$placements, function(x) x[["n"]])
  pqueries.names <- rep(pqueries.names, pqueries.n)
  pqueries.names <- paste(pqueries.names, unlist(lapply(pqueries.n, function(x) seq(1, x))), sep = "_")
  
  pqueries.values <- lapply(pqueries.values, function(x) split(x, 1:nrow(x)))
  pqueries.values <- unlist(pqueries.values, recursive = FALSE)
  names(pqueries.values) <- NULL
  
  ##### Round tree branch lengths
  if(!is.na(round.brlen)){
    
    match.reg <- gregexpr("(?<=:)(.*?)(?={)", json$tree, perl = TRUE)
    
    capt.length <- as.vector(attr(match.reg[[1]], "capture.length"))
    capt.start <- as.vector(attr(match.reg[[1]], "capture.start"))
    capt.end <- capt.start + capt.length
    
    rounded.length <- round(as.numeric(unlist(regmatches(json$tree, match.reg))), digits = round.brlen)
    rounded.length <- as.character(rounded.length)
    n.zero <- capt.length - nchar(rounded.length)
    rounded.length <- paste0(rounded.length, lapply(n.zero, function(x) paste0(rep(0, times = x), collapse = "")))

    for (i in 1:length(rounded.length)){
      substr(tree, capt.start[i], capt.end[i]) <- rounded.length[i]
    }
    
  }
  
  ##### Compile JSON
  placement.JSON <- sapply(pqueries.values, function(x) paste0(x, collapse = ", "))
  placement.JSON <- paste0('\t{"p":[[', placement.JSON, ']], "n":["', pqueries.names, '"]}', collapse = ',\n')
  
  json.out <- paste0('{\n\t"tree": "',
                    tree,
                    '",\n\t"placements": [\n',
                    placement.JSON,
                    '\n\t],\n',
                    '\t"metadata": {"invocation": "',
                    json$metadata$invocation,
                    '", "raxml_version": "',
                    json$metadata$raxml_version,
                    '"},\n',
                    '\t"version": ',
                    json$version,
                    ',\n\t',
                    '"fields": [\n\t',
                    paste0('"', json$fields, '"', collapse = ", "),
                    '\n\t]\n}')
  
  if(is.null(output)){
    return(json.out)
  } else {
    write(json.out, file = output) 
  }

}
fkeck/diatobc documentation built on May 12, 2019, 7:18 p.m.