R/outbreaker_chains_methods.R
In outbreaker2: Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data

Documented in plot.outbreaker_chains print.outbreaker_chains summary.outbreaker_chains

#' Basic methods for processing outbreaker results
#'
#' Several methods are defined for instances of the class
#' \code{outbreaker_chains}, returned by \code{\link{outbreaker}}, including:
#' \code{print}, \code{plot}, \code{summary}
#'
#' @rdname outbreaker_chains
#'
#' @aliases outbreaker_chains print.outbreaker_chains plot.outbreaker_chains
#'   summary.outbreaker_chains
#'
#' @author Thibaut Jombart (\email{thibautjombart@@gmail.com})
#'
#' @param x an \code{outbreaker_chains} object as returned by \code{outbreaker}.
#' @param n_row the number of rows to display in head and tail; defaults to 3.
#' @param n_col the number of columns to display; defaults to 8.
#' @param ... further arguments to be passed to other methods
#'
#' @export
#' @importFrom utils head tail
#'
print.outbreaker_chains <- function(x, n_row = 3, n_col = 8, ...) {
  cat("\n\n ///// outbreaker results ///\n")
  cat("\nclass: ", class(x))
  cat("\ndimensions", nrow(x), "rows, ", ncol(x), "columns")

  ## process names of variables not shown
  if (ncol(x) > n_col) {
    ori_names <- names(x)
    x <- x[, seq_len(min(n_col, ncol(x)))]

    not_shown <- setdiff(ori_names, names(x))

    alpha_txt <- paste(not_shown[range(grep("alpha", not_shown))], collapse=" - ")
    t_inf_txt <- paste(not_shown[range(grep("t_inf", not_shown))], collapse=" - ")
    kappa_txt <- paste(not_shown[range(grep("kappa", not_shown))], collapse=" - ")

    cat("\nancestries not shown:", alpha_txt)
    cat("\ninfection dates not shown:", t_inf_txt)
    cat("\nintermediate generations not shown:", kappa_txt)
  }

  ## heads and tails
  cat("\n\n/// head //\n")
  print(head(as.data.frame(x), n_row))
  cat("\n...")
  cat("\n/// tail //\n")
  print(tail(as.data.frame(x), n_row))
}





#' @rdname outbreaker_chains
#'
#' @param y a character string indicating which element of an
#'   \code{outbreaker_chains} object to plot
#'
#' @param type a character string indicating the kind of plot to be used (see details)
#' 
#' @param group a vector of character strings indicating the parameters to display, 
#' or "all" to display all global parameters (non node-specific parameters).
#'
#' @param burnin the number of iterations to be discarded as burnin
#'
#' @param min_support a number between 0 and 1 indicating the minimum support of
#' ancestries to be plotted; only used if 'type' is 'network'
#'
#' @param labels a vector of length N indicating the case labels (must be
#'   provided in the same order used for dates of symptom onset)
#'
## #' @param dens_all a logical indicating if the overal density computed over
## all runs should be displayed; defaults to TRUE #' @param col the colors to be
## used for different runs
#'
#' @export
#'
#' @seealso See \href{http://www.repidemicsconsortium.org/outbreaker2/articles/introduction.html#graphics}{introduction vignette} for detailed examples on how to visualise \code{outbreaker_chains} objects.
#'
#' @details \code{type} indicates the type of graphic to plot:
#'
#' \itemize{
#'
#' \item \code{trace} to visualise MCMC traces for parameters or augmented data (plots the
#' log-likelihood by default)
#'
#' \item \code{hist} to plot histograms of quantitative values
#'
#' \item \code{density} to plot kernel density estimations of quantitative values
#'
#' \item \code{alpha} to visualise the posterior frequency of ancestries
#'
#' \item \code{network} to visualise the transmission tree; note that
#'  this opens up an interactive plot and requires a web browser with
#'  Javascript enabled; the argument `min_support` is useful to select only the
#'  most supported ancestries and avoid displaying too many links
#'
#' \item \code{kappa} to visualise the distributions generations between cases and their
#' ancestor/infector
#'
#' }
#'
#' @importFrom ggplot2 ggplot geom_line geom_point geom_histogram geom_density
#'   geom_violin aes aes_string coord_flip labs guides scale_size_area
#'   scale_x_discrete scale_y_discrete scale_color_manual scale_fill_manual
#'   scale_x_continuous scale_y_continuous theme_bw facet_wrap
#'
#' @importFrom stats reshape
#' @importFrom grDevices xyTable
#' @importFrom graphics plot
#'
plot.outbreaker_chains <- function(x, y = "post",
                                   type = c("trace", "hist", "density",
                                            "alpha", "t_inf", "kappa", "network"),
                                   group = NULL, 
                                   burnin = 0, min_support = 0.1, labels = NULL, ...) {

  ## CHECKS ##
  type <- match.arg(type)
  if (!y %in% names(x)) {
    stop(paste(y,"is not a column of x"))
  }

  ## THIS IS JUST TO APPEASE R CMD check

  ## hopefully cran will avoid spurious warnings along the lines of "no
  ## visible binding for global variable" when using ggplot2::aes(...)

  frequency <- NULL

  ## GET DATA TO PLOT ##
  if (burnin > max(x$step)) {
    stop("burnin exceeds the number of steps in x")
  }
  x <- x[x$step>burnin,,drop = FALSE]

  ## check group
  if(!is.null(group)) {
    if(length(group) == 1L && group == "all") {
      ##remove _[digit] vars
      y_vars = names(x)[!grepl("(_[[:digit:]]+$)", names(x))]
    } else if(all(group %in% names(x))) {
      y_vars = c("step", group)
    } else {
      stop("grouping variables not found in outbreaker object")
    }
    ## get only relevant data
    x_sub = as.data.frame(x)[,y_vars]
    ## switch it to long format to use in ggplot
    x_long = reshape(x_sub, 
                     idvar = "step", 
                     ids = x_sub$step,
                     direction = "long", 
                     new.row.names = NULL,
                     timevar = "Parameters", 
                     v.names = "y",
                     varying = list(names(x_sub)[2:ncol(x_sub)]), 
                     times = names(x_sub)[2:ncol(x_sub)])
  }

  ## MAKE PLOT ##
  if (type == "trace") {
    if (!is.null(group)) {
      out <- ggplot(x_long) +
        geom_line(aes_string(x = "step", y = "y")) +
        scale_x_continuous(name = "Iteration") + 
        scale_y_continuous(name = NULL) + 
        facet_wrap(~ Parameters, scales = "free") 
    } else {
      out <- ggplot(x) +
        geom_line(aes_string(x = "step", y = y)) +
        labs(x = "Iteration",
             y = y,
             title = paste("trace:",y))
    }
  }

  if (type == "hist") {
    if (!is.null(group)) {
      out <- ggplot(x_long) +
        geom_histogram(aes_string(x = "y")) +
        geom_point(aes_string(x = "y", y = 0),
                   shape="|",
                   alpha = 0.5,
                   size = 3) +
        scale_x_continuous(name = NULL) + 
        scale_y_continuous(name = NULL) + 
        facet_wrap(~Parameters, scales = "free") 
    } else {
      out <- ggplot(x) +
        geom_histogram(aes_string(x = y)) +
        geom_point(aes_string(x = y, y = 0),
                   shape="|",
                   alpha = 0.5,
                   size = 3) +
        labs(x = y,
             title = paste("histogram:",y))
    }
  }

  if (type == "density") {
    if (!is.null(group)) {
      out <- ggplot(x_long) +
        geom_density(aes_string(x = "y")) +
        geom_point(aes_string(x = "y", y = 0),
                   shape="|",
                   alpha = 0.5,
                   size = 3) +
        scale_x_continuous(name = NULL) + 
        scale_y_continuous(name = NULL) + 
        facet_wrap(~Parameters, scales = "free") 
    } else {
      out <- ggplot(x) +
        geom_density(aes_string(x = y)) +
        geom_point(aes_string(x = y, y = 0),
                   shape="|",
                   alpha = 0.5,
                   size = 3) +
        labs(x = y,
             title = paste("density:",y))
    }
  }

  if (type == "alpha") {
    alpha <- as.matrix(x[,grep("alpha", names(x))])
    colnames(alpha) <- seq_len(ncol(alpha))
    from <- as.vector(alpha)
    to <- as.vector(col(alpha))
    from[is.na(from)] <- 0
    out_dat <- data.frame(xyTable(from,to))
    names(out_dat) <- c("from", "to", "frequency")
    ## Calculate proportion among ancestries
    get_prop <- function(i) {
        ind <- which(out_dat$to == out_dat$to[i])
        out_dat[[3]][i]/sum(out_dat[[3]][ind])
    }
    ## Return labels, if provided
    get_alpha_lab <- function(axis, labels = NULL) {
      if(is.null(labels)) labels <- seq_len(ncol(alpha))
      if(axis == 'x') return(labels) else
      if(axis == 'y') return(c("Import", labels))
    }
    ## Return custom colors if provided
    get_alpha_color <- function(color = NULL) {
      if(is.null(color)) return(NULL)
      else return(scale_color_manual(values = color))
    }
    ## This joining function is needed so that the '...' argument can be passed
    ## to two functions with different arguments
    get_lab_color <- function(labels = NULL, color = NULL) {
      list(alpha_lab_x = get_alpha_lab('x', labels),
           alpha_lab_y = get_alpha_lab('y', labels),
           alpha_color = get_alpha_color(color))
    }

    tmp <- get_lab_color(labels, ...)
    out_dat[3] <- vapply(seq_along(out_dat[[3]]), get_prop, 1)
    out_dat$from <- factor(out_dat$from, levels = c(0, sort(unique(out_dat$to))))
    out_dat$to <- factor(out_dat$to, levels = sort(unique(out_dat$to)))
    out <- ggplot(out_dat) +
      geom_point(aes(x = to, y = from, size = frequency, color = to)) +
      scale_x_discrete(drop = FALSE, labels = tmp$alpha_lab_x) +
      scale_y_discrete(drop = FALSE, labels = tmp$alpha_lab_y) +
      labs(x = 'To', y = 'From', size = 'Posterior\nfrequency') +
      tmp$alpha_color +
      scale_size_area() +
      guides(colour = FALSE)
  }

  if (type == "t_inf") {
    get_t_inf_lab <- function(labels = NULL) {
      N <- ncol(t_inf)
      if(is.null(labels)) labels <- 1:N
      return(labels)
    }
    ## Return custom colors if provided
    get_t_inf_color <- function(color = NULL) {
      if(is.null(color)) return(NULL)
      else return(scale_fill_manual(values = color))
    }
    ## This joining function is needed so that the '...' argument can be passed
    ## to two functions with different arguments
    get_lab_color <- function(labels = NULL, color = NULL) {
      list(t_inf_lab_x = get_t_inf_lab(labels),
           t_inf_color = get_t_inf_color(color))
    }

    t_inf <- as.matrix(x[,grep("t_inf", names(x))])
    tmp <- get_lab_color(labels, ...)
    dates <- as.vector(t_inf)
    cases <- as.vector(col(t_inf))
    out_dat <- data.frame(cases = factor(cases), dates = dates)
    out <- ggplot(out_dat) +
      geom_violin(aes(x = cases, y = dates, fill = cases)) +
      coord_flip() + guides(fill = FALSE) +
      labs(y = 'Infection time', x = NULL) +
      tmp$t_inf_color +
      scale_x_discrete(labels = tmp$t_inf_lab)
  }

  if (type == "kappa") {
    get_kappa_lab <- function(labels = NULL) {
      N <- ncol(kappa)
      if(is.null(labels)) labels <- 1:N
      return(labels)
    }
    kappa <- as.matrix(x[,grep("kappa", names(x))])
    generations <- as.vector(kappa)
    cases <- as.vector(col(kappa))
    to_keep <- !is.na(generations)
    generations <- generations[to_keep]
    cases <- cases[to_keep]
    out_dat <- data.frame(xyTable(generations, cases))
    get_prop <- function(i) {
        ind <- which(out_dat$y == out_dat$y[i])
        out_dat[[3]][i]/sum(out_dat[[3]][ind])
    }
    out_dat[3] <- vapply(seq_along(out_dat[[3]]), get_prop, 1)
    names(out_dat) <- c("generations", "cases", "frequency")
    out <- ggplot(out_dat) +
      geom_point(aes(x = generations, y = as.factor(cases), size = frequency, color = factor(cases))) +
      scale_size_area() +
      scale_y_discrete(labels = get_kappa_lab(labels)) +
      guides(colour = FALSE) +
      labs(title = "number of generations between cases",
           x = "number of generations to ancestor",
           y = NULL)
  }

  if (type == "network") {
    ## extract edge info: ancestries
    alpha <- x[, grep("alpha",names(x)), drop = FALSE]
    from <- unlist(alpha)
    to <- as.vector(col(alpha))
    N <- ncol(alpha)
    edges <- stats::na.omit(data.frame(xyTable(from, to)))
    edges[3] <- edges$number/nrow(alpha)
    names(edges) <- c("from", "to", "value")
    edges <- edges[edges$value > min_support,,drop = FALSE]
    edges$arrows <- "to"
    case_cols <- cases_pal(N)
    edges$color <- case_cols[edges$from]


    ## ## extract edge info: timing
    ## t_inf <- x[, grep("t_inf",names(x)), drop = FALSE]
    ## mean_time <- apply(t_inf, 2, mean)
    ## mean_delay <- mean_time[edges$to] - mean_time[edges$from]
    ## mean_delay[mean_delay<1] <- 1
    ## edges$label <- paste(round(mean_delay), "days")

    ## node info
    find_nodes_size <- function(i) {
      sum(from==i, na.rm = TRUE) / nrow(alpha)
    }
    get_node_lab <- function(labels = NULL) {
      if(is.null(labels)) labels <- 1:N
      return(labels)
    }
    nodes <- data.frame(id = seq_len(ncol(alpha)),
                        label = seq_len(ncol(alpha)))
    nodes$value <- vapply(nodes$id,
                          find_nodes_size,
                          numeric(1))
    nodes$color <- case_cols
    nodes$shape <- rep("dot", N)
    nodes$label <- get_node_lab(labels)

    smry <- summary(x, burnin = burnin)
    is_imported <- is.na(smry$tree$from)
    nodes$shaped[is_imported] <- "star"

    ## generate graph
    out <- visNetwork::visNetwork(nodes = nodes, edges = edges, ...)
    out <- visNetwork::visNodes(out, shadow = list(enabled = TRUE, size = 10),
                                color = list(highlight = "red"))
    out <- visNetwork::visEdges(out, arrows = list(
      to = list(enabled = TRUE, scaleFactor = 0.2)),
      color = list(highlight = "red"))

  }

  return(out)
}




#' @rdname outbreaker_chains
#' @param object an \code{outbreaker_chains} object as returned by \code{outbreaker}.
#'
#' @param method the method used to determine consensus ancestries. 'mpa'
#'   (maximum posterior ancestry) simply returns the posterior ancestry with the
#'   highest posterior support for each case, even if this includes
#'   cycles. 'decycle' will return the maximum posterior ancestry, except when
#'   cycles are detected, in which case the link in the cycle with the lowest
#'   support is pruned and the tree recalculated.
#'
#' @export
#' @importFrom stats median
summary.outbreaker_chains <- function(object, burnin = 0, method = c("mpa", "decycle"), ...) {
  ## check burnin ##
  x <- object
  if (burnin > max(x$step)) {
    stop("burnin exceeds the number of steps in object")
  }
  x <- x[x$step>burnin,,drop = FALSE]


  ## make output ##
  out <- list()


  ## summary for $step ##
  interv <- ifelse(nrow(x)>2, diff(tail(x$step, 2)), NA)
  out$step <- c(first = min(x$step),
                last = max(x$step),
                interval = interv,
                n_steps = length(x$step)
                )


  ## summary of post, like, prior ##
  out$post <- summary(x$post)
  out$like <- summary(x$like)
  out$prior <- summary(x$prior)


  ## summary for mu ##
  out$mu <- summary(x$mu)

  ## summary for pi ##
  out$pi <- summary(x$pi)


  ## summary tree ##
  out$tree <- list()

  ## summary of alpha ##
  alpha <- as.matrix(x[,grep("alpha", names(x))])

  method <- match.arg(method)

  if(method == 'mpa') {

    ## function to get most frequent item
    f1 <- function(x) {
      as.integer(names(sort(table(x, exclude = NULL), decreasing = TRUE)[1]))
    }
    out$tree$from <- apply(alpha, 2, f1)
    out$tree$to <- seq_len(ncol(alpha))

    ## function to get frequency of most frequent item
    f2 <- function(x) {
      (sort(table(x), decreasing = TRUE)/length(x))[1]
    }
    support <- apply(alpha, 2, f2)

  } else if(method == 'decycle') {

    cons <- .decycle_tree(x)
    out$tree$from <- cons$from
    out$tree$to <- cons$to
    support <- cons$support

  }

  ## summary of t_inf ##
  t_inf <- as.matrix(x[,grep("t_inf", names(x))])
  out$tree$time <- apply(t_inf, 2, median)

  out$tree$support <- support

  ## summary of kappa ##
  kappa <- as.matrix(x[,grep("kappa", names(x))])
  out$tree$generations <- apply(kappa, 2, median, na.rm = TRUE)
  out$tree$generations[is.na(out$tree$from)] <- NA

  ## shape tree as a data.frame
  out$tree <- as.data.frame(out$tree)
  rownames(out$tree) <- NULL

  return(out)
}

Any scripts or data that you put into this service are public.

outbreaker2 documentation built on May 23, 2022, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

outbreaker2
Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data

R/outbreaker_chains_methods.R
In outbreaker2: Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data

Defines functions summary.outbreaker_chains plot.outbreaker_chains print.outbreaker_chains

Documented in plot.outbreaker_chains print.outbreaker_chains summary.outbreaker_chains

Try the outbreaker2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

outbreaker2 Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data

R/outbreaker_chains_methods.R In outbreaker2: Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data

Defines functions summary.outbreaker_chains plot.outbreaker_chains print.outbreaker_chains

Documented in plot.outbreaker_chains print.outbreaker_chains summary.outbreaker_chains

Try the outbreaker2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

outbreaker2
Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data

R/outbreaker_chains_methods.R
In outbreaker2: Bayesian Reconstruction of Disease Outbreaks by Combining Epidemiologic and Genomic Data