R/plot_tree.R

Defines functions plot_tree

Documented in plot_tree

#' Plot Decision Tree Function
#'
#' This function creates a plot of the decision tree with customizable appearance.
#' @param tree a decision tree/tree object generated by create_tree()
#' @param edge.label.display edge label mode, options:
#'  - "all","both": concatenate edge names and probabilities (default)
#'  - "label": show edge names only
#'  - "probability": show edge probabilities only
#'  - "none": don't show edge labels
#' @param vertex.label.display vertex label mode, options:
#'  - "all": show labels for all vertices
#'  - "end": show labels only for end vertices (default)
#'  - "none": don't show vertex labels
#' @param edge.label.position how far along the edge's straight part to display the label
#'   ([0-1], default = 0.33)
#' @param vertex.size size of vertices in mm (default = 7)
#' @param font.size font size in edge and vertex labels (default = 3)
#' @param plot.padding left and right padding to add to the plot, options:
#'  - "auto": automatically set based on font size and max label length (default)
#'  - c(left, right): manually set based on provided vector
#' @param color.root color of the root node (default = "yellow")
#' @param color.internal color of internal, i.e. not root or end nodes (default = "green")
#' @param color.end color of the end nodes (default = "red")
#' @param theme theme to apply to the final plot (default = empty)
#'   refer to https://ggplot2.tidyverse.org/reference/theme.html for more on ggplot2 themes
#'
#'
#' @examples
#' tree <- plot_tree(read.csv("branches.csv"))
#'
#' @export

plot_tree <- function(tree,
                      edge.label.display = "both",
                      vertex.label.display = "end",
                      edge.label.position = 0.33,
                      vertex.size = 7,
                      font.size = 3,
                      plot.padding = "auto",
                      color.root="yellow",
                      color.internal="green",
                      color.end="red",
                      theme=FALSE)
{

  # arrange the edges following a dendrogram layout
  layout <- create_layout(tree,'dendrogram')
  data <- get_edges("short")(layout)

  # check for original index column, may have different name in different R versions
  if("node1.ggraph.orig_index" %in% colnames(data)){

    # using the layout's edge ordering, get a list of edges with all attributes
    edge_data <- E(tree, P = c(rbind(data[, "node1.ggraph.orig_index"],
                                     data[, "node2.ggraph.orig_index"])))
  } else {

    # using the layout's edge ordering, get a list of edges with all attributes
    edge_data <- E(tree, P = c(rbind(data[, "node1..ggraph.orig_index"],
                                     data[, "node2..ggraph.orig_index"])))
  }

  # don't show probability-1 edges
  prob_labels <- ifelse(edge_data$probability != 1, edge_data$probability, "")


  # if edge.label.display is "all"or "both", show all edge labels
  if(edge.label.display %in% c("all", "both")){
    edge_labels <- paste(edge_data$name, prob_labels, sep = "\n")

  # if edge.label.display is "label", show only edge labels
  } else if(edge.label.display == "label"){
    edge_labels <- paste(edge_data$name, "", sep = "\n")

  # if edge.label.display is "probability", show only edge probabilities
  } else if(edge.label.display == "probability"){
    edge_labels <- paste(prob_labels, "", sep = "\n")

  # if edge.label.display is "none", don't show edge labels
  } else if(edge.label.display == "none"){
    edge_labels <- ""

  } else {
    stop("Invalid value for parameter: edge.label.display")
  }


  # if vertex.diplay is "all", show all vertex labels
  if (vertex.label.display == "all") {
    vertex_labels <- layout$label

  # if vertex.diplay is "end", show only end vertex labels
  } else if (vertex.label.display == "end") {
    vertex_labels <- ifelse(layout$type == END, as.character(layout$label), NA)

  # otherwise don't show any vertex labels
  } else if (vertex.label.display == "none") {
    vertex_labels <- NA

  # if vertex.diplay is "none", don't show vertex labels
  } else {
    stop("Invalid value for parameter: vertex.label.display")
  }

  # add auto padding to plot based on overflow of labels, i.e. max label size
  if (plot.padding == "auto") {
    if(vertex.label.display != "none") { # if there actually are labels shown
      padding = c(.05, font.size * max(
        ifelse(is.na(vertex_labels), 0,
               str_length(vertex_labels))) / 300)

    } else { # if labels aren't shown
      padding = c(.05, .05)
    }

  } else { # add user-defined padding
    padding = plot.padding
  }

  # if a theme was provided apply it, otherwise apply empty theme
  if (theme != FALSE) {
    plot_theme = theme
  } else {
    plot_theme = theme(axis.text.y = element_blank(),
                       axis.text.x = element_blank(),
                       axis.ticks = element_blank(),
                       axis.title.x = element_blank(),
                       axis.title.y = element_blank(),
                       panel.background = element_blank(),
                       panel.grid.major = element_blank(),
                       panel.grid.minor = element_blank(),
                       plot.background = element_blank()
    )
  }

  # ggraph doesn't have triangle vertices, so we use flipped arrows instead
  # set edge arrow size to vertex.size if target is end, 0 otherwise
  arrow_sizes <- unit(vertex.size * (data$node2.type == END), "mm")
  edge_arrows <- arrow(angle = 140, length = arrow_sizes, ends = "last", type = "closed")

  # To achieve the particular common style of decision trees, we will overlay
  # three different sets of edges over the same vertices:
  # - elbow edges with hidden lines and visible arrows, for end vertices
  # - fan (line) edges with modified start positions for the angled part
  # - fan (line) edges with modified start, end positions for the straight part

  # create the tree plot, flip it to horizontal
  plot <- ggraph(tree, 'dendrogram') + coord_flip()

  # flip left-to-right and add some padding
  plot <- plot + scale_y_reverse(expand = expand_scale(padding))

  # set the edge color scale to the end color to prevent transparency
  plot <- plot + scale_edge_colour_manual(values = color.end)

  # add the flipped edge arrows, hide the actual line
  plot <- plot + geom_edge_elbow(
    aes(direction = 0, colour = color.end, x=xend, y = y - 0.5),
    # start_cap = rectangle(10, 0.1999, 'native', 'native'),
    arrow = edge_arrows,
    edge_width = 0,
    show.legend = FALSE
  )

  # add angled parts of edges
  plot <- plot + geom_edge_fan(
    aes(yend = y - 0.5)
  )

  # add straight parts of edges, apply edge labels
  plot <- plot + geom_edge_fan(
    aes(y = y - 0.5, x = xend, label = edge_labels),
    label_pos = edge.label.position,
    label_size = font.size,
    angle_calc = 'along'
  )

  # set shape, size and color of nodes in tree
  plot <- plot + geom_node_point(
    size = ifelse(layout$type == END, 0, vertex.size), # 0 if end vertex
    shape = ifelse(layout$type %in% c(DECISION, ROOT), 15, 16), # square/circle
    color = ifelse(layout$type == ROOT, color.root, color.internal)
  )

  # temporarily suppress warnings caused by a bug in nudge_y
  oldw <- getOption("warn")
  options(warn = -1)

  # add node labels, justify ROOT right, ENDs left and others center
  plot <- plot + geom_node_label(
    aes(label = vertex_labels, hjust = "left"),
    nudge_y = ifelse(layout$type == END, vertex.size * .04, vertex.size * .03),
    size = font.size,
    label.padding = unit(font.size * .1 - .1, "lines") # adjust padding with font
  )

  # set axis titles, legends, etc.
  plot <- plot + plot_theme

  # show the plot in a new window and restore warnings
  print(plot)
  options(warn = oldw)
}
DARTH-git/Dectree documentation built on Feb. 6, 2020, 6:57 p.m.