R/phase2_atree.R

Defines functions resource_utilization_tree_node panel_activenodes panel_nodememuse nodememuse_check panel_utiltreedepth panel_utiltreenode panel_atree panel_atree_structure

Documented in panel_activenodes panel_atree panel_atree_structure panel_nodememuse panel_utiltreedepth panel_utiltreenode resource_utilization_tree_node

#' Create the elimination tree structure plot along time
#'
#' Use Atree and Application data to create the elimination tree
#' structure plot in a ggplot object and return it
#'
#' @param data starvz_data with trace data
#' @param end_arrow behavior of the end arrow [ParentEnd, ComputationEnd]
#' @return A ggplot object
#' @include starvz_data.R
#' @examples
#' \dontrun{
#' panel_atree_structure(starvz_sample_lu)
#' }
#' @export
panel_atree_structure <- function(data = NULL, end_arrow = "ParentEnd") {
  if (is.null(data)) stop("input data for geom_atree_plot is NULL")

  makespan <- data$Application %>%
    .$End %>%
    max()

  dtree <- tibble()

  # make child end point to parent end
  if (tolower(end_arrow) == "parentend") {
    dtree <- data$Atree %>%
      # Get Start time of the first task belonging to each ANode
      left_join(
        data$Application %>%
          select("ANode", "Start", "End") %>%
          group_by(.data$ANode) %>%
          summarize(
            Start = min(.data$Start),
            End = max(.data$End)
          ),
        by = "ANode"
      ) %>%
      # Get graphical properties of Parent for each ANode
      left_join(
        x = .,
        y = .,
        by = c("Parent" = "ANode"), suffix = c("", ".Parent")
      ) %>%
      select(-"Parent.Parent") %>%
      # Keep only non pruned nodes for tree structure
      filter(.data$NodeType != "Pruned") %>%
      # Calculate coordinates for lines connecting child with parent
      # Values used in the Start line
      mutate(
        Edge.X = .data$Start,
        Edge.Y = .data$Position + .data$Height / 2,
        Edge.Xend = .data$Start.Parent,
        Edge.Yend = .data$Position.Parent + .data$Height.Parent / 2
      ) %>%
      # Values used in the End line
      mutate(
        Edge.End.X = .data$End,
        Edge.End.Y = .data$Position + .data$Height / 2,
        Edge.End.Xend = .data$End.Parent,
        Edge.End.Yend = .data$Position.Parent + .data$Height.Parent / 2
      )

    # else child nodes point to the parent Y in the last computational task time X among the children
  } else {
    dtree <- data$Atree %>%
      # Get Start time of the first task belonging to each ANode (keep Start as it is, but consider two end points clean and comptation)
      left_join(
        data$Application %>%
          mutate(EndType = case_when(
            grepl("clean", .data$Value) ~ "CleanEnd",
            TRUE ~ "ComputationEnd"
          )) %>%
          select("ANode", "Start", "End", "EndType") %>%
          group_by(.data$ANode, .data$EndType) %>%
          mutate(End = max(.data$End)) %>%
          group_by(.data$ANode) %>%
          mutate(Start = min(.data$Start)),
        by = "ANode"
      ) %>%
      # Keep only non pruned nodes for tree structure
      filter(.data$NodeType != "Pruned") %>%
      ungroup() %>%
      unique() %>%
      spread(.data$EndType, .data$End) %>%
      # Calculate coordinates for lines connecting child with parent
      # Get graphical properties of Parent for each ANode
      left_join(
        x = .,
        y = .,
        by = c("Parent" = "ANode"), suffix = c("", ".Parent")
      ) %>%
      select(-"Parent.Parent") %>%
      group_by(.data$Parent) %>%
      mutate(ComputationEnd.Parent = max(.data$ComputationEnd)) %>%
      ungroup() %>%
      # Calculate coordinates for lines connecting child with parent
      # Values used in the Start line (keep the same)
      mutate(
        Edge.X = .data$Start,
        Edge.Y = .data$Position + .data$Height / 2,
        Edge.Xend = .data$Start.Parent,
        Edge.Yend = .data$Position.Parent + .data$Height.Parent / 2
      ) %>%
      # Values used in the End line
      mutate(
        End = .data$CleanEnd, # last moment for node lifetime
        Edge.End.X = .data$ComputationEnd,
        Edge.End.Y = .data$Position + .data$Height / 2,
        Edge.End.Xend = .data$ComputationEnd.Parent,
        Edge.End.Yend = .data$Position.Parent + .data$Height.Parent / 2
      )
  }


  # data frame for the tree plot structure
  dstruct <- dtree %>%
    # Remove nodes without parent
    filter(!is.na(.data$Parent)) %>%
    # The root has no tasks associated with, remove it.
    mutate(Parent = as.integer(.data$Parent)) %>%
    filter(.data$Parent != max(.data$Parent))

  # data frame for node "lifetime" geom_segment
  dline <- dtree %>%
    select("ANode", "Start", "End", "Position", "Height", "Parent") %>%
    filter(!is.na(.data$Parent), !is.na(.data$Start))

  atreeplot <- ggplot() +
    # Lines connecting child with parent (Start)
    geom_segment(
      data = dstruct,
      aes(
        x = .data$Edge.X,
        y = .data$Edge.Y,
        xend = .data$Edge.Xend,
        yend = .data$Edge.Yend
      ), arrow = arrow(length = unit(0.03, "npc")), color = "#1B9E77"
    ) +
    geom_point(
      data = dstruct,
      aes(
        x = .data$Edge.X,
        y = .data$Edge.Y
      ), color = "#1B9E77"
    ) +
    # Lines connecting child with parent (End)
    geom_segment(
      data = dstruct,
      arrow = arrow(length = unit(0.03, "npc")),
      aes(
        x = .data$Edge.End.X,
        y = .data$Edge.End.Y,
        xend = .data$Edge.End.Xend,
        yend = .data$Edge.End.Yend
      ), color = "#D95F02"
    )

  # End free memory point (use for end_arrow="ComputationEnd" only)
  if (tolower(end_arrow) != "parentend") {
    atreeplot <- atreeplot +
      geom_point(
        data = dline,
        aes(
          y = .data$Position + .data$Height / 2,
          x = .data$End,
        ), color = "black"
      )
  }

  atreeplot <- atreeplot +
    geom_point(
      data = dstruct,
      aes(
        x = .data$Edge.End.X,
        y = .data$Edge.End.Y
      ), color = "#D95F02"
    ) +
    # Horizontal lines
    geom_segment(
      data = dline,
      aes(
        y = .data$Position + .data$Height / 2,
        yend = .data$Position + .data$Height / 2,
        x = .data$Start,
        xend = .data$End
      ), color = "lightblue"
    )

  return(atreeplot)
}

#' Create the elimination tree plot with some options in the config file
#'
#' Use starvz_data to create a representation of the elimination tree structure
#' considering initialization, communication, and computational tasks. These
#' representations can be controlled in the configuration file.
#'
#' @param data starvz_data with trace data
#' @param step size in milliseconds for the time aggregation step
#' @param legend enable/disable panel legend
#' @param zoom enable/disable vertical zoom in the tree structure
#' @param computation enable/disable computations representations in the tree
#' @param pruned enable/disable pruned computations representations in the tree
#' @param initialization enable/disable initialization tasks representation
#' @param x_start X-axis start value
#' @param x_end X-axis end value
#' @param communication enable/disable communication tasks representation
#' @param anomalies enable/disable anomalies tasks representation
#' @param performance_metric which metric to represent ["time", "gflops"]
#' @param level draw a dashed line to divide the tree at the level h
#' @param end_arrow behavior of the end arrow [ParentEnd, ComputationEnd]
#' @return A ggplot object
#' @examples
#' \dontrun{
#' panel_atree(starvz_sample_lu, step = 10)
#' panel_atree(starvz_sample_lu,
#'   step = 20,
#'   communication = FALSE, initialization = FALSE
#' )
#' }
#' @export
panel_atree <- function(data = NULL, step = data$config$atree$step, legend = data$config$atree$legend, zoom = FALSE,
                        computation = data$config$atree$computation$active,
                        pruned = data$config$atree$computation$pruned$active,
                        initialization = data$config$atree$initialization$active,
                        x_start = data$config$limits$start,
                        x_end = data$config$limits$end,
                        communication = data$config$atree$communication$active,
                        anomalies = data$config$atree$anomalies$active,
                        performance_metric = "time",
                        level = 0,
                        end_arrow = "ParentEnd") {
  starvz_check_data(data, tables = list("Atree" = c("ANode")))

  if (is.null(step) || !is.numeric(step)) {
    if (is.null(data$config$global_agg_step)) {
      step <- 100
    } else {
      step <- data$config$global_agg_step
    }
  }

  if (is.null(x_start) || (!is.na(x_start) && !is.numeric(x_start))) {
    x_start <- NA
  }

  if (is.null(x_end) || (!is.na(x_end) && !is.numeric(x_end))) {
    x_end <- NA
  }

  data_utilization_node <- resource_utilization_tree_node(data$Application, data$Atree,
    step = step, group_pruned = TRUE, performance_metric = performance_metric
  ) %>%
    filter(.data$Value1 != 0) %>%
    select("ANode", "Slice", "Value1") %>%
    ungroup() %>%
    rename(NodeUsage = "Value1") %>%
    left_join(data$Atree, by = "ANode")

  # Resize for not pruned nodes, first and last computational tasks
  data_tree_utilization_not_pruned <- data_utilization_node %>%
    inner_join(
      data$Application %>%
        filter(grepl("qrt", .data$Value)) %>%
        select("ANode", "End", "Start") %>%
        group_by(.data$ANode) %>%
        mutate(Start = min(.data$Start), End = max(.data$End)) %>%
        select("ANode", "Start", "End") %>%
        unique(),
      by = "ANode"
    ) %>%
    mutate(Start = ifelse(.data$Start >= .data$Slice, .data$Start, .data$Slice)) %>%
    mutate(End = ifelse(.data$Slice + step >= .data$End, .data$End, .data$Slice + step))

  # 1. Add the atree structure representation first
  atreeplot <- panel_atree_structure(data, end_arrow = end_arrow) +
    default_theme(data$config$base_size, data$config$expand) +
    ylab("Elimination Tree\n[Submission Order]")

  # Add the computation, initialization, and communication over the tree structure plot
  # 2. Add computations
  # 2.1 Add the not pruned computation representation
  if (computation) {
    atreeplot <- atreeplot +
      atree_geom_rect_gradient(data_tree_utilization_not_pruned, yminOffset = 0, ymaxOffset = 1)
  }
  # 2.2 Add the pruned computation representation
  # TODO can't use pruned with gflops performance metric, fix this
  if (pruned) {
    # Manipulate data for the tree resource utilization plots
    # Resize for pruned nodes Start and End of the aggregated pruned nodes
    data_tree_utilization_pruned <- data_utilization_node %>%
      inner_join(
        data$Application %>%
          filter(grepl("do_subtree", .data$Value)) %>%
          select("ANode", "End", "Start") %>%
          group_by(.data$ANode) %>%
          mutate(Start = min(.data$Start), End = max(.data$End)) %>%
          unique(),
        by = "ANode"
      ) %>%
      group_by(.data$Parent, .data$Position) %>%
      mutate(Start = min(.data$Start), End = max(.data$End)) %>%
      mutate(Start = ifelse(.data$Start >= .data$Slice, .data$Start, .data$Slice)) %>%
      mutate(End = ifelse(.data$Slice + step >= .data$End, .data$End, .data$Slice + step))

    atreeplot <- atreeplot +
      atree_geom_rect_gradient(data_tree_utilization_pruned, yminOffset = 0.25, ymaxOffset = 0.75)
  }
  if (tolower(performance_metric) == "time") {
    # 2.3 Add the color to represent resource utilization
    atreeplot <- atreeplot +
      scale_fill_gradient2(
        name = paste0("Computational Load %"),
        limits = c(0, 100), breaks = c(0, 50, 100),
        midpoint = 50, low = "blue", mid = "yellow", high = "red"
      ) +
      theme(legend.title = element_text(size = rel(0.8)))
  } else {
    minGFlop <- round(min(data_utilization_node$NodeUsage), 1)
    maxGFlop <- round(max(data_utilization_node$NodeUsage), 1)
    midGFlop <- round((maxGFlop + minGFlop) / 2, 1)

    atreeplot <- atreeplot +
      scale_fill_gradient2(
        name = "GFlops Throughput", limits = c(minGFlop, maxGFlop), breaks = c(minGFlop, midGFlop, maxGFlop),
        midpoint = midGFlop, low = "blue", mid = "yellow", high = "red"
      ) +
      theme(legend.title = element_text(size = rel(0.8)))
  }

  # 3. Add initialization tasks representation
  if (initialization) {
    # filter initialization tasks
    dfw_init <- data$Application %>%
      filter(grepl("init_", .data$Value)) %>%
      unique() %>%
      select(-"Position", -"Height") %>%
      left_join(data$Atree, by = "ANode") %>%
      filter(!is.na(Position))

    atreeplot <- atreeplot +
      atree_geom_rect(dfw_init, "#4DAF4A", yminOffset = 0, ymaxOffset = 1, alpha = 0.7)
  }

  # 4. Add "communication" (block_copy | block_extadd) tasks in the tree nodes
  if (communication) {
    # filter communication tasks
    dfw_comm <- data$Application %>%
      filter(grepl("block_copy", .data$Value) | grepl("block_extadd", .data$Value)) %>%
      unique() %>%
      select(-"Position", -"Height") %>%
      left_join(data$Atree, by = "ANode") %>%
      filter(!is.na(.data$Position))

    atreeplot <- atreeplot +
      atree_geom_rect(dfw_comm, "#000000", yminOffset = 0.25, ymaxOffset = 0.75, alpha = 0.7)
  }

  # 5. add division in the tree by its levels
  if (level > 1) {
    cut_positions <- data$Atree %>%
      filter(.data$Depth == level & .data$Intermediary) %>%
      arrange(-.data$Position) %>%
      pull(.data$Position)
    cut_positions <- cut_positions[1:length(cut_positions)] + 1

    atreeplot <- atreeplot +
      geom_hline(aes(yintercept = cut_positions, linetype = "dashed"), color = "black") +
      scale_linetype_identity(name = "", breaks = c("dashed"), labels = paste0("Cut at level ", level), guide = "legend")
  }

  # 6. Vertical zoom
  if (zoom) {
    z.start <- data$config$atree$zoom$start
    z.end <- data$config$atree$zoom$end
    max.y.coordinate <- (data$Atree %>% pull(.data$Position) %>% max()) +
      (data$Atree %>% pull(.data$Height) %>% max())
    tzScale <- list(
      coord_cartesian(
        xlim = c(x_start, x_end),
        ylim = c(
          z.start / 100 * max.y.coordinate,
          z.end / 100 * max.y.coordinate
        )
      )
    )
    atreeplot <- atreeplot + tzScale
  } else {
    tzScale <- list(
      coord_cartesian(
        xlim = c(x_start, x_end)
      )
    )
    atreeplot <- atreeplot + tzScale
  }

  # 7. Add representation for anomalies in the tree
  if (anomalies) {
    atreeplot <- atreeplot +
      atree_geom_anomalies(data)
  }

  # 8. Add legend to the plot
  if (legend) {
    atreeplot <- atreeplot +
      guides(shape = FALSE, alpha = FALSE, color = FALSE) +
      theme(
        legend.position = "top",
        legend.justification = "left",
        legend.box.just = "left",
        legend.direction = "horizontal",
        legend.text = element_text(size = rel(0.8), colour = "black")
      )
  } else {
    atreeplot <- atreeplot + theme(legend.position = "none")
  }
  return(atreeplot)
}

#' Create the resource utilization by tree node plot
#'
#' Use starvz_data Application and Atree to create a plot that shows the
#' total resource utilization, painted by tree node using geom_ribbon. The
#' colors are reused between nodes, not tied to a specific tree node.
#'
#' @param data starvz_data with trace data
#' @param step size in milliseconds for the time aggregation step
#' @param x_start X-axis start value
#' @param x_end X-axis end value
#' @return A ggplot object
#' @examples
#' \dontrun{
#' panel_utiltreenode(data = starvz_sample_lu, step = 100)
#' }
#' @export
panel_utiltreenode <- function(data = NULL,
                               step = data$config$utiltreenode$step,
                               x_start = data$config$limits$start,
                               x_end = data$config$limits$end) {
  starvz_check_data(data, tables = list("Atree" = c("ANode")))

  if (is.null(step) || !is.numeric(step)) {
    if (is.null(data$config$global_agg_step)) {
      step <- 100
    } else {
      step <- data$config$global_agg_step
    }
  }

  if (is.null(x_start) || (!is.na(x_start) && !is.numeric(x_start))) {
    x_start <- NA
  }

  if (is.null(x_end) || (!is.na(x_end) && !is.numeric(x_end))) {
    x_end <- NA
  }

  df1 <- resource_utilization_tree_node(data$Application, data$Atree, step = step, group_pruned = FALSE)

  event_data <- df1 %>%
    filter(.data$Value != 0) %>%
    group_by(.data$ANode) %>%
    summarize(Start = min(.data$Slice), End = max(.data$Slice) + step) %>%
    gather(.data$Start, .data$End, key = "Event", value = "Time") %>%
    arrange(.data$Time, .data$Event)

  # Set node colors
  pkg.env$active_colors <- c()
  pkg.env$df_colors <- tibble(ANode = character(), Event = character(), color = integer())
  apply(event_data, 1, define_colors) -> colors
  colors <- tibble(Color = colors)
  event_data <- event_data %>%
    cbind(colors) %>%
    as_tibble() %>%
    select("ANode", "Color") %>%
    unique()

  # join data frames
  df2 <- df1 %>%
    ungroup() %>%
    left_join(event_data, by = "ANode") %>%
    group_by(.data$Slice)

  # must expand data frame to make geom_area work properly
  df_plot <- df2 %>%
    filter(!is.na(.data$Color)) %>%
    select(-"ANode") %>%
    expand(.data$Slice, .data$Color) %>%
    left_join(df2 %>% filter(.data$Value != 0), by = c("Slice", "Color")) %>%
    mutate(Value1 = ifelse(is.na(.data$Value1), 0, .data$Value1))

  df_plot <- df_plot %>%
    # expand all time slices with the possible colors (for geom_ribbon)
    expand(.data$Slice, Color = 0:(df_plot$Color %>% max())) %>%
    left_join(df_plot, by = c("Slice", "Color")) %>%
    group_by(.data$Color) %>%
    arrange(.data$Color) %>%
    mutate(Value1 = na.locf(.data$Value1)) %>%
    group_by(.data$Slice) %>%
    arrange(.data$Slice, -.data$Color) %>%
    # define Ymin and Ymax for geom ribbon
    mutate(Usage = sum(.data$Value1)) %>%
    mutate(Ymin = lag(.data$Value1), Ymin = ifelse(is.na(.data$Ymin), 0, .data$Ymin)) %>%
    mutate(Ymin = cumsum(.data$Ymin), Ymax = .data$Ymin + .data$Value1) %>%
    # remove Ending nodes at the middle of a Slice to keep their res. utilization
    ungroup() %>%
    mutate(Slc = .data$Slice %% step) %>%
    filter(.data$Slc == 0 | .data$Slice == max(.data$Slice))

  # decide the size of the pallet, if it will include more colors than just green
  ncolors <- df_plot %>%
    ungroup() %>%
    select("Color") %>%
    max(.$Color)
  if (ncolors <= 10) {
    palette <- brewer.pal(9, "Greens")[3:9]
  } else if (ncolors <= 20) {
    palette <- c(
      brewer.pal(9, "Reds")[3:9], brewer.pal(9, "Greens")[3:9]
    )
  } else {
    palette <- c(
      brewer.pal(9, "Oranges")[3:9], brewer.pal(9, "Blues")[3:9],
      brewer.pal(9, "Reds")[3:9], brewer.pal(9, "Greens")[3:9]
    )
  }

  fill_palette <- rev(colorRampPalette(palette)(ncolors + 1))

  df_plot %>%
    ggplot() +
    geom_ribbon(aes(ymin = .data$Ymin, ymax = .data$Ymax, x = .data$Slice, fill = as.factor(.data$Color))) +
    scale_fill_manual(values = fill_palette) +
    default_theme(data$config$base_size, data$config$expand) +
    theme(legend.position = "none") +
    ylab("Usage %\nANode") +
    ylim(0, 100) -> panel

  tzScale <- list(
    coord_cartesian(
      xlim = c(x_start, x_end)
    )
  )
  panel <- panel + tzScale
  return(panel)
}

#' Create the resource utilization by tree depth plot
#'
#' Use starvz_data Application and Atree to create a plot that shows the
#' total resource utilization, painted by tree depth level using geom_ribbon
#'
#' @param data starvz_data with trace data
#' @param step size in milliseconds for the time aggregation step
#' @param legend enable/disable plot legends
#' @param x_start X-axis start value
#' @param x_end X-axis end value
#' @return A ggplot object
#' @examples
#' \dontrun{
#' panel_utiltreedepth(starvz_sample_lu, step = 100, legend = TRUE)
#' }
#' @export
panel_utiltreedepth <- function(data,
                                step = data$config$utiltreenode$step,
                                x_start = data$config$limits$start,
                                x_end = data$config$limits$end,
                                legend = data$config$utiltreedepth$legend) {
  starvz_check_data(data, tables = list("Atree" = c("ANode")))

  if (is.null(x_start) || (!is.na(x_start) && !is.numeric(x_start))) {
    x_start <- NA
  }

  if (is.null(x_end) || (!is.na(x_end) && !is.numeric(x_end))) {
    x_end <- NA
  }

  if (is.null(step) || !is.numeric(step)) {
    if (is.null(data$config$global_agg_step)) {
      step <- 100
    } else {
      step <- data$config$global_agg_step
    }
  }

  # Prepare data
  depth_plot_data <- resource_utilization_tree_depth(data$Application, data$Atree, step)

  maxDepth <- depth_plot_data %>%
    .$Depth %>%
    max()
  depthPalette <- rev(colorRampPalette(brewer.pal(9, "YlOrRd"))(maxDepth))

  panel <- depth_plot_data %>%
    ggplot() +
    geom_ribbon(aes(ymin = .data$Ymin, ymax = .data$Ymax, x = .data$Slice, fill = as.factor(.data$Depth))) +
    default_theme(data$config$base_size, data$config$expand) +
    theme(legend.position = "top") +
    scale_fill_manual(values = depthPalette) +
    ylab("Usage %\nDepth") +
    ylim(0, 100)

  # configure legend
  if (!legend) {
    panel <- panel + theme(legend.position = "none")
  }

  tzScale <- list(
    coord_cartesian(
      xlim = c(x_start, x_end)
    )
  )
  panel <- panel + tzScale

  return(panel)
}

nodememuse_check <- function(data) {
  data$Application %>%
    filter(grepl("front", .data$Value) & .data$GFlop != 0) %>%
    nrow() -> nr
  return(!(nr == 0))
}

#' Create the node memory usage plot
#'
#' Use starvz_data to create a line plot of the memory usage in MB of
#' active nodes along the application execution time
#'
#' @param data starvz_data with trace data
#' @param step size in milliseconds for the time aggregation step
#' @param aggregation enable/disable time aggregation for the plot
#' @param x_start X-axis start value
#' @param x_end X-axis end value
#' @param legend enable/disable plot legends
#' @examples
#' \dontrun{
#' panel_nodememuse(starvz_sample_lu, step = 100)
#' }
#' @export
panel_nodememuse <- function(data = NULL,
                             step = data$config$activenodes$aggregation$step,
                             aggregation = data$config$activenodes$aggregation$active,
                             x_start = data$config$limits$start,
                             x_end = data$config$limits$end,
                             legend = data$config$activenodes$nodememuse$legend) {
  starvz_check_data(data,
    tables = list(
      "Atree" = c("ANode"),
      "Application" = c("Value", "GFlop")
    ),
    extra_func = nodememuse_check
  )

  if (is.null(step) || !is.numeric(step)) {
    if (is.null(data$config$global_agg_step)) {
      step <- 100
    } else {
      step <- data$config$global_agg_step
    }
  }

  if (is.null(x_start) || (!is.na(x_start) && !is.numeric(x_start))) {
    x_start <- NA
  }

  if (is.null(x_end) || (!is.na(x_end) && !is.numeric(x_end))) {
    x_end <- NA
  }

  panel <- geom_blank()

  # Prepare data
  data_mem_utilization <- data$Application %>%
    filter(grepl("front", .data$Value) | grepl("do_subtree", .data$Value)) %>%
    rename(Task = "Value") %>%
    # we wrote the memory usage in the GFlops field...
    mutate(UsedMemMB = ifelse(grepl("clean", .data$Task), -.data$GFlop * 1024, .data$GFlop * 1024)) %>%
    arrange(.data$Start) %>%
    mutate(Value = cumsum(.data$UsedMemMB)) %>%
    mutate(Type = NA)

  if (aggregation) {
    panel <- var_integration_chart(data_mem_utilization, ylabel = NA, step = step, facetting = FALSE, base_size = data$config$base_size, expand = data$config$expand)
  } else {
    # TODO: try to reuse some code from above
    # mem use without time aggregation
    df_mem <- data$Application %>%
      filter(grepl("front", .data$Value) | grepl("do_sub", .data$Value)) %>%
      select("Start", "Value", "GFlop", "ANode", "Node") %>%
      arrange(.data$Start) %>%
      mutate(GFlop = ifelse(.data$Value == "clean_front", -.data$GFlop, .data$GFlop)) %>%
      mutate(MemMB = .data$GFlop * 1024) %>%
      mutate(UsedMemMB = cumsum(.data$MemMB)) %>%
      mutate(Time = .data$Start * 0.9999) %>%
      gather(.data$Start, .data$Time, key = "Start", value = "Time") %>%
      select(-"Start") %>%
      arrange(.data$Time) %>%
      mutate(UsedMemMB = lag(.data$UsedMemMB))

    panel <- df_mem %>%
      ggplot(aes(x = .data$Time, y = .data$UsedMemMB, color = .data$Node)) +
      geom_line() +
      default_theme(data$config$base_size, data$config$expand)
  }

  panel <- panel +
    ylab("Used\nMB") +
    scale_color_brewer(palette = "Dark2")

  # configure legend
  if (!legend) {
    panel <- panel + theme(legend.position = "none")
  }

  tzScale <- list(
    coord_cartesian(
      xlim = c(x_start, x_end)
    )
  )
  panel <- panel + tzScale

  return(panel)
}

#' Create the active nodes in memory plot
#'
#' Use starvz_data to create a line plot of the number of active nodes per type
#' along the application execution time
#'
#' @param data starvz_data with trace data
#' @param step size in milliseconds for the time aggregation step
#' @param aggregation enable/disable time aggregation for the plot
#' @param legend enable/disable plot legends
#' @param x_start X-axis start value
#' @param x_end X-axis end value
#' @return A ggplot object
#' @examples
#' \dontrun{
#' panel_activenodes(data = starvz_sample_lu, step = 100)
#' }
#' @export
panel_activenodes <- function(data = NULL,
                              step = data$config$activenodes$aggregation$step,
                              aggregation = data$config$activenodes$aggregation$active,
                              x_start = data$config$limits$start,
                              x_end = data$config$limits$end,
                              legend = data$config$activenodes$legend) {
  starvz_check_data(data, tables = list("Atree" = c("ANode")))

  if (is.null(step) || !is.numeric(step)) {
    if (is.null(data$config$global_agg_step)) {
      step <- 100
    } else {
      step <- data$config$global_agg_step
    }
  }

  if (is.null(x_start) || (!is.na(x_start) && !is.numeric(x_start))) {
    x_start <- NA
  }

  if (is.null(x_end) || (!is.na(x_end) && !is.numeric(x_end))) {
    x_end <- NA
  }

  panel <- geom_blank()

  # Prepare data
  data_active_nodes <- data$Application %>%
    filter(grepl("front", .data$Value) | grepl("subtree", .data$Value)) %>%
    select("ResourceId", "Start", "End", "Value", "ANode") %>%
    mutate(
      node_count = case_when(
        .data$Value == "do_subtree" ~ 1,
        .data$Value == "init_front" ~ 1,
        .data$Value == "clean_front" ~ -1,
        TRUE ~ 0
      )
    ) %>%
    # get the node types from Atree
    left_join(
      data$Atree %>% select("ANode", "NodeType"),
      by = "ANode"
    ) %>%
    arrange(.data$Start) %>%
    group_by(.data$NodeType) %>%
    rename(Task = "Value") %>%
    mutate(Value = cumsum(.data$node_count)) %>%
    filter(!is.na(.data$NodeType))

  # use time aggregation or not
  if (aggregation) {
    data_active_nodes <- data_active_nodes %>%
      # need for the var integration
      mutate(Type = NA) %>%
      mutate(ResourceType = .data$NodeType, Node = .data$NodeType) %>%
      ungroup()

    panel <- var_integration_chart(data_active_nodes, ylabel = NA, step = step, facetting = FALSE, base_size = data$config$base_size, expand = data$config$expand)
  } else {
    panel <- data_active_nodes %>%
      ggplot(aes(x = .data$Start, y = .data$Value, color = .data$NodeType)) +
      default_theme(data$config$base_size, data$config$expand) +
      geom_line()
  }

  panel <- panel +
    ylab("Active\nNodes") +
    scale_colour_brewer(palette = "Dark2")

  # configure legend
  if (!legend) {
    panel <- panel + theme(legend.position = "none")
  }

  tzScale <- list(
    coord_cartesian(
      xlim = c(x_start, x_end)
    )
  )
  panel <- panel + tzScale

  return(panel)
}

# Calculate the computational resource utilization by tree node
#' Create the node memory usage plot
#'
#' Use starvz_data to create a line plot of the memory usage in MB of
#' active nodes along the application execution time
#'
#' @param Application starvz application data
#' @param Atree starvz elimination tree data
#' @param step size in milliseconds for the time aggregation step
#' @param group_pruned aggregate computations of the same parent pruned nodes
#' @param performance_metric Performance metric to save in Value1 [Time, GFlops]
resource_utilization_tree_node <- function(Application = NULL,
                                           Atree = NULL,
                                           step = 100,
                                           group_pruned = FALSE,
                                           performance_metric = "Time") {
  # Prepare and filter data
  df_filter <- Application %>%
    filter(
      grepl("qrt", .data$Value) | grepl("subtree", .data$Value)
    ) %>%
    select("ANode", "Start", "End", "JobId", "GFlop") %>%
    unique() %>%
    arrange(.data$Start)

  # Get number of workers for resource utilization
  NWorkers <- Application %>%
    select("ResourceId") %>%
    unique() %>%
    nrow()

  # Group pruned nodes with same Parent to aggregate their computations
  if (isTRUE(group_pruned)) {
    # When we change ANode, we also need to update it in Atree, but this will modify other plots as well
    groupPruned <- Atree %>%
      mutate(NewANode = case_when(.data$NodeType == "Pruned" ~ paste0(.data$Parent, "Pruned"), TRUE ~ .data$ANode)) %>%
      select("ANode", "NodeType", "NewANode")

    df_filter <- df_filter %>%
      left_join(
        groupPruned %>%
          select("ANode", "NodeType", "NewANode"),
        by = "ANode"
      ) %>%
      mutate(originalAnode = .data$ANode, ANode = .data$NewANode)
  }

  if (tolower(performance_metric) == "time") {
    # Compute the node parallelism
    data_node_parallelism <- df_filter %>%
      gather(.data$Start, .data$End, key = "Event", value = "Time") %>%
      arrange(.data$Time) %>%
      group_by(.data$ANode) %>%
      mutate(Value = ifelse(.data$Event == "Start", 1, -1)) %>%
      mutate(nodeParallelism = cumsum(.data$Value)) %>%
      ungroup()

    # Do the time aggregation of resource utilization by ANode
    data_node_plot <- data_node_parallelism %>%
      select("ANode", "Time", "nodeParallelism") %>%
      arrange(.data$Time) %>%
      mutate(End = lead(.data$Time)) %>%
      mutate(Duration = .data$End - .data$Time) %>%
      rename(Start = "Time", Value = "nodeParallelism") %>%
      na.omit() %>%
      group_by(.data$ANode) %>%
      do(remyTimeIntegrationPrepNoDivision(., myStep = step)) %>%
      # This give us the total worker usage grouped by ANode in a time slice
      mutate(Value1 = .data$Value / (step * NWorkers) * 100) %>%
      ungroup()
  } else if (tolower(performance_metric) == "gflops") {
    # get the slices and the task data
    data_node_plot <- getSlices(Application, step = step) %>%
      tibble(Slice = .) %>%
      group_by(.data$Slice) %>%
      mutate(nest(Application %>%
        filter(grepl("qrt", .data$Value) | grepl("subtree", .data$Value)) %>%
        select("Start", "End", "ANode", "Duration", "GFlop"), data = everything())) %>%
      # filter task by slice and calculate GFlops contribution to that slice
      mutate(SliceData = map2(.data$data, .data$Slice, function(x, y) {
        SliceStart <- .data$Slice
        SliceEnd <- .data$Slice + step
        x %>%
          filter((.data$End >= SliceStart & .data$End <= SliceEnd) |
            (.data$Start >= SliceStart & .data$Start <= SliceEnd) |
            (.data$Start <= SliceStart & .data$End >= SliceEnd)) %>%
          mutate(GFlopMultiplier = case_when(
            # 1 - task is completely inside the slice:                 | s---e  |
            .data$Start >= SliceStart & .data$End <= SliceEnd ~ 1,
            # 2 - task start before the slice and ends inside it:  s---|----e   |
            .data$Start <= SliceStart & .data$End <= SliceEnd ~ 1 - (SliceStart - .data$Start) / .data$Duration,
            # 3 - task start in the slice and ends after it:           |  s-----|--e
            .data$Start >= SliceStart & .data$End >= SliceEnd ~ 1 - (.data$End - SliceEnd) / .data$Duration,
            # 4 - task starts before the slice and ends afeter it: s---|--------|--e
            .data$Start <= SliceStart & .data$End >= SliceEnd ~ 1 - ((.data$End - SliceEnd) + (SliceStart - .data$Start)) / .data$Duration
          )) %>%
          mutate(SliceGFlop = .data$GFlop * .data$GFlopMultiplier) %>%
          group_by(.data$ANode) %>%
          summarize(GFlop = sum(.data$GFlop), SliceGFlop = sum(.data$SliceGFlop), .groups = "keep")
      })) %>%
      select(-"data") %>%
      unnest(cols = c("SliceData")) %>%
      mutate(Value1 = .data$SliceGFlop) %>%
      ungroup()
  }

  # Restore original values of ANode for the Pruned nodes
  if (isTRUE(group_pruned)) {
    data_node_plot <- data_node_plot %>%
      left_join(df_filter %>% select("ANode", "originalAnode"),
        by = "ANode"
      ) %>%
      mutate(ANode = .data$originalAnode) %>%
      select(-"originalAnode")
  }

  return(data_node_plot)
}

# Calculate the computational resource utilization by tree depth
resource_utilization_tree_depth <- function(Application = NULL, Atree = NULL, step = 100) {
  # Prepare and filter data
  df_filter <- Application %>%
    filter(grepl("qrt", .data$Value) | grepl("do_subtree", .data$Value)) %>%
    select("ANode", "Start", "End", "JobId") %>%
    arrange(.data$Start)

  # Get number of workers for resource utilization
  NWorkers <- Application %>%
    filter(grepl("CPU", .data$ResourceType) | grepl("CUDA", .data$ResourceType)) %>%
    select("ResourceId") %>%
    unique() %>%
    nrow()

  # Compute the node parallelism
  df_node_parallelism <- df_filter %>%
    select("ANode", "Start", "JobId") %>%
    mutate(Event = "Start") %>%
    rename(Time = "Start") %>%
    bind_rows(df_filter %>%
      select("ANode", "End", "JobId") %>%
      mutate(Event = "End") %>%
      rename(Time = "End")) %>%
    arrange(.data$Time) %>%
    group_by(.data$ANode) %>%
    mutate(Value = ifelse(.data$Event == "Start", 1, -1)) %>%
    mutate(nodeParallelism = cumsum(.data$Value)) %>%
    ungroup()

  # Integrate resource utilization by ANode
  df_node_plot <- df_node_parallelism %>%
    select("ANode", "Time", "nodeParallelism") %>%
    arrange(.data$Time) %>%
    mutate(End = lead(.data$Time)) %>%
    mutate(Duration = .data$End - .data$Time) %>%
    rename(Start = "Time", Value = "nodeParallelism") %>%
    na.omit() %>%
    group_by(.data$ANode) %>%
    do(remyTimeIntegrationPrepNoDivision(., myStep = step)) %>%
    # This give us the total worker usage grouped by ANode in a time slice
    mutate(Value1 = (.data$Value / (step * NWorkers)) * 100) %>%
    # usage by slice
    group_by(.data$Slice) %>%
    arrange(.data$Slice) %>%
    mutate(Usage = sum(.data$Value1)) %>%
    # usage by depth
    left_join(Atree %>% select("ANode", "Depth"), by = "ANode") %>%
    group_by(.data$Slice, .data$Depth) %>%
    mutate(Value1 = sum(.data$Value1)) %>%
    ungroup() %>%
    select(-"ANode", -"Value") %>%
    unique()

  # expand all time slices with the possible colors (for geom_ribbon)
  data_depth_plot <- df_node_plot %>%
    expand(.data$Slice, Depth = 1:(df_node_plot$Depth %>% max())) %>%
    left_join(df_node_plot, by = c("Slice", "Depth")) %>%
    group_by(.data$Depth) %>%
    arrange(.data$Depth) %>%
    mutate(Value1 = na.locf(.data$Value1)) %>%
    arrange(.data$Slice, .data$Depth) %>%
    group_by(.data$Slice) %>%
    mutate(Ymin = lag(.data$Value1), Ymin = ifelse(is.na(.data$Ymin), 0, .data$Ymin)) %>%
    mutate(Ymin = cumsum(.data$Ymin), Ymax = .data$Ymin + .data$Value1) %>%
    mutate(Slc = .data$Slice %% step) %>%
    ungroup() %>%
    filter(.data$Slc == 0 | .data$Slice == max(.data$Slice))

  return(data_depth_plot)
}

# Add anomalies representation in the tree structure
atree_geom_anomalies <- function(data) {
  anomalies_points <- data$Application %>%
    select(-"Position") %>%
    left_join(
      data$Atree %>%
        select(
          "ANode",
          "Position"
        ),
      by = "ANode"
    ) %>%
    filter(.data$Outlier) %>%
    mutate(Height = 1)

  list(
    geom_point(
      data = anomalies_points,
      aes(
        x = .data$Start,
        y = .data$Position + .data$Height / 2,
        color = .data$Value,
      ),
      size = 0.5,
      shape = 1
    ),
    scale_color_manual(values = c(
      "geqrt" = "#FF7F00", "gemqrt" = "#377EB8", "tpqrt" = "#F781BF", "tpmqrt" = "#A65628",
      "lapack_geqrt" = "#FF7F00", "lapack_gemqrt" = "#377EB8", "lapack_tpqrt" = "#F781BF", "lapack_tpmqrt" = "#A65628"
    )),
    scale_shape_manual(values = c("19"))
  )
}


# Define a geom rect receiving the data and defining the fill color as the resource usage
atree_geom_rect_gradient <- function(data, yminOffset, ymaxOffset) {
  list(
    geom_rect(
      data = data,
      aes(
        fill = .data$NodeUsage,
        xmin = .data$Start,
        xmax = .data$End,
        ymin = .data$Position + yminOffset,
        ymax = .data$Position + ymaxOffset,
      )
    )
  )
}

# Define a geom rect receiving the data, color, and height of the geom_rect
atree_geom_rect <- function(data, color, yminOffset, ymaxOffset, alpha) {
  list(
    geom_rect(
      data = data,
      aes(
        xmin = .data$Start,
        xmax = .data$End,
        ymin = .data$Position + yminOffset,
        ymax = .data$Position + ymaxOffset
      ),
      fill = color,
      alpha = alpha
    )
  )
}

# remyTimeIntegrationPrep without dividing the Value column by the time slice
remyTimeIntegrationPrepNoDivision <- function(dfv = NULL, myStep = 100) {
  if (is.null(dfv)) {
    return(NULL)
  }
  if (nrow(dfv) == 0) {
    return(NULL)
  }
  mySlices <- getSlices(dfv, step = myStep)
  tibble(Slice = mySlices, Value = c(remyTimeIntegration(dfv, slices = mySlices), 0))
}

# Set of functions to define the color of the nodes in the resource utilization plot
get_min_color <- function(node) {
  min_color <- 0
  while (1) {
    if (min_color %in% pkg.env$active_colors) {
      min_color <- min_color + 1
    } else {
      pkg.env$df_colors <- pkg.env$df_colors %>%
        rbind(tibble(ANode = node, Event = "Start", color = min_color))
      pkg.env$active_colors <- c(pkg.env$active_colors, min_color)
      return(min_color)
    }
  }
}

find_node_color <- function(node) {
  color <- pkg.env$df_colors %>%
    filter(.data$ANode == node & .data$Event == "Start") %>%
    select("color") %>%
    .$color

  return(color)
}

set_color_available <- function(node) {
  color <- find_node_color(node)

  pkg.env$df_colors <- pkg.env$df_colors %>%
    rbind(tibble(ANode = node, Event = "End", color = color))
  pkg.env$active_colors <- pkg.env$active_colors[pkg.env$active_colors != as.integer(color)]

  return(color)
}

define_colors <- function(data) {
  ANode <- data[1]
  Event <- data[2]

  if (Event == "Start") {
    get_min_color(ANode)
  } else {
    set_color_available(ANode)
  }
}

get_tree_utilization <- function(data, step, performance_metric) {
  data_utilization_node <- resource_utilization_tree_node(data$Application,
    data$Atree,
    step = step, group_pruned = TRUE, performance_metric = performance_metric
  ) %>%
    unique() %>%
    filter(.data$Value1 != 0) %>%
    select("ANode", "Slice", "Value1") %>%
    ungroup() %>%
    rename(NodeUsage = "Value1") %>%
    left_join(data$Atree, by = "ANode")

  # Resize for not pruned nodes, first and last computational tasks
  # data_tree_utilization <- data_gflops_node  %>%
  data_tree_utilization <- data_utilization_node %>%
    inner_join(
      data$Application %>%
        filter(grepl("qrt", .data$Value) | grepl("do_sub", .data$Value)) %>%
        select("ANode", "End", "Start") %>%
        group_by(.data$ANode) %>%
        mutate(Start = min(.data$Start), End = max(.data$End)) %>%
        select("ANode", "Start", "End") %>%
        unique(),
      by = "ANode"
    ) %>%
    mutate(Start = ifelse(.data$Start >= .data$Slice, .data$Start, .data$Slice)) %>%
    mutate(End = ifelse(.data$Slice + step >= .data$End, .data$End, .data$Slice + step))

  return(data_tree_utilization)
}

get_tree_flops <- function(data, step) {
  data_utilization_node <- resource_utilization_tree_node(data$Application, data$Atree, step = step, group_pruned = TRUE) %>%
    unique() %>%
    filter(.data$Value != 0) %>%
    select("ANode", "Slice", "Value1") %>%
    ungroup() %>%
    rename(NodeUsage = "Value1") %>%
    left_join(data$Atree, by = "ANode")

  # Resize for not pruned nodes, first and last computational tasks
  data_tree_utilization <- data_utilization_node %>%
    inner_join(
      data$Application %>%
        filter(grepl("qrt", .data$Value) | grepl("do_sub", .data$Value)) %>%
        select("ANode", "End", "Start") %>%
        group_by(.data$ANode) %>%
        mutate(Start = min(.data$Start), End = max(.data$End)) %>%
        select("ANode", "Start", "End") %>%
        unique(),
      by = "ANode"
    ) %>%
    mutate(Start = ifelse(.data$Start >= .data$Slice, .data$Start, .data$Slice)) %>%
    mutate(End = ifelse(.data$Slice + step >= .data$End, .data$End, .data$Slice + step))

  return(data_tree_utilization)
}

#' Combine two atree plots to compare two different executions
#'
#' Use starvz_data Application and Atree to create a plot that shows the
#' total resource utilization, painted by tree node using geom_ribbon. The
#' colors are reused between nodes, not tied to a specific tree node.
#'
#' @param data1 starvz_data with trace data
#' @param data2 starvz_data with trace data
#' @param step size in milliseconds for the time aggregation step
#' @param x_start X-axis start value
#' @param x_end X-axis end value
#' @param performance_metric which metric to represent ["time", "gflops"]
#' @param add_diff_line add the computed gflops difference line
#' @param add_end_line add smaller end time vertical line
#' @return A ggplot object
#' @examples
#' \dontrun{
#' panel_compare_tree(data1, data2, step = 100)
#' }
#' @export
panel_compare_tree <- function(data1 = NULL,
                               data2 = NULL,
                               step = data1$config$utiltreenode$step,
                               x_start = data1$config$limits$start,
                               x_end = data1$config$limits$end,
                               performance_metric = "Time",
                               add_diff_line = FALSE,
                               add_end_line = FALSE) {
  starvz_check_data(data1, tables = list("Atree" = c("ANode")))
  starvz_check_data(data2, tables = list("Atree" = c("ANode")))

  if (is.null(step) || !is.numeric(step)) {
    if (is.null(data1$config$global_agg_step)) {
      step <- 100
    } else {
      step <- data1$config$global_agg_step
    }
  }

  if (is.null(x_start) || (!is.na(x_start) && !is.numeric(x_start))) {
    x_start <- NA
  }

  if (is.null(x_end) || (!is.na(x_end) && !is.numeric(x_end))) {
    x_end <- NA
  }

  # check wich one is the slower execution
  end1 <- data1$Application %>%
    pull(.data$End) %>%
    max()
  end2 <- data2$Application %>%
    pull(.data$End) %>%
    max()
  if (end1 >= end2) {
    slower <- data1
    faster <- data2
    end_cut <- end2
    x_end <- end1
  } else {
    faster <- data1
    slower <- data2
    end_cut <- end1
    x_end <- end2
  }

  data_tree_utilization1 <- get_tree_utilization(faster, step, performance_metric)
  data_tree_utilization2 <- get_tree_utilization(slower, step, performance_metric)

  # Calculate the diff
  data_diff <- data_tree_utilization1 %>%
    mutate(Execution = "NodeUsage.faster") %>%
    bind_rows(data_tree_utilization2 %>%
      mutate(Execution = "NodeUsage.slower")) %>%
    select(-"Start", -"End") %>%
    spread(.data$Execution, .data$NodeUsage) %>%
    mutate(
      NodeUsage.faster = ifelse(is.na(.data$NodeUsage.faster), 0, .data$NodeUsage.faster),
      NodeUsage.slower = ifelse(is.na(.data$NodeUsage.slower), 0, .data$NodeUsage.slower)
    ) %>%
    # check when we have ocncurrent or exclusive execution
    mutate(boxColor = case_when(
      .data$NodeUsage.faster != 0 & .data$NodeUsage.slower != 0 ~ "Concurrent execution",
      .data$NodeUsage.faster != 0 & .data$NodeUsage.slower == 0 ~ "Exclusive faster",
      .data$NodeUsage.faster == 0 & .data$NodeUsage.slower != 0 ~ "Exclusive slower"
    )) %>%
    # ifelse(.data$NodeUsage.faster == 0 | .data$NodeUsage.slower == 0, "Exclusive execution", "Concurrent execution")) %>%
    mutate(NodeUsage = .data$NodeUsage.faster - .data$NodeUsage.slower)

  # add cumulative performance metric line
  if (add_diff_line) {
    lineplot <- panel_gflops_diff(
      data_tree_utilization1, data_tree_utilization2,
      slower$config$base_size, slower$config$expand, performance_metric
    )
  }

  atreeplot <- panel_atree_structure(slower) +
    default_theme(slower$config$base_size, slower$config$expand) +
    ylab("Elimination Tree\n[Submission Order]")

  atreeplot <- atreeplot +
    geom_rect(
      data = data_diff,
      aes(
        fill = .data$NodeUsage,
        xmin = .data$Slice,
        xmax = .data$Slice + step,
        ymin = .data$Position,
        ymax = .data$Position + .data$Height,
        color = .data$boxColor,
        linetype = .data$boxColor
      ),
      # color = "#807d7c",
      size = 0.5
    ) +
    scale_linetype_manual(
      breaks = c("Concurrent execution", "Exclusive faster", "Exclusive slower"),
      values = c("blank", "solid", "solid")
    ) +
    scale_color_manual(
      breaks = c("Concurrent execution", "Exclusive faster", "Exclusive slower"),
      values = c("black", "blue", "red")
    ) +
    guides(color = FALSE, linetype = FALSE)

  myPalette <- colorRampPalette(c(rev(brewer.pal(9, "Reds")[2:9]), brewer.pal(9, "Greens")[2:9]))

  if (tolower(performance_metric) == "time") {
    palette_colors <- scale_fill_gradientn(colours = myPalette(20), limits = c(-100, 100))
    atreeplot <- atreeplot + palette_colors
  } else if (tolower(performance_metric) == "gflops") {
    minGFlop <- round(min(data_diff$NodeUsage), 1)
    maxGFlop <- round(max(data_diff$NodeUsage), 1)
    atreeplot <- atreeplot + scale_fill_gradient2(
      name = paste0("GFlops difference per time slice (", step, "ms)"),
      low = "red", mid = "grey", high = "blue",
      limits = c(minGFlop, maxGFlop),
      midpoint = 0,
      breaks = c(minGFlop, 0, maxGFlop)
    ) +
      theme(legend.title = element_text(size = rel(0.8))) +
      guides(fill = guide_colorbar(barwidth = 15, barheight = 1, nbin = 30, title.position = "right", title.vjust = 1), linetype = "none")
    # palette_colors <- scale_fill_gradientn(colours = myPalette(20))
  }

  tzScale <- list(
    coord_cartesian(
      xlim = c(x_start, x_end)
    )
  )
  atreeplot <- atreeplot + tzScale

  # add vetical computation ending line for faster case
  if (add_end_line) {
    atreeplot <- atreeplot + geom_vline(aes(xintercept = c(end_cut)))

    if (add_diff_line) {
      lineplot <- lineplot + geom_vline(aes(xintercept = c(end_cut)))
      lineplot <- lineplot + tzScale
    }
  }
  # add the line to the plot
  if (add_diff_line) {
    atreeplot <- (atreeplot + theme(axis.title.x = element_blank(), axis.text.x = element_blank())) / lineplot + patchwork::plot_layout(heights = c(1, 0.25))
  }

  return(atreeplot)
}

# Represent the total computed GFlops difference over time
panel_gflops_diff <- function(data_tree_utilization1, data_tree_utilization2, base_size, expand, performance_metric) {
  data_diff_line <- data_tree_utilization1 %>%
    mutate(Execution = "NodeUsage.x") %>%
    bind_rows(data_tree_utilization2 %>%
      mutate(Execution = "NodeUsage.y")) %>%
    spread(.data$Execution, .data$NodeUsage) %>%
    mutate(
      NodeUsage.x = ifelse(is.na(.data$NodeUsage.x), 0, .data$NodeUsage.x),
      NodeUsage.y = ifelse(is.na(.data$NodeUsage.y), 0, .data$NodeUsage.y)
    ) %>%
    group_by(.data$Slice, .groups = "keep") %>%
    summarize(NodeUsage.x = sum(.data$NodeUsage.x), NodeUsage.y = sum(.data$NodeUsage.y)) %>%
    ungroup() %>%
    mutate(Usage.x = cumsum(.data$NodeUsage.x), Usage.y = cumsum(.data$NodeUsage.y)) %>%
    mutate(diff = .data$Usage.x - .data$Usage.y) %>%
    mutate(signal = ifelse(.data$diff >= 0, "positive", "negative"))

  lineplot <- data_diff_line %>%
    arrange(.data$signal) %>%
    ggplot(aes(x = .data$Slice, y = .data$diff, color = .data$signal)) +
    geom_line(aes(group = .data$.groups)) +
    scale_color_manual(
      breaks = c("positive", "negative"),
      values = c("blue", "red")
    ) +
    labs(y = paste0(performance_metric, "\n difference")) +
    default_theme(base_size, expand) +
    theme(legend.position = "none")

  return(lineplot)
}

Try the starvz package in your browser

Any scripts or data that you put into this service are public.

starvz documentation built on June 19, 2025, 1:08 a.m.