R/join_edge_attrs.R

Defines functions join_edge_attrs

Documented in join_edge_attrs

#' Join new edge attribute values using a data frame
#'
#' @description
#'
#' Join new edge attribute values in a left join using a data frame. Because a
#' left join is used, edges in the graph are not removed by the join.
#'
#' @inheritParams render_graph
#' @param df The data frame to use for joining.
#' @param by_graph Optional specification of the column in the graph's internal
#'   edge data frame for the left join. If both `by_graph` and `by_df` are not
#'   provided, then a natural join will occur on the columns in the graph's
#'   edf and in `df` that have identical names. If there are no such columns,
#'   an error is raised.
#' @param by_df Optional specification of the column in `df` for the left join.
#'   If both `by_graph` and `by_df` are not provided, then a natural join will
#'   occur on the columns in the graph's edf and in `df` that have identical
#'   names. If there are no such columns, an error is raised.
#'
#' @return A graph object of class `dgr_graph`.
#'
#' @examples
#' # Set a seed
#' suppressWarnings(RNGversion("3.5.0"))
#' set.seed(23)
#'
#' # Create a simple graph
#' graph <-
#'   create_graph() %>%
#'   add_n_nodes(n = 5) %>%
#'   add_edges_w_string(
#'     edges = "1->2 1->3 2->4 2->5 3->5")
#'
#' # Create a data frame with node ID values
#' # representing the graph edges (with `from` and `to`
#' # columns), and a set of numeric values
#' df <-
#'   data.frame(
#'     from = c(1, 1, 2, 2, 3),
#'     to = c(2, 3, 4, 5, 5),
#'     values = rnorm(5, 5))
#'
#' # Join the values in the data frame to the
#' # graph's edges; this works as a left join using
#' # identically-named columns in the graph and the df
#' # (in this case `from` and `to` are common to both)
#' graph <-
#'   graph %>%
#'   join_edge_attrs(
#'     df = df)
#'
#' # Get the graph's internal edf to show that the
#' # join has been made
#' graph %>% get_edge_df()
#' @family edge creation and removal
#' @export
join_edge_attrs <- function(
    graph,
    df,
    by_graph = NULL,
    by_df = NULL
) {

  # Get the time of function start
  time_function_start <- Sys.time()

  # Validation: Graph object is valid
  check_graph_valid(graph)

  # Validation: `by_graph` and `by_df` must be supplied together or not at all
  if (is.null(by_graph) != is.null(by_df)) {

    cli::cli_abort(
      "Both column specifications must be provided.")
  }

  # Extract the graph's edf
  edges <- get_edge_df(graph)

  if (is.null(by_graph)) {

    # Natural join: determine the identically named columns up front.
    # `merge()` with a zero-length `by` returns a Cartesian product (and
    # ignores `all.x`), which would duplicate or drop edges, so that case
    # is rejected explicitly.
    shared_cols <-
      base::intersect(colnames(edges), colnames(as.data.frame(df)))

    if (length(shared_cols) == 0L) {

      cli::cli_abort(c(
        "There are no identically named columns in the graph's edf and in `df`.",
        "i" = "Provide both `by_graph` and `by_df` to specify the join columns."))
    }

    # Perform a left join on the `edges` data frame
    edges <- merge(edges, df, by = shared_cols, all.x = TRUE)

  } else {

    # Perform a left join on the `edges` data frame using the
    # user-specified key columns
    edges <-
      merge(
        edges, df,
        all.x = TRUE,
        by.x = by_graph,
        by.y = by_df)
  }

  # `merge()` places the key columns first (and, by default, sorts rows on
  # them); move the standard edf columns back to the front
  edges <-
    edges %>% dplyr::relocate("id", "from", "to", "rel")

  # Modify the graph object
  graph$edges_df <- edges

  # Get the name of the function
  fcn_name <- get_calling_fcn()

  # Update the `graph_log` df with an action
  graph$graph_log <-
    add_action_to_log(
      graph_log = graph$graph_log,
      version_id = nrow(graph$graph_log) + 1L,
      function_used = fcn_name,
      time_modified = time_function_start,
      duration = graph_function_duration(time_function_start),
      nodes = nrow(graph$nodes_df),
      edges = nrow(graph$edges_df))

  # Write graph backup if the option is set
  if (graph$graph_info$write_backups) {
    save_graph_as_rds(graph = graph)
  }

  graph
}
rich-iannone/DiagrammeR documentation built on Feb. 5, 2024, 8 a.m.