#' #' Layout for a \code{qgraph} network representation of an energy conversion chain
#' #'
#' #' Industries and products are interleaved from left to right,
#' #' starting with the first industry, followed by the first product.
#' #' Industries and products alternate until all products are exhausted.
#' #' Finally, the last industry (which should be final demand) is placed at the far right.
#' #' Storage industries are arranged across the top of the network.
#' #'
#' #' @param Industries a data frame consisting of columns named
#' #' \code{industry_colname}, \code{stage_colname}, and \code{group_colname}.
#' #' Entries in \code{industry_colname} must be names of industries.
#' #' Entries in \code{stage_colname} must be names of left-to-right stages in the ECC.
#' #' Industries (or sectors) in final demand must be included in the \code{Industries}
#' #' data frame.
#' #' Industries that provide "storage" must also be included in the \code{Industries}
#' #' data frame (such as Stock changes, International aviation and marine bunkers, and
#' #' Statistical differences), their \code{stage_colname} value being \code{storage_stagename}.
#' #' All industries with \code{storage_stagename} are laid out
#' #' along the top of the network.
#' #' The left-to-right order of stages in the network layout generated by this function
#' #' is taken from the order of appearance of stages in the \code{stage_colname} column.
#' #' The left-to-right order of storage industries is given by
#' #' the appearance order of the industries in \code{industry_colname}.
#' #' Entries in \code{group_colname} give groups of industries or products to keep together
#' #' at each stage. The top-to-bottom order of groups is taken from the appearance order
#' #' in \code{group_colname}.
#' #' @param Products a data frame consisting of same columns as \code{Industries},
#' #' except that \code{product_colname} takes the place of \code{industry_colname}.
#' #' Note that the number of stages in \code{Products} must be one less than than
#' #' number of stages in \code{Industries}, not counting industries in stage
#' #' \code{storage_stagename}.
#' #' @param industry_colname the name of the column in \code{Industries} containing
#' #' names of industries (a string).
#' #' Default is "\code{Industry}".
#' #' @param product_colname the name of the column in \code{Products} containing
#' #' names of products (a string).
#' #' Default is "\code{Product}".
#' #' @param stage_colname the name of the columns in \code{Industries} and \code{Products}
#' #' containing names of stages (a string).
#' #' The left-to-right order of stages in the network layout
#' #' is determined by the top-to-bottom
#' #' order in which stage names appear in the
#' #' \code{Industries} and \code{Products} data frames.
#' #' Default is "\code{Stage}".
#' #' @param group_colname the name of the column in \code{Industries} and \code{Products}
#' #' containing industries and products that should be grouped together vertically
#' #' at a stage (a string).
#' #' The top-to-bottom order of groups at a stage is given by the top-to-bottom
#' #' order of appearance in \code{group_colname}.
#' #' Default is "\code{Group}".
#' #' \code{group_colname} is optional in \code{Industries} and \code{Products}.
#' #' If \code{group_colname} is missing, a column named "\code{group_colname}"
#' #' will be created and filled with "\code{group_colname}",
#' #' thereby creating a single group for all industries or products.
#' #' @param storage_stagename the name of the stage in \code{stage_colname} of \code{Industries}
#' #' that identifies a "storage" industry (a string).
#' #' Default is "\code{Storage}".
#' #' Typical storage industries are bunkers, stock changes, and statistical differences.
#' #' This layout puts storage industries across the top of the ECC graph.
#' #' @param g an optional \code{qgraph} object from which the order of node names is extracted,
#' #' if present.
#' #' It is assumed that the layout calculated by this function will be applied to \code{g},
#' #' so the row order of the outgoing layout is set to the row order expected by \code{g}.
#' #' Furthermore, additional error checking is performed:
#' #' if the names of nodes in \code{Industries} and \code{Products}
#' #' don't exactly match the names of nodes that \code{g} expects,
#' #' a data frame of mismatching names is printed and execution is halted.
#' #' An easy way to obtain a \code{qgraph} object for the \code{g} argument is to call
#' #' \code{g <- qgraph(somedata, DoNotPlot = TRUE)}.
#' #' @param x_colname the name of the output column containing x coordinates for each node
#' #' (a string).
#' #' Default is "\code{x}".
#' #' @param y_colname the name of the output column containing y coordinates for each node
#' #' (a string).
#' #' Default is "\code{y}".
#' #'
#' #' @return a matrix with two columns.
#' #' The first column contains x coordinates for the nodes and is named \code{x_colname}.
#' #' The second column contains y coordinates for the nodes and is named \code{y_colname}.
#' #' Node names are the row names for the outgoing matrix.
#' #'
#' #' @export
#' ecc_layout <- function(Industries,
#' Products,
#' industry_colname = "Industry",
#' product_colname = "Product",
#' stage_colname = "Stage",
#' group_colname = "Group",
#' storage_stagename = "Storage",
#' g,
#' # Output columns
#' x_colname = "x",
#' y_colname = "y"){
#' # Set a name for the node name columsn that will be used throughout this function.
#' node_name_colname <- ".Node_name"
#' # First step is to eliminate factors in the incoming data frames
#' Industries <- Industries %>% dplyr::mutate_if(is.factor, as.character)
#' Products <- Products %>% dplyr::mutate_if(is.factor, as.character)
#' # The group_colname is optional.
#' # If the column is not present, add and fill with a single group (group_colname).
#' if (!(group_colname %in% names(Industries))) {
#' Industries <- Industries %>%
#' dplyr::mutate(
#' !!as.name(group_colname) := group_colname
#' )
#' }
#' if (!(group_colname %in% names(Products))) {
#' Products <- Products %>%
#' dplyr::mutate(
#' !!as.name(group_colname) := group_colname
#' )
#' }
#' # Extract storage industries from the Industries data frame.
#' Storage <- Industries %>%
#' dplyr::filter((!!as.name(stage_colname)) == storage_stagename)
#' Industries_less_Storage <- Industries %>%
#' dplyr::filter((!!as.name(stage_colname)) != storage_stagename)
#' # Ensure that the number of industry stages (less storage)
#' # is one more than the number of product stages.
#' N_industry_stages <- Industries_less_Storage %>% dplyr::select(!!as.name(stage_colname)) %>% unique() %>% nrow()
#' N_product_stages <- Products %>% dplyr::select(!!as.name(stage_colname)) %>% unique() %>% nrow()
#' # if (N_industry_stages - N_product_stages != 1) {
#' # stop(paste0("N_industry_stages = ", N_industry_stages,
#' # ". N_product_stages = ", N_product_stages, ". ",
#' # "There should be one more industry stages than product stages. "),
#' # call. = FALSE)
#' # }
#' assertthat::assert_that(N_industry_stages - N_product_stages == 1,
#' msg = paste0("N_industry_stages = ", N_industry_stages,
#' ". N_product_stages = ", N_product_stages, ". ",
#' "There should be one more industry stages than product stages. "))
#' # Set groups for the Group variable based on order of appearance.
#' # These groups will be used later for ordering the y coordinates of nodes.
#' grps <- rbind(Industries_less_Storage %>% dplyr::select(!!as.name(group_colname)),
#' Products %>% dplyr::select(!!as.name(group_colname))) %>%
#' dplyr::filter(!is.na(!!as.name(group_colname))) %>% unique()
#' grps <- magrittr::set_rownames(grps, 1:nrow(grps))
#' # Set levels for groups in order of their appearance.
#' # These levels will be used later for calculating the y coordinates for the nodes.
#' Industries <- Industries %>%
#' dplyr::mutate(
#' !!as.name(group_colname) := factor(!!as.name(group_colname), levels = grps[[group_colname]])
#' )
#' Industries_less_Storage <- Industries_less_Storage %>%
#' dplyr::mutate(
#' !!as.name(group_colname) := factor(!!as.name(group_colname), levels = grps[[group_colname]])
#' )
#' Products <- Products %>%
#' dplyr::mutate(
#' !!as.name(group_colname) := factor(!!as.name(group_colname), levels = grps[[group_colname]])
#' )
#' Storage <- Storage %>%
#' dplyr::mutate(
#' !!as.name(group_colname) := factor(!!as.name(group_colname), levels = grps[[group_colname]])
#' )
#' # Make data frames of stage numbers.
#' # Left-to-right order across the network is taken from the order of appearance
#' # in the respective data frames.
#' # This process is simpler for industries and products
#' # but takes additional calculations for storage industries (later).
#' i_stage_colname <- paste0("i_", stage_colname)
#' Industry_stage_order <- data.frame(temp = Industries_less_Storage %>%
#' dplyr::select(!!as.name(stage_colname)) %>%
#' unique()) %>%
#' tibble::rownames_to_column(var = i_stage_colname) %>%
#' dplyr::mutate(
#' !!as.name(i_stage_colname) := as.numeric(!!as.name(i_stage_colname)),
#' !!as.name(i_stage_colname) := 2 * (!!as.name(i_stage_colname)) - 1
#' )
#' Product_stage_order <- data.frame(temp = Products %>%
#' dplyr::select(!!as.name(stage_colname)) %>%
#' unique()) %>%
#' tibble::rownames_to_column(var = i_stage_colname) %>%
#' dplyr::mutate(
#' !!as.name(i_stage_colname) := as.numeric(!!as.name(i_stage_colname)),
#' !!as.name(i_stage_colname) := 2 * (!!as.name(i_stage_colname))
#' )
#' # rbind these *_stage_order data frames
#' Stage_coords <- rbind(Industry_stage_order %>%
#' dplyr::rename(!!as.name(x_colname) := !!as.name(i_stage_colname)),
#' Product_stage_order %>%
#' dplyr::rename(!!as.name(x_colname) := !!as.name(i_stage_colname))) %>%
#' dplyr::arrange(!!as.name(x_colname))
#'
#' # Join Stage_coords to a list of industries and products to create the list of nodes.
#' Temp <- rbind(
#' Industries_less_Storage %>% dplyr::rename(!!as.name(node_name_colname) := !!as.name(industry_colname)),
#' Products %>% dplyr::rename(!!as.name(node_name_colname) := !!as.name(product_colname))
#' ) %>%
#' dplyr::left_join(Stage_coords, by = stage_colname)
#' # Split Temp in half by rows that have NA Group and those that don't.
#' # Sort the nodes where Groups are specified.
#' Node_coords <- dplyr::bind_rows(
#' Temp %>% dplyr::filter(is.na(!!as.name(group_colname))),
#' Temp %>% dplyr::filter(!is.na(!!as.name(group_colname))) %>%
#' dplyr::group_by(!!as.name(x_colname), !!as.name(group_colname)) %>%
#' # Put nodes in correct order.
#' # First group on stage (x coordinate) followed by
#' # the Group within each stage.
#' dplyr::arrange(!!as.name(x_colname), !!as.name(group_colname)))
#'
#' # At this point, all x coordinates have been decided and are in the Node_coords data frame.
#' # Furthermore, the Node_coords data frame is in the correct order for y coordinates.
#' # So work on y coordinates.
#' # Figure out the number of nodes in each stage.
#' N_nodes <- Node_coords %>%
#' dplyr::group_by(!!as.name(stage_colname)) %>%
#' dplyr::count()
#' # The count function automatically creates a column named "n".
#' # We need this name later, so declare it here.
#' n_colname <- "n"
#' y_max <- max(N_nodes[[n_colname]])
#' # y_center is the y coordinate that gives a horizontal line
#' # that serves as the vertical center of the network layout
#' # (not including storage nodes).
#' y_center <- (y_max + 1) / 2
#'
#' # Calculate y coordinates
#' i_group_colname <- ".i_group"
#' Node_coords <- Node_coords %>%
#' # Re-group according to x_colname only,
#' # thereby ensuring that we apply y coords to each stage independently.
#' dplyr::group_by(!!as.name(x_colname)) %>%
#' dplyr::mutate(
#' # Add a column for the row number (i) within each group (_group).
#' !!as.name(i_group_colname) := dplyr::row_number()
#' ) %>%
#' # Add a column (n) that gives the number of nodes in each stage
#' dplyr::left_join(N_nodes, by = stage_colname) %>%
#' dplyr::mutate(
#' # Calculate the column of y coordinates
#' # using the index within each group (i_group) and
#' # the total number of nodes in the group (n)
#' !!as.name(y_colname) := (y_max + 1) -
#' (y_center + (!!as.name(i_group_colname)) - ((!!as.name(n_colname)) + 1)/2)
#' ) %>%
#' # select only relevant columns
#' dplyr::select(!!as.name(node_name_colname), !!as.name(x_colname), !!as.name(y_colname))
#'
#' # Figure out where to place Storage nodes in x dimension.
#' # Align storage industries with other industries.
#' # Doing so will avoid vertical flows from products to storage.
#' x_max <- max(Industry_stage_order[[i_stage_colname]], Product_stage_order[[i_stage_colname]])
#' # Calculate the center of the x dimension.
#' x_center <- (x_max + 1) / 2
#' # Find out how many storage industries we have.
#' N_storage <- nrow(Storage)
#' # Find x coordinate of first Storage industry
#' x_first_storage <- x_center - (N_storage - 1) / 2
#' # If we have no Storage industries, we're done and can return Node_coords now.
#' if (nrow(Storage) == 0) {
#' out <- Node_coords
#' } else {
#' # We have some Storage nodes.
#' # Calculate coordinates for storage nodes.
#' Storage_coords <- data.frame(temp = Storage %>%
#' dplyr::select(!!as.name(industry_colname)) %>%
#' unique()) %>%
#' dplyr::mutate(
#' !!as.name(x_colname) := x_first_storage:(x_first_storage - 1 + N_storage)
#' ) %>%
#' dplyr::left_join(Storage, by = industry_colname) %>%
#' dplyr::rename(
#' !!as.name(node_name_colname) := !!as.name(industry_colname)
#' ) %>%
#' dplyr::mutate(
#' # Add the y coordinate
#' !!as.name(y_colname) := y_max + 1
#' ) %>%
#' # Select only columns that we want in output
#' dplyr::select(!!as.name(node_name_colname), !!as.name(x_colname), !!as.name(y_colname))
#' # Finally, rbind Node_coords and Storage_coords and return
#' out <- rbind(as.data.frame(Node_coords), Storage_coords)
#' }
#' if (!missing(g)) {
#' # When using a layout, qgraph respects the order, but not the names,
#' # of nodes in the layout argument.
#' # So, we need to re-order the rows of out to match the order of rows in g.
#' # First, get the node names (in qgraph order) from the qgraph object (g).
#' qgraph_node_names <- g$graphAttributes$Nodes$names
#' if (is.null(names(qgraph_node_names))) {
#' # If the graph is specified by an edge list, g has an unnamed list of full node names
#' node_names_from_graph <- data.frame(qgraph_node_names, stringsAsFactors = FALSE)
#' } else {
#' # If the graph is specified from a matrix, g has a named list of abbreviated node names.
#' # The list itself comprises abbreviated names.
#' # But the names of the items in the list comprise the actual node names.
#' node_names_from_graph <- data.frame(names(qgraph_node_names), stringsAsFactors = FALSE)
#' }
#' # Give the only column the correct name
#' names(node_names_from_graph)[1] <- node_name_colname
#' # By left_join-ing here, we keep the row order of node_names_from_graph
#' # while we add the x and y coordinates for the nodes in this layout.
#' out <- dplyr::left_join(node_names_from_graph, out, by = node_name_colname) %>%
#' # Select only the columns that we want to return
#' dplyr::select(!!as.name(x_colname), !!as.name(y_colname), !!as.name(node_name_colname))
#' # Ensure that we have x and y values for each row.
#' # If any are missing, we didn't have a matching set of node names
#' # among g, Industries, and Products.
#' err <- out %>%
#' dplyr::filter(
#' is.na((!!as.name(x_colname))) |
#' is.na((!!as.name(y_colname)))
#' )
#' # Check for errors.
#' assertthat::assert_that(nrow(err) == 0,
#' msg = paste("Error in ecc_layout().",
#' "Check err data frame.",
#' "Where NA appears, mismatched names among g, Industries, and Products in ecc_layout()."))
#' }
#' # Return a matrix.
#' out %>%
#' # Move the node names to rownames ...
#' tibble::column_to_rownames(node_name_colname) %>%
#' # ... which allows the as.matrix function to create numeric columns for x and y.
#' # This matrix can be used by qgraph for its layout argument.
#' as.matrix()
#' }
#'
#'
#'
#' #' Identify industry stages
#' #'
#' #' Names for industries in an ECC may be comprised of a general industry name
#' #' and the name of specific product,
#' #' such as "\code{Production - Hydro}".
#' #' Stages of an ECC are most easily identified simply by the general industry name
#' #' (e.g., all "\code{Production}" industries are \code{primary} industries),
#' #' but simple matching of specific industry names with general industry names to identify
#' #' industry stages will fail ("\code{Production - Hydro}" does not match "\code{Production}", e.g.).
#' #' This function assigns ECC stages to specific industry names
#' #' using a table of general industry names and associated ECC stages.
#' #' A general industry name matches a specific industry name if the specific industry name
#' #' starts with or is equal to the general industry name.
#' #'
#' #' @param GeneralIndustries a data frame containing at least two columns,
#' #' \code{general_industry_colname} and \code{stage_colname}.
#' #' @param SpecificIndustries a data frame containing at least \code{specific_industry_colname}.
#' #' @param general_industry_colname a string for the name of the column in \code{GeneralIndustries}
#' #' that contains the general names of industries (such as "\code{Production}").
#' #' Default is "\code{Industry}".
#' #' @param stage_colname a string for the name of the column in \code{GeneralIndustries}
#' #' that contains the names of ECC stages (such as "\code{Primary industry}",
#' #' "\code{Primary-to-final industry}", or "\code{Final-to-useful industry}").
#' #' Default is "\code{Stage}".
#' #' @param specific_industry_colname a string for the name
#' #' of the column in \code{SpecificIndustries} that contains the specific names of industries
#' #' (such as "\code{Production - Hydro}").
#' #' Default is "\code{Industry}".
#' #'
#' #' @return a data frame containing \code{specific_industry_colname} and \code{stage_colname},
#' #' wherein the stages associated with general industry names in \code{GeneralIndustries}
#' #' are assigned to the specific industries from \code{SpecificIndustries}
#' #' according to matches between \code{GeneralIndustries$general_industry_colname}
#' #' and \code{SpecificIndustries$specific_industry_colname}.
#' #'
#' #' @export
#' #'
#' #' @examples
#' #' GeneralIndustries <- data.frame(Industry = c("Production", "Plants", "Cars"),
#' #' Stage = c("Primary industry", "p-->f industry",
#' #' "f-->u industry"), stringsAsFactors = FALSE)
#' #' SpecificIndustries <- data.frame(Industry = c("Production - Coal", "Production - Hydro",
#' #' "Plants - Electric", "Plants - refinery",
#' #' "Cars - Electric", "Cars - Petrol"),
#' #' stringsAsFactors = FALSE)
#' #' identify_industry_stages(GeneralIndustries = GeneralIndustries,
#' #' SpecificIndustries = SpecificIndustries)
#' identify_industry_stages <- function(GeneralIndustries,
#' SpecificIndustries,
#' general_industry_colname = "Industry",
#' stage_colname = "Stage",
#' specific_industry_colname = "Industry"){
#' SpecificIndustries %>%
#' dplyr::select(!!as.name(specific_industry_colname)) %>%
#' fuzzyjoin::fuzzy_left_join(GeneralIndustries %>%
#' dplyr::select(!!as.name(general_industry_colname), !!as.name(stage_colname)),
#' by = c(general_industry_colname) %>% magrittr::set_names(specific_industry_colname),
#' match_fun = function(s.ind, g.ind){
#' startsWith(s.ind, g.ind)
#' }
#' ) %>%
#' # Return only the specific industries (column 1) and the stages (column 3).
#' # It would be nice to select these by name instead of position,
#' # but when specific_industry_colname and general_industry_colname are the same,
#' # fuzzy_left_join renames the columns with a ".x" and a ".y" suffix.
#' # Because we can't know ahead of time how the columns will be named,
#' # we select by position.
#' dplyr::select(c(1,3))
#' }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.