R/util.R

Defines functions create0Tcolumns Zij Z_byname

Documented in create0Tcolumns Z_byname Zij

#' Z matrix from \code{X_0} and \code{X_T} matrices
#'
#' The formula for \code{Z} is
#' \deqn{Z_ij = logmean(v_Ti1, v_0i1) * log(X_Tij / X_0ij)}
#' where \code{v} is a column vector formed by row products of \code{X}.
#' The \code{0} and \code{T} subscripts on \code{X} and \code{v} indicate an initial time (\code{0})
#' and a final time (\code{T}).
#' The \code{i} and \code{j} subscripts on \code{Z}, \code{v}, and \code{X} are
#' matrix indices.
#'
#' \code{Z} and \code{X} are category-by-factor matrices, and
#' \code{v} is a category-by-1 column vector.
#'
#' When a category comes and goes through time,
#' the category row can be absent from the \code{X} matrix at one time
#' but present in the \code{X} matrix at an adjacent time.
#' If the missing category row is replaced by \code{0}s,
#' the LMDI algorithm fails due to \code{log(0)} errors.
#' The usual advice is to insert a row consisting not of \code{0}s
#' but rather filled with small numbers (e.g., \code{1e-10}).
#'
#' But the usual advice doesn't represent reality for some LMDI decomposition analyses.
#' For example, the missing row may be caused by only one of the many factors being zero,
#' the other factors being non-zero.
#' To provide greater flexibility, this function provides the \code{fillrow} argument.
#' Callers can supply their own \code{fillrow},
#' a single-row \code{matrix} with column names identical
#' to \code{X_0} and \code{X_T}.
#' If \code{fillrow} is not specified,
#' the usual advice will be followed, and
#' a row vector consisting of very small values (\code{1e-10})
#' for each factor will be inserted into
#' \code{X_0} or \code{X_T}, as appropriate.
#'
#' Note that the \code{\link{lmdi}} function passes its \code{fillrow} argument, if present,
#' to \code{Z_byname}.
#'
#' The nomenclature for this function comes from
#' \href{https://doi.org/10.1016/s0360-5442(98)00016-4}{Ang, Zhang, and Choi (1998)}.
#' This function fully accounts for the degenerate cases
#' found in Table 2, p. 492 of
#' \href{https://doi.org/10.1016/s0360-5442(98)00016-4}{Ang, Zhang, and Choi (1998)}.
#' When \code{0}s are encountered in \code{emptyrow},
#' this function employs the method for dealing with the \code{log(0)} problem suggested by
#' \href{https://doi.org/10.1016/j.enpol.2004.11.010}{Wood and Lenzen (2006)}.
#'
#' @param X_0 an \code{X} matrix for initial time \code{0}
#' @param X_T an \code{X} matrix for final time \code{T}
#' @param fillrow a row vector of type \code{matrix} with column names identical
#'                to \code{X_0} and \code{X_T}. (See details.)
#'
#' @return a \code{Z} matrix
#'
#' @export
#'
Z_byname <- function(X_0, X_T, fillrow = NULL){
  Z_func <- function(X_0, X_T, fillrow = NULL){
    # At this point, X_0 and X_T are single matrices.
    # We need to take control of completing and sorting X_0 and X_T matrices here, because
    # we have a more-complex situation than simply filling the missing rows with 0s.
    if (is.null(fillrow)) {
      # If the caller didn't supply a value for fillrow, we default to the
      # "small value" approximation suggested by
      # B.W. Ang and F.Q. Zhang and Ki-Hong Choi, 1998,
      # Factorizing changes in energy and environmental indicators through decomposition,
      # Energy, Volume 23, Number 6, pp. 489-495.
      fillrow <- matrix(1e-10, nrow = 1, ncol = ncol(X_0),
                         dimnames = list("row", colnames(X_0))) %>%
        matsbyname::setrowtype(matsbyname::rowtype(X_0)) %>% matsbyname::setcoltype(matsbyname::coltype(X_0))
    }
    # Complete the matrices relative to one another, using fillrow.
    X_0_comp <- matsbyname::complete_rows_cols(X_0, X_T, fillrow = fillrow, margin = 1)
    X_T_comp <- matsbyname::complete_rows_cols(X_T, X_0, fillrow = fillrow, margin = 1)
    # Sort the matrices relative to one another.
    X_0_comp_sort <- matsbyname::sort_rows_cols(X_0_comp)
    X_T_comp_sort <- matsbyname::sort_rows_cols(X_T_comp)
    # At this point, X_0_comp_sort and X_T_comp_sort should be the same type of matrices.
    # I.e., they have the same row and column names.
    # And they have the same row and column types.
    # Ensure that this is so!
    stopifnot(matsbyname::samestructure_byname(X_0_comp_sort, X_T_comp_sort))

    # Create an empty Z matrix.
    # The empty Z is filled with default entries (NA).
    Z <- matrix(nrow = nrow(X_0_comp_sort), ncol = ncol(X_0_comp_sort)) %>%
      matsbyname::setrownames_byname(rownames(X_0_comp_sort)) %>% matsbyname::setcolnames_byname(colnames(X_0_comp_sort)) %>%
      matsbyname::setrowtype(matsbyname::rowtype(X_0_comp_sort)) %>% matsbyname::setcoltype(matsbyname::coltype(X_0_comp_sort))
    # Use an old-fashioned for loop to fill all elements of the Z matrix
    for (i in 1:nrow(Z)) {
      for (j in 1:ncol(Z)) {
        Z[i, j] <- Zij(i = i, j = j, X_0 = X_0_comp_sort, X_T = X_T_comp_sort)
      }
    }
    return(Z)
  }

  matsbyname::binaryapply_byname(Z_func, a = X_0, b = X_T,
                     .FUNdots = list(fillrow = fillrow), match_type = "all", .organize = FALSE)
}

#' Calculate element \code{Z_i,j}
#'
#' There are many special cases for calculating the \code{i,j}th term in \code{Z}.
#' These special cases must be handled on a term-by term basis.
#' This function handles all those cases.
#' See \code{\link{Z_byname}} for details.
#' The \code{X} matrices must be same size, must have same row and column names, and
#' must have same row and column types.
#' Note that \code{i} and \code{j} must be positive and less than or equal to
#' \code{nrow(X)} and \code{ncol(X)}, respectively.
#'
#' Arguments \code{i}, \code{j}, \code{X_0}, and \code{X_T} are optional.
#' If they are not specified,
#' arguments \code{v_0i1}, \code{v_Ti1}, \code{X_0ij}, and \code{X_Tij}
#' must be given.
#'
#' @param i optional row index for \code{X_0} and \code{X_T}.
#' @param j optional column index for \code{X_0} and \code{X_T}.
#' @param X_0 optional sub-sector by factor matrix for time 0.
#' @param X_T optional sub-sector by factor matrix for time T.
#' @param v_0i1 the i,1th element of the \code{v_0} column vector. (v_0 is formed from the row products of the \code{X_0} matrix.)
#' @param v_Ti1 the i,1th element of the \code{v_T} column vector. (v_T is formed from the row products of the \code{X_T} matrix.)
#' @param X_0ij the i,jth element of the \code{X_0} matrix
#' @param X_Tij the i,jth element of the \code{X_T} matrix
#'
#' @return the Z value corresponding to the \code{v_0i1}, \code{v_Ti1}, \code{X_0ij}, and \code{X_Tij} values
#'
#' @export
Zij <- function(i = NULL, j = NULL, X_0 = NULL, X_T = NULL,
                v_0i1 = matsbyname::rowprods_byname(X_0)[i, 1],
                v_Ti1 = matsbyname::rowprods_byname(X_T)[i, 1],
                X_0ij = X_0[i, j],
                X_Tij = X_T[i, j]){

  # Check the conditions, found in Table 2, p. 492 of
  # B.W. Ang and F.Q. Zhang and Ki-Hong Choi, 1998,
  # Factorizing changes in energy and environmental indicators through decomposition,
  # Energy, Volume 23, Number 6, pp. 489-495.
  if (v_0i1 == 0 & v_Ti1 > 0 & X_0ij == 0 & X_Tij > 0) {
    # Case 1
    return(v_Ti1)

  } else if (v_0i1 > 0 & v_Ti1 == 0 & X_0ij > 0 & X_Tij == 0) {
    # Case 2
    return(-v_0i1)

  } else if (v_0i1 == 0 & v_Ti1 > 0 & X_0ij > 0 & X_Tij > 0) {
    # Case 3
    return(0)

  } else if (v_0i1 > 0 & v_Ti1 == 0 & X_0ij > 0 & X_Tij > 0) {
    # Case 4
    return(0)

  } else if (v_0i1 == 0 & v_Ti1 == 0 & X_0ij > 0 & X_Tij > 0) {
    # Case 5
    return(0)

  } else if (v_0i1 == 0 & v_Ti1 == 0 & X_0ij == 0 & X_Tij == 0) {
    # Case 6
    return(0)

  } else if (v_0i1 == 0 & v_Ti1 == 0 & X_0ij > 0 & X_Tij == 0) {
    # Case 7
    return(0)

  } else if (v_0i1 == 0 & v_Ti1 == 0 & X_0ij == 0 & X_Tij > 0) {
    # Case 8
    return(0)

  } else if (v_0i1 > 0 & v_Ti1 > 0 & X_0ij > 0 & X_Tij > 0) {
    # This is the non-degenerate case
    return(matsbyname::logmean(v_Ti1, v_0i1) * log(X_Tij / X_0ij))
  }
  # We should never get here.
  stop("Unknown conditions for v_0i1, v_Ti1, X_0ij, and X_Tij in Zij")
}

#' Make columns for time 0 and time T
#'
#' @param XvV a data frame containing \code{X}, \code{v}, and \code{V} columns.
#' @param time_colname the name of the time column (a string)
#' @param X_colname the name of the \code{X} column (default is "\code{X}")
#' @param v_colname the name of the \code{v} column (default is "\code{v}")
#' @param V_colname the name of the \code{V} column (default is "\code{V}")
#' @param zero_suffix suffix for "\code{0}" variables (default is "\code{_0}")
#' @param T_suffix suffix for "\code{T}" variables (default is "\code{_T}")
#'
#' @importFrom dplyr do
#' @importFrom dplyr group_vars
#' @importFrom dplyr rename
#' @importFrom dplyr select
#' @importFrom rlang .data
#' @importFrom utils head
#' @importFrom utils tail
#'
#' @return a data frame containing metadata (\code{time_colname} and grouping variables)
#'         and columns for X_0, v_0, V_0, X_T, v_T, and V_T.
create0Tcolumns <- function(XvV,
                            time_colname,
                            X_colname = "X", v_colname = "v", V_colname = "V",
                            zero_suffix = "_0",
                            T_suffix = "_T"){

  # Eliminate the NOTE in R CMD chk about no visible global binding for "."
  . <- NULL

  # Establish names for new columns.
  X0_colname <- paste0(X_colname, zero_suffix)
  v0_colname <- paste0(v_colname, zero_suffix)
  V0_colname <- paste0(V_colname, zero_suffix)
  XT_colname <- paste0(X_colname, T_suffix)
  vT_colname <- paste0(v_colname, T_suffix)
  VT_colname <- paste0(V_colname, T_suffix)

  # Add a repeat of the first row at the top.
  # Doing so ensures that
  # the calculation of the first row deltaV values gives 0 and
  # the calculation of the first row D values gives 1, as it should.
  # Performing this action with "do" ensures that each group has a repeated first row.
  # . refers to the current group.
  # (See https://dplyr.tidyverse.org/reference/do.html for details.)
  XvV_repeat1strow <- XvV %>%
    dplyr::do(
      rbind(head(., n = 1), .)
    )

  # Set up for aligning the rows for further calculations.
  .DF0 <- XvV_repeat1strow %>%
    dplyr::do(
      # do works in groups, which is what we want.
      # . refers to the current group.
      head(., -1)
    ) %>%
    rename(
      !!as.name(X0_colname) := !!as.name(X_colname),
      !!as.name(v0_colname) := !!as.name(v_colname),
      !!as.name(V0_colname) := !!as.name(V_colname)
    ) %>%
    # Don't need grouping variables or time variable here.
    # We'll pick them up from .DFT.
    dplyr::ungroup() %>%
    dplyr::select(!!as.name(X0_colname), !!as.name(v0_colname), !!as.name(V0_colname))

  .DFT <- XvV_repeat1strow %>%
    dplyr::do(
      # do works in groups, which is what we want.
      # . refers to the current group.
      tail(., -1)
    ) %>%
    dplyr::rename(
      !!as.name(XT_colname) := !!as.name(X_colname),
      !!as.name(vT_colname) := !!as.name(v_colname),
      !!as.name(VT_colname) := !!as.name(V_colname))

  # Bind everything together and return it
  cbind(.DF0 %>% dplyr::ungroup(), .DFT %>% dplyr::ungroup()) %>%
    dplyr::select(dplyr::group_vars(XvV), time_colname, dplyr::everything()) %>%
    dplyr::group_by(!!!dplyr::groups(XvV))
}


#' Create a simple LMDI data frame
#'
#' This function creates a simple LMDI data frame that can be used for testing or examples.
#' It contains \code{X} matrices for two fictitious countries (\code{AB} and \code{XY})
#' and four years (\code{1971-1974}).
#' Countries \code{AB} and \code{YZ} are identical for the purposes of illustration.
#'
#' @return a data frame of example LMDI data
#'
#' @importFrom matsindf collapse_to_matrices
#'
#' @export
#'
#' @examples
#' library(matsindf)
#' DF <- create_simple_LMDI()
#' DF
#' DF$X[[1]]
create_simple_LMDI <- function(){
  simple.factor.names <- c("factor 1", "factor 2", "factor 3")
  simple.subsubcat.names <- c("subsubcat 1", "subsubcat 2")
  # Eliminate the NOTE in R CMD chk about no visible global binding for these variables
  Country <- NULL
  Year <- NULL
  x <- NULL
  # Create a tidy data frame of x values
  # which will be collapsed to matrices
  AB <- data.frame(Year = rep.int(1971, 6), x = c(1, 10, 2, 4, 5, 3)) %>%
    rbind(data.frame(Year = rep.int(1972, 6), x = c(4, 5, 3, 5, 6, 4))) %>%
    rbind(data.frame(Year = rep.int(1973, 6), x = c(8, 2, 4, 5, 7, 5))) %>%
    rbind(data.frame(Year = rep.int(1974, 6), x = c(10, 1, 5, 6, 8, 6))) %>%
    dplyr::mutate(
      matnames = "X",
      rowtypes = "subsubcat",
      coltypes = "factor",
      rownames = rep.int(c(rep.int(simple.subsubcat.names[[1]], 3), rep.int(simple.subsubcat.names[[2]], 3)), 4),
      colnames = rep.int(c(rep.int(simple.factor.names, 2)), 4),
      Country = "AB"
    ) %>%
    dplyr::group_by(Country, Year) %>%
    matsindf::collapse_to_matrices(matnames = "matnames", matvals = "x",
                         rownames = "rownames", colnames = "colnames",
                         rowtypes = "rowtypes", coltypes = "coltypes") %>%
    dplyr::rename(X = x)
  rbind(AB, AB %>% dplyr::mutate(Country = "YZ"))
}
MatthewHeun/LMDIR documentation built on Feb. 16, 2020, 9:10 p.m.