R/Utilities.R

Defines functions vec_from_store_byname kvec_from_template_byname create_colvec_byname create_rowvec_byname create_matrix_byname ncol_byname nrow_byname logmean iszero_byname clean_byname select_cols_byname select_rows_byname coltype rowtype setcoltype setrowtype margin_from_types_byname rename_to_piece_byname rename_to_pref_suff_byname setcolnames_byname setrownames_byname getcolnames_byname getrownames_byname list_of_rows_or_cols prep_vector_arg organize_args

Documented in clean_byname coltype create_colvec_byname create_matrix_byname create_rowvec_byname getcolnames_byname getrownames_byname iszero_byname kvec_from_template_byname list_of_rows_or_cols logmean margin_from_types_byname ncol_byname nrow_byname organize_args prep_vector_arg rename_to_piece_byname rename_to_pref_suff_byname rowtype select_cols_byname select_rows_byname setcolnames_byname setcoltype setrownames_byname setrowtype vec_from_store_byname

#' Organize binary arguments
#'
#' Organizes arguments of binary (2 arguments) `_byname` functions.
#' Actions performed are:
#' \itemize{
#'  \item{if only one argument is a list, make the other argument also a list of equal length.}
#'  \item{if both arguments are lists, ensure that they are same length.}
#'  \item{if one argument is a matrix and the other is a constant, make the constant into a matrix.}
#'  \item{ensures that row and column types match for \code{typematch_margins}.}
#'  \item{ensures that list item names match if both \code{a} and \code{b} are lists; 
#'        no complaints are made if neither \code{a} nor \code{b} has names.}
#'  \item{completes and sorts the matrices.}
#' }
#'
#' @param a the first argument to be organized
#' @param b the second argument to be organized
#' @param match_type one of \code{"all"}, \code{"matmult"}, \code{"none"}.
#' When both \code{a} and \code{b} are matrices,
#' "\code{all}" (the default) indicates that
#' rowtypes of \code{a} must match rowtypes of \code{b} and
#' coltypes of \code{a} must match coltypes of \code{b}.
#' If "\code{matmult}",
#' coltypes of \code{a} must match rowtypes of \code{b}.
#' @param fill a replacement value for \code{a} or \code{b} if either is missing or \code{NULL}.
#'
#' @return a list with two elements (named \code{a} and \code{b}) containing organized versions of the arguments
organize_args <- function(a, b, match_type = "all", fill){
  if (missing(a)) {
    if (missing(fill)) {
      stop("Missing argument a with no fill in organize_args.")
    } else {
      a <- fill
    }
  }
  if (is.null(a)) {
    if (missing(fill)) {
      stop("Null argument a with no fill in organize_args.")
    } else {
      a <- fill
    }
  }
  if (missing(b)) {
    if (missing(fill)) {
      stop("Missing argument b with no fill in organize_args.")
    } else {
      b <- fill
    }
  }
  if (is.null(b)) {
    if (missing(fill)) {
      stop("Null argument b with no fill in organize_args.")
    } else {
      b <- fill
    }
  }
  if (is.list(a) | is.list(b)) {
    if (!is.list(a)) {
      # b is a list, but a is not.  Make a into a list and give it same names as b.
      a <- make_list(a, n = length(b)) %>% magrittr::set_names(names(b))
    }
    if (!is.list(b)) {
      # a is a list, but b is not.  Make b into a list and give it same names as a.
      b <- make_list(b, n = length(a)) %>% magrittr::set_names(names(a))
    }
  }
  if (is.list(a) & is.list(b)) {
    # Both a and b are lists. Ensure they're the same length.
    stopifnot(length(a) == length(b))
    # Ensure that a and b have same length of names
    stopifnot(length(names(a)) == length(names(b)))
    # Ensure that a and b have same names if either has names
    stopifnot(names(a) == names(b))
    # Now return the lists.
    return(list(a = a, b = b))
  }
  
  # Neither a nor b are lists.
  if (!is.matrix(a) & !is.matrix(b)) {
    # Neither a nor b are matrices. Assume we have two constants. Return the constants in a vector.
    return(list(a = a, b = b))
  }
  
  # Neither a nor b are lists.
  # First check whether matchtype is a known value.
  if (!match_type %in% c("all", "matmult", "none"))  {
    stop(paste("Unknown match_type", match_type, "in organize_args."))
  }
  # We don't know if one or both a and b is a matrix.
  # If one is not a matrix, assume it is a constant and try to make it into an appropriate-sized matrix.
  if (!is.matrix(a) & is.matrix(b)) {
    a <- matrix(a, nrow = nrow(b), ncol = ncol(b), dimnames = dimnames(b))
    if (match_type == "all") {
      a <- a %>% setrowtype(rowtype(b)) %>% setcoltype(coltype(b))
    } 
    if (match_type == "matmult") {
      a <- a %>% setcoltype(rowtype(b))
    }
    # If matchtype == "none", we don't to anything.
  } else if (is.matrix(a) & !is.matrix(b)) {
    b <- matrix(b, nrow = nrow(a), ncol = ncol(a), dimnames = dimnames(a))
    if (match_type == "all") {
      b <- b %>% setrowtype(rowtype(a)) %>% setcoltype(coltype(a))
    }
    if (match_type == "matmult") {
      b <- b %>% setrowtype(coltype(a))
    }
    # If matchtype == "none", we don't to anything.
  }
  
  # Assume that both a and b are now matrices.
  
  # Verify that row and column types are appropriate.
  if (match_type == "all") {
    # If neither rowtype nor coltype are set,
    # skip these tests
    if (!is.null(rowtype(a)) & !is.null(coltype(a)) & !is.null(rowtype(b)) & !is.null(coltype(b))) {
      # Verify that the row type of a and b are the same.
      if (rowtype(a) != rowtype(b)) {
        stop(paste0("rowtype(a) (", rowtype(a), ") != rowtype(b) (", rowtype(b),")."))
      }
      # Verify that the column type of a and b are the same.
      if (coltype(a) != coltype(b)) {
        stop(paste0("coltype(a) (", coltype(a), ") != coltype(b) (", coltype(b),")."))
      }
    }
  } 
  if (match_type == "matmult") {
    # If neither coltype(a) nor rowtype(b) are set,
    # skip this test
    if (!is.null(coltype(a)) & !is.null(rowtype(b))) {
      # Verify that the column type of a and the row type of b are the same.
      if (coltype(a) != rowtype(b)) {
        stop(paste0("coltype(a) != rowtype(b): ", coltype(a), " != ", rowtype(b),"."))
      }
    }
  } 
  
  # We already ensured that match_type was one of the known types. 
  # Ensure that matrices have correct row and column names and are in same order.
  if (match_type == "all") {
    matrices <- complete_and_sort(a, b)
    outa <- matrices$a %>% setrowtype(rowtype(a)) %>% setcoltype(coltype(a))
    outb <- matrices$b %>% setrowtype(rowtype(b)) %>% setcoltype(coltype(b))
  } else if (match_type == "matmult") {
    # When the match_type is "matmult", we need to ensure that the columns of a match the rows of b.
    # To do so, we transpose b prior to completing and sorting, and we complete and sort on columns.
    matrices <- complete_and_sort(a, transpose_byname(b), margin = 2)
    outa <- matrices$a %>% setrowtype(rowtype(a)) %>% setcoltype(coltype(a))
    # Before sending back, we need to re-transpose b.
    outb <- matrices$b %>% transpose_byname %>% setrowtype(rowtype(b)) %>% setcoltype(coltype(b))
  } else if (match_type == "none") {
    outa <- a
    outb <- b
  }
  return(list(a = outa, b = outb))
}


#' Prepare a vector argument
#' 
#' This is a helper function for many `*_byname` functions.
#' 
#' It is potentially ambiguous to specify a vector or matrix argument, say, `margin = c(1, 2)` when applying
#' the `*_byname` functions to unary list of `a`.
#' Rather, one should specify, say, `margin = list(c(1, 2)`
#' to avoid ambiguity.
#' If `a` is a list, 
#' `vector_arg` is not a list and has length > 1 and length not equal to the length of a,
#' this function returns a list value for `vector_arg`.
#' If `a` is not a list and `vector_arg` is a list, 
#' this function returns an un-recursive, unlisted version of `vector_arg`.
#' 
#' Note that if `vector_arg` is a single matrix, it is automatically enclosed by a list when `a` is a list.
#'
#' @param a a matrix or list of matrices
#' @param vector_arg the vector argument over which to apply a calculation
#'
#' @return `vector_arg`, possibly modified when `a` is a list 
#' 
#' @export
#'
#' @examples
#' m <- matrix(c(2, 2))
#' matsbyname:::prep_vector_arg(list(m, m), vector_arg = c(1,2))
prep_vector_arg <- function(a, vector_arg) {
  if (is.list(a)) {
    if (is.matrix(vector_arg) | (!is.list(vector_arg) & length(vector_arg) > 1 & length(vector_arg) != length(a))) {
      # We probably want to make vector_arg into a list.
      vector_arg <- list(vector_arg)
    }
  } else {
    # a is not a list
    if (is.list(vector_arg)) {
      # We can unlist this vector_arg to use it directly.
      vector_arg <- unlist(vector_arg, recursive = FALSE)
    }
  }

  vector_arg
}


#' Named list of rows or columns of matrices
#' 
#' This function takes matrix \code{m} and converts it to a list of 
#' single-row (if \code{margin == 1}) or single-column(if \code{margin == 2})
#' matrices.
#' Each item in the list is named for its row (if \code{margin == 1}) 
#' or column (if \code{margin == 2}).
#'
#' Note that the result provides column vectors, regardless of the value of \code{margin}.
#'
#' @param a a matrix or list of matrices (say, from a column of a data frame)
#' @param margin the margin of the matrices to be extracted (\code{1} for rows, \code{2} for columns)
#'
#' @return a named list of rows or columns extracted from \code{m}
#' 
#' @export
#' 
#' @examples
#' m <- matrix(data = c(1:6), 
#'             nrow = 2, ncol = 3, 
#'             dimnames = list(c("p1", "p2"), c("i1", "i2", "i3"))) %>%
#'   setrowtype(rowtype = "Products") %>% setcoltype(coltype = "Industries")
#' list_of_rows_or_cols(m, margin = 1)
#' list_of_rows_or_cols(m, margin = 2)
list_of_rows_or_cols <- function(a, margin){
  margin <- prep_vector_arg(a, margin)
  
  lrc_func <- function(a, margin){
    stopifnot(length(margin) == 1)
    stopifnot(margin == 1 | margin == 2)
    stopifnot(inherits(a, "matrix"))
    # Strategy: perform all operations with margin to be split into a list in columns.
    if (margin == 1) {
      # Caller requested rows to be split into list items.
      # Transpose so operations will be easier.
      out <- transpose_byname(a)
    } else {
      out <- a
    }
    out <- lapply(seq_len(ncol(out)), function(i){
      matrix(out[,i], nrow = nrow(out), ncol = 1, dimnames = list(rownames(out), colnames(out)[[i]])) %>%
        setrowtype(rowtype(out)) %>% setcoltype(coltype(out))
    }) %>%
      magrittr::set_names(colnames(out))
    return(out)
  }
  unaryapply_byname(lrc_func, a = a, .FUNdots = list(margin = margin), 
                    rowcoltypes = "none")
}


#' Gets row names
#'
#' Gets row names in a way that is amenable to use in chaining operations in a functional programming way
#'
#' @param a The matrix or data frame on which row names are to be retrieved
#'
#' @return row names of \code{a}
#' 
#' @export
#'
#' @examples
#' m <- matrix(c(1:6), nrow = 2, dimnames = list(paste0("i", 1:2), paste0("c", 1:3))) %>%
#'   setrowtype("Industries") %>% setcoltype("Commodities")
#' getrownames_byname(m)
#' # This also works for lists
#' getrownames_byname(list(m,m))
#' DF <- data.frame(m = I(list()))
#' DF[[1,"m"]] <- m
#' DF[[2,"m"]] <- m
#' getrownames_byname(DF$m)
getrownames_byname <- function(a){
  unaryapply_byname(rownames, a = a, rowcoltypes = "none")
}


#' Gets column names
#'
#' Gets column names in a way that is amenable to use in chaining operations in a functional programming way
#'
#' @param a The matrix or data frame from which column names are to be retrieved
#'
#' @return Column names of `m`.
#' 
#' @export
#'
#' @examples
#' m <- matrix(c(1:6), nrow = 2, dimnames = list(paste0("i", 1:2), paste0("c", 1:3))) %>%
#'   setrowtype("Industries") %>% setcoltype("Commodities")
#' getcolnames_byname(m)
#' # This also works for lists
#' getcolnames_byname(list(m,m))
#' DF <- data.frame(m = I(list()))
#' DF[[1,"m"]] <- m
#' DF[[2,"m"]] <- m
#' getcolnames_byname(DF$m)
getcolnames_byname <- function(a){
  unaryapply_byname(colnames, a = a, rowcoltypes = "none")
}


#' Sets row names
#'
#' Sets row names in a way that is amenable to use in piping operations in a functional programming way.
#' If \code{a} is \code{NULL}, \code{NULL} is returned.
#' If \code{a} is a constant, it is converted to a matrix and \code{rownames} are applied.
#' If \code{a} is a matrix, \code{rownames} should be a vector of new row names
#' that is as long as the number of rows in \code{a}.
#' If \code{a} is a list of matrices, 
#' \code{rownames} can also be a list, and it should be as long \code{a}.
#' Or \code{rownames} can be a vector of row names which will be applied to every matrix in
#' the list of \code{a}.
#' Each item in the list should be a vector containing row names for the corresponding 
#' matrix in \code{a}.
#'
#' @param a A matrix or a list of matrices in which row names are to be set
#' @param rownames A vector of new row names or a list of vectors of new row names
#'
#' @return a copy of \code{m} with new row names
#' 
#' @export
#'
#' @examples
#' library(dplyr)
#' m <- matrix(c(1:6), nrow = 2, dimnames = list(paste0("i", 1:2), paste0("c", 1:3))) %>%
#'   setrowtype("Industries") %>% setcoltype("Commodities")
#' setrownames_byname(m, c("a", "b"))
#' setrownames_byname(m %>% setrowtype("Industries") %>% setcoltype("Commodities"), c("c", "d"))
#' m %>% setrownames_byname(NULL)
#' m %>% setrownames_byname(c(NA, NA))
#' 2 %>% setrownames_byname("row")
#' # This also works for lists
#' setrownames_byname(list(m,m), list(c("a", "b")))
#' DF <- data.frame(m = I(list()))
#' DF[[1,"m"]] <- m
#' DF[[2,"m"]] <- m
#' setrownames_byname(DF$m, list(c("r1", "r2")))
#' setrownames_byname(DF$m, list(c("c", "d")))
#' DF <- DF %>% mutate(m = setrownames_byname(m, list(c("r1", "r2"))))
#' DF$m[[1]]
setrownames_byname <- function(a, rownames){
  rownames <- prep_vector_arg(a, rownames)
  if (is.null(a)) {
    return(NULL)
  }
  rowname_func <- function(a, rownames){
    if (is.null(dim(a))) {
      # a has no dimensions. It is a constant.
      # Turn it into a matrix and set the row names.
      out <- matrix(a, nrow = 1, ncol = 1)
    } else {
      out <- a
    }
    if (is.null(rownames)) {
      # replace with default row names
      rownames(out) <- NULL
    } else {
      rownames(out) <- rownames
    }
    return(out)
  }
  unaryapply_byname(rowname_func, a = a, .FUNdots = list(rownames = rownames), 
                    rowcoltypes = "all")
}


#' Sets column names
#'
#' Sets column names in a way that is amenable to use in piping operations in a functional programming way.
#' if \code{a} is \code{NULL}, \code{NULL} is returned.
#' If \code{a} is a constant, it is converted to a matrix and \code{colnames} are applied.
#' If \code{a} is a matrix, \code{colnames} should be a vector of new column names
#' that is as long as the number of columns in \code{a}.
#' If \code{a} is a list of matrices, 
#' \code{colnames} can also be a list, and it should be as long as \code{a}.
#' Or \code{colnames} can be a vector of column names which will be applied to every matrix in
#' the list of \code{a}.
#' Each item in the list should be a vector containing column names for the corresponding 
#' matrix in \code{a}.
#'
#' @param a A matrix or a list of matrices in which column names are to be set
#' @param colnames A vector of new column names or a list of vectors of new column names
#'
#' @return a copy of \code{a} with new column names
#' 
#' @export
#'
#' @examples
#' m <- matrix(c(1:6), nrow = 2, dimnames = list(paste0("i", 1:2), paste0("c", 1:3))) %>%
#'   setrowtype("Industries") %>% setcoltype("Commodities")
#' setcolnames_byname(m, c("a", "b", "c"))
setcolnames_byname <- function(a, colnames){
  if (is.null(a)) {
    return(NULL)
  }
  if (is.list(a) & !is.list(colnames)) {
    colnames <- list(colnames)
  }
  a %>% 
    transpose_byname() %>% 
    setrownames_byname(rownames = colnames) %>% 
    transpose_byname()
}


#' Rename matrix rows and columns by prefix and suffix
#' 
#' `r lifecycle::badge("superseded")`
#' It can be convenient to rename rows or columns of matrices 
#' based on retaining prefixes or suffixes.
#' This function provides that capability.
#' 
#' A prefix is defined by an opening string (`prefix_open`) and a closing string (`prefix_close`).
#' A suffix is defined by an opening string (`suffix_open`) and a closing string (`suffix_close`).
#' If `sep` is provided and none of `prefix_open`, `prefix_close`, `suffix_open`, and `suffix_close` are provided,
#' default arguments become:
#'     * `prefix_open`: "",
#'     * `prefix_close`: `sep`, 
#'     * `suffix_open`: `sep`, and
#'     * `suffix_close`: "".
#'     
#' The `keep` parameter tells which portion to retain (prefixes or suffixes), 
#' 
#' If prefixes or suffixes are not found in a row and/or column name, that name is unchanged.
#' 
#' @param a a matrix or list of matrices whose rows or columns will be renamed.
#' @param keep one of "prefix" or "suffix" indicating which part of the row or column name to retain.
#' @param margin one of `1`, `2`, or `c(1, 2)` where `1` indicates rows and `2` indicates columns.
#' @param notation See `notation_vec()`.
#'
#' @return `a` with potentially different row or column names.
#' 
#' @export
#'
#' @examples
#' # This function is superseded. 
#' # Instead, use `rename_to_pieces_byname()`.
#' # For example:
#' m <- matrix(c(1, 2, 
#'               3, 4, 
#'               5, 6), nrow = 3, byrow = TRUE, 
#'             dimnames = list(c("a -> b", "r2", "r3"), c("a -> b", "c -> d")))
#' m
#' rename_to_piece_byname(m, piece = "pref", notation = RCLabels::arrow_notation)
#' # Note, labels are lost, because some labels are missing a suffix.
#' rename_to_piece_byname(m, piece = "suff", notation = RCLabels::arrow_notation)
#' # Original documentation:
#' rename_to_pref_suff_byname(m, keep = "pref", notation = RCLabels::arrow_notation)
#' rename_to_pref_suff_byname(m, keep = "suff", notation = RCLabels::arrow_notation)
rename_to_pref_suff_byname <- function(a, keep, margin = c(1, 2), notation) {
  rename_to_piece_byname(a, piece = keep, margin = margin, 
                         notation = notation, prepositions = RCLabels::prepositions)
}


#' Rename matrix rows and columns by piece of row or column names
#' 
#' It can be convenient to rename rows or columns of matrices 
#' based on retaining only a piece of the row and/or column names.
#' This function provides that capability.
#' 
#' Internally, this function finds pieces of row and column names 
#' via the `RCLabels` package. 
#' `piece` can be anything that `RCLabels::get_piece()` understands.
#' Note that `margin` can be either an integer vector or
#' a character vector. 
#' If `margin` is a character vector, 
#' it is interpreted as a row or column type, and
#' `margin_from_types_byname()` is called internally to 
#' resolve the integer margins of interest.
#' 
#' Note that if row and/or column type are present,
#' the row and/or column type are also renamed according to `piece`.
#'
#' @param a A matrix or list of matrices whose rows or columns will be renamed.
#' @param piece A character string indicating which piece of the row or column names to retain, 
#'              one of "noun", "pps", "pref" or "suff", or a preposition,
#'              indicating which part of the row or column name is to be retained.
#' @param margin As a character, the row type or column type to be renamed.
#'               As an integer, the margin to be renamed.
#'               Default is `c(1, 2)`, meaning that both 
#'               rows (`margin = 1`) and columns (`margin = 2`)
#'               will be renamed.
#' @param notation The notation used for row and column labels. 
#'                 Default is `RCLabels::bracket_notation`.
#'                 See `RCLabels`.
#'                 
#' @param prepositions Prepositions that can be used in the row and column label.
#'                     Default is `RCLabels::prepositions`.
#'
#' @return A version of `a` with renamed rows and columns.
#' 
#' @export
#'
#' @examples
#' m <- matrix(c(1, 2, 
#'               3, 4, 
#'               5, 6), nrow = 3, byrow = TRUE, 
#'             dimnames = list(c("a -> b", "r2", "r3"), c("a -> b", "c -> d")))
#' m
#' rename_to_piece_byname(m, piece = "pref", notation = RCLabels::arrow_notation)
#' m2 <- m %>%
#'   setrowtype("rows") %>% setcoltype("cols")
#' m2
#' rename_to_piece_byname(m2, piece = "pref", margin = "rows",
#'                        notation = RCLabels::arrow_notation)
#' rename_to_piece_byname(m2, piece = "suff", margin = "rows",
#'                        notation = RCLabels::arrow_notation)
rename_to_piece_byname <- function(a,
                                   piece,
                                   margin = c(1, 2),
                                   notation = RCLabels::bracket_notation,
                                   prepositions = RCLabels::prepositions) {
  piece <- prep_vector_arg(a, piece)
  margin <- prep_vector_arg(a, margin)
  notation <- prep_vector_arg(a, notation)
  prepositions <- prep_vector_arg(a, prepositions)
  
  rename_func <- function(a_mat, this_piece, this_margin, this_notation, these_prepositions) {
    # At this point, a should be a single matrix, 
    # this_* should be individual items ready for use in this function.
    
    # Figure out the margin.
    this_margin <- margin_from_types_byname(a_mat, this_margin)

    if (2 %in% this_margin) {
      # Want to rename columns.
      # Easier to transpose, recursively call ourselves to rename rows, and then transpose again.
      a_mat <- transpose_byname(a_mat) %>% 
        rename_func(this_piece = this_piece, 
                    this_margin = 1,
                    this_notation = this_notation, 
                    these_prepositions = these_prepositions) %>% 
        transpose_byname()
    }
    
    if (1 %in% this_margin) {
      new_rnames <- rownames(a_mat) %>% 
        RCLabels::get_piece(piece = this_piece, 
                            notation = this_notation,
                            prepositions = these_prepositions)
      new_rt <- rowtype(a_mat)
      if (!is.null(new_rt)) {
        new_rt <- new_rt %>%
          RCLabels::get_piece(piece = this_piece, 
                              notation = this_notation, 
                              prepositions = these_prepositions)
      }
      
      # Set new rownames, without the names on the list (parts of the previous name)
      rownames(a_mat) <- unname(new_rnames)
      # Set new rowtype
      a_mat <- setrowtype(a_mat, unname(new_rt))
    }
    
    return(a_mat)
  }
  unaryapply_byname(rename_func, a = a,
                    .FUNdots = list(this_piece = piece,
                                    this_margin = margin,
                                    this_notation = notation, 
                                    these_prepositions = prepositions), 
                    rowcoltypes = "none")
}


#' Translate row and column types to integer margins
#' 
#' Converts row and column types to integer margins,
#' based on `a` and `types`.
#' If `types` is not a character vector, `types` is returned unmodified.
#' If `types` is a character vector, an integer vector is returned
#' corresponding to the margins on which `types` are found.
#' If `types` are not found in the row or column types of `a`, 
#' `NA_integer_` is returned.
#'
#' @param a A matrix or list of matrices.
#' @param types A character vector or list of character vectors 
#'              representing row or column types whose 
#'              corresponding integer margins in `a` are to be determined.
#'
#' @return A vector of integers or list of vectors of integers 
#'         corresponding to the margins on which `types` exist.
#' 
#' @export
#'
#' @examples
#' # Works for single matrices
#' m <- matrix(1) %>%
#'   setrowtype("Product") %>% setcoltype("Industry")
#' margin_from_types_byname(m, "Product")
#' margin_from_types_byname(m, "Industry")
#' margin_from_types_byname(m, c("Product", "Industry"))
#' margin_from_types_byname(m, c("Industry", "Product"))
#' # Works for lists of matrices
#' margin_from_types_byname(list(m, m), types = "Product")
#' margin_from_types_byname(list(m, m), types = "Industry")
#' margin_from_types_byname(list(m, m), types = c("Product", "Product"))
#' margin_from_types_byname(list(m, m), types = c("Industry", "Industry"))
#' margin_from_types_byname(list(m, m), types = c("Product", "Industry"))
#' margin_from_types_byname(list(m, m), types = list("Product", "Industry"))
#' margin_from_types_byname(list(m, m), types = list(c("Product", "Industry")))
#' margin_from_types_byname(list(m, m), types = list(c("Product", "Industry"), 
#'                                                   c("Product", "Industry")))
#' # Works in a data frame
#' m2 <- matrix(2) %>%
#'   setrowtype("Industry") %>% setcoltype("Product")
#' df <- tibble::tibble(m = list(m, m2), types = list("Product", c("Product", "Industry")))
#' res <- df %>%
#'   dplyr::mutate(
#'     margin = margin_from_types_byname(m, types)
#'  )
#' res$margin
margin_from_types_byname <- function(a, types) {
  
  types <- prep_vector_arg(a, types)
  
  mft_fun <- function(a_mat, these_types) {
    # At this point, a_mat and these_types should be single 
    # items, ready for use.
    if (!is.character(these_types)) {
      return(these_types)
    }
    margin <- c()
    if (rowtype(a_mat) %in% these_types) {
      margin <- margin %>% 
        append(1)
    }
    if (coltype(a_mat) %in% these_types) {
      margin <- margin %>%
        append(2)
    }
    if (length(margin) == 0) {
      return(NA_integer_)
    }
    return(margin)
  }
  unaryapply_byname(mft_fun, a = a, .FUNdots = list(these_types = types), rowcoltypes = "none")
}


#' Sets row type for a matrix or a list of matrices
#'
#' This function is a wrapper for \code{attr} so that 
#' setting can be accomplished by the pipe operator (\code{\%>\%}).
#' Row types are strings stored in the \code{rowtype} attribute.
#' 
#' If \code{is.null(rowtype)}, the rowtype attribute is deleted
#' and subsequent calls to \code{rowtype} will return \code{NULL}.
#'
#' @param a the matrix on which row type is to be set
#' @param rowtype the type of item stored in rows
#'
#' @return \code{a} with rowtype attribute set to \code{rowtype}.
#' 
#' @export
#'
#' @examples
#' library(dplyr)
#' commoditynames <- c("c1", "c2")
#' industrynames <- c("i1", "i2")
#' U <- matrix(1:4, ncol = 2, dimnames = list(commoditynames, industrynames))
#' U %>% setrowtype("Commodities")
#' # This also works for lists
#' setrowtype(list(U,U), rowtype = "Commodities")
#' setrowtype(list(U,U), rowtype = list("Commodities", "Commodities"))
#' DF <- data.frame(U = I(list()))
#' DF[[1,"U"]] <- U
#' DF[[2,"U"]] <- U
#' setrowtype(DF$U, "Commodities")
#' DF <- DF %>% mutate(newcol = setrowtype(U, "Commodities"))
#' DF$newcol[[1]]
#' DF$newcol[[2]]
setrowtype <- function(a, rowtype){
  rt_func <- function(a, rowtype){
    attr(a, "rowtype") <- rowtype
    return(a)
  }
  unaryapply_byname(rt_func, a = a, .FUNdots = list(rowtype = rowtype),
                    rowcoltypes = "none")
}


#' Sets column type for a matrix or a list of matrices
#'
#' This function is a wrapper for \code{attr} so that 
#' setting can be accomplished by the pipe operator (\code{\%>\%}).
#' Column types are strings stored in the \code{coltype} attribute.
#' 
#' #' If \code{is.null(coltype)}, the coltype attribute is deleted
#' and subsequent calls to \code{coltype} will return \code{NULL}.
#'
#' @param a the matrix on which column type is to be set
#' @param coltype the type of item stored in columns
#'
#' @return \code{a} with \code{coltype} attribute set.
#' 
#' @export
#'
#' @examples
#' library(dplyr)
#' commoditynames <- c("c1", "c2")
#' industrynames <- c("i1", "i2")
#' U <- matrix(1:4, ncol = 2, dimnames = list(commoditynames, industrynames))
#' U %>% setcoltype("Industries")
#' # This also works for lists
#' setcoltype(list(U,U), coltype = "Industries")
#' setcoltype(list(U,U), coltype = list("Industries", "Industries"))
#' DF <- data.frame(U = I(list()))
#' DF[[1,"U"]] <- U
#' DF[[2,"U"]] <- U
#' setcoltype(DF$U, "Industries")
#' DF <- DF %>% mutate(newcol = setcoltype(U, "Industries"))
#' DF$newcol[[1]]
#' DF$newcol[[2]]
setcoltype <- function(a, coltype){
  ct_func <- function(a, coltype){
    attr(a, "coltype") <- coltype
    return(a)
  }
  unaryapply_byname(ct_func, a = a, .FUNdots = list(coltype = coltype), 
                    rowcoltypes = "none")
}


#' Row type
#'
#' Extracts row type of \code{a}.
#'
#' @param a the object from which you want to extract row types
#'
#' @return the row type of \code{a}
#' 
#' @export
#'
#' @examples
#' library(dplyr)
#' commoditynames <- c("c1", "c2")
#' industrynames <- c("i1", "i2")
#' U <- matrix(1:4, ncol = 2, dimnames = list(commoditynames, industrynames)) %>%
#'   setrowtype(rowtype = "Commodities") %>% setcoltype("Industries")
#' rowtype(U)
#' # This also works for lists
#' rowtype(list(U,U))
rowtype <- function(a){
  unaryapply_byname(attr, a = a, .FUNdots = list(which = "rowtype"), 
                    rowcoltypes = "none")
}


#' Column type
#'
#' Extracts column type of \code{a}.
#'
#' @param a the object from which you want to extract column types
#'
#' @return the column type of \code{a}
#' 
#' @export
#'
#' @examples
#' commoditynames <- c("c1", "c2")
#' industrynames <- c("i1", "i2")
#' U <- matrix(1:4, ncol = 2, dimnames = list(commoditynames, industrynames)) %>%
#'   setrowtype(rowtype = "Commodities") %>% setcoltype("Industries")
#' coltype(U)
#' # This also works for lists
#' coltype(list(U,U))
coltype <- function(a){
  unaryapply_byname(attr, a = a, .FUNdots = list(which = "coltype"), 
                    rowcoltypes = "none")
}


#' Select (or de-select) rows of a matrix (or list of matrices) by name
#'
#' Arguments indicate which rows are to be retained and which are to be removed.
#' For maximum flexibility, arguments are extended regex patterns
#' that are matched against row names.
#'
#' If \code{a} is \code{NULL}, \code{NULL} is returned.
#' 
#' Patterns are compared against row names using extended regex.
#' If no row names of \code{m} match the \code{retain_pattern}, \code{NULL} is returned.
#' If no row names of \code{m} match the \code{remove_pattern}, \code{m} is returned.
#' Note that the default \code{retain_pattern} and \code{remove_pattern} (\code{$^}) 
#' retain nothing and remove nothing.
#'
#' Retaining rows takes precedence over removing rows, always.
#'
#' Some typical patterns are:
#' \itemize{
#'   \item{\code{^Electricity$|^Oil$}: row names that are EXACTLY \code{Electricity} or EXACTLY \code{Oil}.}
#'   \item{\code{^Electricity|^Oil}: row names that START WITH \code{Electricity} or START WITH \code{Oil}.}
#'   \item{\code{Electricity|Oil}: row names that CONTAIN \code{Electricity} or CONTAIN \code{Oil} anywhere within them.}
#' }
#'
#' Given a list of row names, a pattern can be constructed easily using the \code{make_pattern} function.
#' `RCLabels::make_or_pattern()` escapes regex strings using `Hmisc::escapeRegex()`.
#' This function assumes that \code{retain_pattern} and \code{remove_pattern} have already been
#' suitably escaped.
#' 
#' Note that if all rows are removed from `a`, `NULL` is returned.
#'
#' @param a a matrix or a list of matrices
#' @param retain_pattern an extended regex or list of extended regular expressions that specifies which rows of \code{m} to retain.
#' Default pattern (\code{$^}) retains nothing.
#' @param remove_pattern an extended regex or list of extended regular expressions that specifies which rows of \code{m} to remove
#' Default pattern (\code{$^}) removes nothing.
#'
#' @return a matrix that is a subset of \code{m} with rows selected by \code{retain_pattern} and \code{remove_pattern}.
#' 
#' @export
#'
#' @examples
#' m <- matrix(1:16, ncol = 4, dimnames=list(c(paste0("i", 1:4)), paste0("p", 1:4))) %>%
#'   setrowtype("Industries") %>% setcoltype("Commodities")
#' select_rows_byname(m, 
#'                    retain_pattern = RCLabels::make_or_pattern(c("i1", "i4"),
#'                    pattern_type = "exact"))
#' select_rows_byname(m, 
#'                    remove_pattern = RCLabels::make_or_pattern(c("i1", "i3"), 
#'                    pattern_type = "exact"))
#' # Also works for lists and data frames
#' select_rows_byname(list(m,m), retain_pattern = "^i1$|^i4$")
select_rows_byname <- function(a, retain_pattern = "$^", remove_pattern = "$^"){
  if (is.null(a)) {
    return(NULL)
  }
  # Note default patterns ("$^") retain nothing and remove nothing,
  # because $ means end of line and ^ means beginning of line.
  # The default pattern would match lines where the beginning of the line is the end of the line.
  # That is impossible, so nothing is matched.
  select_func <- function(a, retain_pattern, remove_pattern){
    retain_indices <- grep(pattern = retain_pattern, x = rownames(a))
    remove_indices <- grep(pattern = remove_pattern, x = rownames(a))
    if (length(retain_indices) == 0) {
      # Nothing to be retained, so try removing columns
      if (length(remove_indices) == 0) {
        # Nothing to be retained and nothing to be removed.
        # If the caller wanted to retain something,
        # which is indicated by a non-default retain_pattern,
        # don't retain anything.
        # Do this first, because retain takes precedence.
        if (retain_pattern != "$^") {
          return(NULL)
        }
        # If the caller wanted to remove something,
        # which is indicated by a non-default remove_pattern,
        # don't remove anything. Simply return a.
        if (remove_pattern != "$^") {
          return(a)
        }
        # Neither retain_pattern nor remove_pattern is different from the default.
        # This is almost surely an error.
        stop("neither retain_pattern nor remove_pattern are different from default.")
      }
      # Remove
      # Check to see if we will remove all rows from a
      rows_remaining <- nrow(a) - length(remove_indices)
      if (rows_remaining <= 0) {
        return(NULL)
      }
      return(a[-remove_indices , ] %>%
               # When only 1 row is selected, the natural result will be a numeric vector
               # We want to ensure that the return value is a matrix
               # with correct rowtype and coltype.
               # Thus, we need to take these additional steps.
               matrix(nrow = rows_remaining,
                      dimnames = list(dimnames(a)[[1]][setdiff(1:nrow(a), remove_indices)],
                                      dimnames(a)[[2]])) %>%
               setrowtype(rowtype(a)) %>% setcoltype(coltype(a))
      )
    }
    # Retain
    return(a[retain_indices , ] %>%
             matrix(nrow = length(retain_indices),
                    dimnames = list(dimnames(a)[[1]][retain_indices],
                                    dimnames(a)[[2]])) %>%
             setrowtype(rowtype(a)) %>% setcoltype(coltype(a))
    )
  }
  unaryapply_byname(select_func, a = a, 
                    .FUNdots = list(retain_pattern = retain_pattern, remove_pattern = remove_pattern), 
                    rowcoltypes = "none")
}


#' Select columns of a matrix (or list of matrices) by name
#'
#' Arguments indicate which columns are to be retained and which are to be removed.
#' For maximum flexibility, arguments are extended regex patterns
#' that are matched against column names.
#'
#' If \code{a} is \code{NULL}, \code{NULL} is returned.
#' 
#' Patterns are compared against column names using extended regex.
#' If no column names of \code{a} match the \code{retain_pattern}, \code{NULL} is returned.
#' If no column names of \code{a} match the \code{remove_pattern}, \code{a} is returned.
#'
#' Retaining columns takes precedence over removing columns, always.
#'
#' Some typical patterns are:
#' \itemize{
#'   \item{\code{^Electricity$|^Oil$}: column names that are EXACTLY \code{Electricity} or \code{Oil}.}
#'   \item{\code{^Electricity|^Oil}: column names that START WITH \code{Electricity} or \code{Oil}.}
#'   \item{\code{Electricity|Oil}: column names that CONTAIN \code{Electricity} or \code{Oil} anywhere within them.}
#' }
#'
#' Given a list of column names, a pattern can be constructed easily using the \code{make_pattern} function.
#' 
#' `RCLabels::make_or_pattern()` escapes regex strings using `Hmisc::escaprRegex()`.
#' This function assumes that \code{retain_pattern} and \code{remove_pattern} have already been
#' suitably escaped.
#' 
#' Note that the default \code{retain_pattern} and \code{remove_pattern} (\code{$^}) 
#' retain nothing and remove nothing.
#' 
#' Note that if all columns are removed from `a`, `NULL` is returned.
#' 
#' @param a a matrix or a list of matrices
#' @param retain_pattern an extended regex or list of extended regular expressions that specifies which columns of \code{m} to retain.
#' Default pattern (\code{$^}) retains nothing.
#' @param remove_pattern an extended regex or list of extended regular expressions that specifies which columns of \code{m} to remove
#' Default pattern (\code{$^}) removes nothing.
#'
#' @return a matrix that is a subset of \code{a} with columns selected by \code{retain_pattern} and \code{remove_pattern}.
#' 
#' @export
#'
#' @examples
#' m <- matrix(1:16, ncol = 4, dimnames=list(c(paste0("i", 1:4)), paste0("p", 1:4))) %>%
#'   setrowtype("Industries") %>% setcoltype("Commodities")
#' select_cols_byname(m, 
#'                    retain_pattern = RCLabels::make_or_pattern(c("p1", "p4"), 
#'                    pattern_type = "exact"))
#' select_cols_byname(m, 
#'                    remove_pattern = RCLabels::make_or_pattern(c("p1", "p3"), 
#'                    pattern_type = "exact"))
#' # Also works for lists and data frames
#' select_cols_byname(list(m,m), retain_pattern = "^p1$|^p4$")
select_cols_byname <- function(a, retain_pattern = "$^", remove_pattern = "$^"){
  if (is.null(a)) {
    return(NULL)
  }
  out <- a %>% 
    transpose_byname() %>% 
    select_rows_byname(retain_pattern = retain_pattern, remove_pattern = remove_pattern)
  if (is.null(out)) {
    return(NULL)
  }
  out %>% 
    transpose_byname()
}


#' Clean (delete) rows or columns of matrices that contain exclusively `clean_value`
#' 
#' Cleaning is performed when all entries in a row or column or both, depending on the value of `margin`
#' are within `+/- tol` of `clean_value`.
#' Internally, values are deemed within +/- of tol when 
#' `abs(x - clean_value) <= tol`.
#' 
#' If there is concern about machine precision, you might want to call this function with 
#' `tol = .Machine$double.eps`.
#'
#' @param a the matrix to be cleaned
#' @param margin the dimension over which cleaning should occur, `1` for rows, `2` for columns,
#'               or `c(1, 2)` for both rows and columns. Default is `c(1, 2)`.
#' @param clean_value the undesirable value. Default is `0`.
#' @param tol the tolerance with which any value is deemed equal to `clean_value`. Default is `0`.
#'
#' When a row (when `margin = 1`) or a column (when `margin = 2`)
#' contains exclusively `clean_value` (within `tol`), the row or column is deleted from the matrix.
#'
#' @return a "cleaned" matrix, expunged of rows or columns that contain exclusively `clean_value.`
#' 
#' @export
#'
#' @examples
#' m <- matrix(c(-20, 1, -20, 2), nrow = 2, dimnames = list(c("r1", "r2"), c("c1", "c2")))
#' m
#' m %>% clean_byname(margin = 1, clean_value = -20) # Eliminates -20, -20 row
#' # Nothing cleaned, because no columns contain all 0's (the default clean_value).
#' m %>% clean_byname(margin = 2) 
#' # Also works with lists
#' list(m, m) %>% clean_byname(margin = 1, clean_value = -20)
#' # Also works with data frames
#' DF <- data.frame(m = I(list()))
#' DF[[1,"m"]] <- m
#' DF[[2,"m"]] <- m
#' DF %>% clean_byname(margin = 1, clean_value = -20)
#' m2 <- matrix(c(-20, -20, 0, -20, -20, 0, -20, -20, -20), nrow = 3,
#'              dimnames = list(c("r1", "r2", "r3"), c("c1", "c2", "c3")) )
#' m2
#' clean_byname(m2, margin = c(1,2), clean_value = -20)
#' DF2 <- data.frame(m2 = I(list()))
#' DF2[[1, "m2"]] <- m2
#' DF2[[2, "m2"]] <- m2
#' DF2 %>% clean_byname(margin = c(1, 2), clean_value = -20)
clean_byname <- function(a, margin = c(1, 2), clean_value = 0, tol = 0){
  margin <- prep_vector_arg(a, margin)
  clean_value <- prep_vector_arg(a, clean_value)
  tol = prep_vector_arg(a, tol)
  
  clean_func <- function(a, margin, clean_value, tol){
    assertthat::assert_that(1 %in% margin | 2 %in% margin, msg = paste("margin =", margin, "in clean_byname(). Must be 1 or 2."))
    out <- a
    if (1 %in% margin) {
      # Want to clean rows. Code below assumes want to clean columns.
      # Transpose and then transpose again before returning.
      out <- transpose_byname(out) %>% 
        clean_func(margin = 2, clean_value = clean_value, tol = tol) %>% 
        transpose_byname()
    }
    if (2 %in% margin) {
      keepcols <- apply(out, 2, function(x) {
        # !all(x == clean_value)
        !all(abs(x - clean_value) <= tol)
      })
      out <- out[ , keepcols, drop = FALSE]
    } 
    return(out)
  }
  unaryapply_byname(clean_func, a = a, .FUNdots = list(margin = margin, clean_value = clean_value, tol = tol), 
                    rowcoltypes = "all")
}


#' Test whether this is the zero matrix
#' 
#' Note that this function tests whether the elements of \code{abs(a)} are \code{<= tol}.
#' So, you can set \code{tol = 0} to discover if \code{a} is EXACTLY the zero matrix.
#'
#' @param a a matrix of list of matrices
#' @param tol the allowable deviation from 0 for any element
#' @return \code{TRUE} iff this is the zero matrix within \code{tol}.
#' 
#' @export
#'
#' @examples
#' zero <- matrix(0, nrow = 50, ncol = 50)
#' iszero_byname(zero)
#' nonzero <- matrix(1:4, nrow = 2)
#' iszero_byname(nonzero)
#' # Also works for lists
#' iszero_byname(list(zero, nonzero))
#' # And it works for data frames
#' DF <- data.frame(A = I(list()), B = I(list()))
#' DF[[1,"A"]] <- zero
#' DF[[2,"A"]] <- nonzero
#' DF[[1,"B"]] <- nonzero
#' DF[[2,"B"]] <- zero
#' iszero_byname(DF$A)
#' iszero_byname(DF$B)
#' iszero_byname(matrix(1e-10, nrow = 2))
#' iszero_byname(matrix(1e-10, nrow = 2), tol = 1e-11)
iszero_byname <- function(a, tol = 1e-6){
  zero_func <- function(a, tol){
    all(abs(a) <= tol)
  }
  unaryapply_byname(zero_func, a = a, .FUNdots = list(tol = tol), 
                    rowcoltypes = "none")
}


#' Logarithmic mean of two numbers
#' 
#' Calculates the logarithmic mean of two numbers.
#' 
#' This is an internal helper function for \code{logarithmicmean_byname}.
#'
#' @param a the first operand (must be non-negative)
#' @param b the second operand (must be non-negative)
#' @param base the base of the logarithm used in this calculation. 
#'        (Default is \code{exp(1)}.)
#'
#' @return \code{0} if \code{a = 0} or \code{b = 0}; \code{x1} if \code{a == b}; and
#'         \code{(a - b) / log(a/b, base = base)} 
#'         for all other values of \code{a} and \code{b}
#'         
#' @export
#'
#' @examples
#' matsbyname:::logmean(0, 0) # 0
#' matsbyname:::logmean(0, 1) # 0
#' matsbyname:::logmean(1, 0) # 0
#' matsbyname:::logmean(1, 1) # 1
#' matsbyname:::logmean(2, 1)
#' matsbyname:::logmean(1, 2) # commutative
#' matsbyname:::logmean(1, 10) # base = exp(1), the default
#' matsbyname:::logmean(1, 10, base = 10)
logmean <- function(a, b, base = exp(1)){
  # Take care of pathological cases.  
  # See https://en.wikipedia.org/wiki/Logarithmic_mean for details.
  if (a == 0) {
    return(0)
  }
  if (b == 0) {
    return(0)
  }
  if (a == b) {
    return(a)
  }
  (a - b) / log(a/b, base = base)
}


#' Get the number of rows in a "byname" matrix.
#' 
#' The functionn gets the number of rows in a "byname" matrix, or for each "byname" matrix contained
#' in a column of a data frame.
#'
#' @param a A matrix or a column of a data frame populated with "byname" matrices.
#'
#' @return The number of rows of the matrix, or a list containing the number of rows 
#'         in each of the matrices contained in the column of a data frame.
#' @export
#'
#' @examples
#' productnames <- c("p1", "p2")
#' industrynames <- c("i1", "i2")
#' U <- matrix(1:4, ncol = 2, dimnames = list(productnames, industrynames)) %>% 
#'   setrowtype("Products") %>% 
#'   setcoltype("Industries")
#' productnames <- c("p1", "p2")
#' industrynames <- c("i1", "i2", "i3")
#' U2 <- matrix(1:3, ncol = length(industrynames), 
#'              nrow = length(productnames), dimnames = list(productnames, industrynames)) %>% 
#'   setrowtype("Products") %>% 
#'   setcoltype("Industries")
#' productnames <- c("p1", "p2", "p3")
#' industrynames <- c("i1", "i2", "i3", "i4")
#' U3 <- matrix(1:4, ncol = length(industrynames), 
#'              nrow = length(productnames), dimnames = list(productnames, industrynames)) %>% 
#'   setrowtype("Products") %>% 
#'   setcoltype("Industries")
#' dfUs <- data.frame(
#'   year = numeric(),
#'   matrix_byname = I(list())
#' )
#' dfUs[[1, "matrix_byname"]] <- U
#' dfUs[[2, "matrix_byname"]] <- U2
#' dfUs[[3, "matrix_byname"]] <- U3
#' dfUs[[1, "year"]] <- 2000
#' dfUs[[2, "year"]] <- 2001
#' dfUs[[3, "year"]] <- 2002
#' number_rows <- matsbyname::nrow_byname(dfUs$matrix_byname)
nrow_byname <- function(a) {
  nrow_func <- function(a) {
    nrow(a)
  }
  unaryapply_byname(nrow_func, a = a, rowcoltypes = "none")
}


#' Get the number of columns in a "byname" matrix.
#'
#' The functionn gets the number of columns in a "byname" matrix, or for each "byname" matrix contained
#' in a column of a data frame.
#'
#'
#' @param a A matrix or a column of a data frame populated with "byname" matrices.
#'
#' @return The number of columns of the matrix, or a list containing the number of columns
#'         in each of the matrices contained in the column of a data frame.
#' @export
#'
#' @examples
#' productnames <- c("p1", "p2")
#' industrynames <- c("i1", "i2")
#' U <- matrix(1:4, ncol = 2, dimnames = list(productnames, industrynames)) %>% 
#'   setrowtype("Products") %>% 
#'   setcoltype("Industries")
#' productnames <- c("p1", "p2")
#' industrynames <- c("i1", "i2", "i3")
#' U2 <- matrix(1:3, ncol = length(industrynames), 
#'              nrow = length(productnames), dimnames = list(productnames, industrynames)) %>% 
#'   setrowtype("Products") %>% 
#'   setcoltype("Industries")
#' productnames <- c("p1", "p2", "p3")
#' industrynames <- c("i1", "i2", "i3", "i4")
#' U3 <- matrix(1:4, ncol = length(industrynames), 
#'              nrow = length(productnames), dimnames = list(productnames, industrynames)) %>% 
#'   setrowtype("Products") %>% 
#'   setcoltype("Industries")
#' dfUs <- data.frame(
#'   year = numeric(),
#'   matrix_byname = I(list())
#' )
#'   dfUs <- data.frame(
#' year = numeric(),
#' matrix_byname = I(list())
#' )
#' dfUs[[1, "matrix_byname"]] <- U
#' dfUs[[2, "matrix_byname"]] <- U2
#' dfUs[[3, "matrix_byname"]] <- U3
#' dfUs[[1, "year"]] <- 2000
#' dfUs[[2, "year"]] <- 2001
#' dfUs[[3, "year"]] <- 2002
#' number_cols <- ncol_byname(dfUs$matrix_byname) %>% 
#' print()
ncol_byname <- function(a) {
  ncol_func <- function(a) {
    ncol(a)
  }
  unaryapply_byname(ncol_func, a = a, rowcoltypes = "none")
}


#' Create a "byname" matrix from a vector
#'
#' This function creates a "byname" matrix, or list of matrices, from `.dat`, 
#' depending on the input arguments.
#' This function is similar to `matrix()`, but with "byname" characteristics.
#' 
#' Row and column names are taken from the `dimnames` argument.
#' 
#' Any row or column type information on `.dat` is preserved on output.
#'
#' @param .dat The data to be used to create the matrix, in a list format, or as a data frame column
#'             containing a list of the data to be used for each observation.
#' @param nrow The number of rows to be used to create the matrix, in a list format, or as a data frame column
#'             containing a list of the number of rows to be used for each observation.
#' @param ncol The number of columns to be used to create the matrix, in a list format, or as a data frame column
#'             containing a list of the number of columns to be used for each observation.
#' @param byrow The argument stating whether the matrix should be filled by rows or by columns (FALSE by column, TRUE by row),
#'              in a list format, or as a data frame column containing a list of the byrow argument for each observation.
#'              Default is `FALSE.`
#' @param dimnames The dimension names to be used for creating the matrices, in a list format, or as a data frame column
#'                 containing a list of the dimension names to be used for each observation.
#'
#' @return A matrix, list of matrices, or column in a data frame, depending on the input arguments.
#' 
#' @export
#'
#' @examples
#' create_matrix_byname(c(1, 2), nrow = 2, ncol = 1,
#'                      dimnames = list(c("r1", "r2"), "c1"))
#' create_matrix_byname(list(1, 2), nrow = list(1, 1), ncol = list(1,1), 
#'                      dimnames = list(list("r1", "c1"), list("R1", "C1")))
create_matrix_byname <- function(.dat, nrow, ncol, byrow = FALSE, dimnames) {
  
  matrix_func <- function(a, nrow_val, ncol_val, byrow_val, 
                          dimnames_val, rowtype_val = NA, coltype_val = NA) {
    matrix(a, nrow = nrow_val, ncol = ncol_val, byrow = byrow_val, dimnames = dimnames_val)
  }
  
  unaryapply_byname(FUN = matrix_func, a = .dat,
                    .FUNdots = list(nrow_val = nrow, ncol_val = ncol, 
                                    byrow_val = byrow,
                                    dimnames_val = dimnames),
                    # Transfer any row or column type information in .dat to the output.
                    rowcoltypes = "all")
}


#' Create row vectors from data
#' 
#' This function takes data in the `.dat` and creates row vectors.
#' 
#' The row and column names in the resulting row vector are taken from 
#' `rowname` and the names of `.dat`.
#' If set, `dimnames` overrides `rowname` and the names of `.dat`.
#' 
#' Row types and column types are taken from the row type and column type attributes of `.dat`.
#' 
#' This function is a "byname" function that can accept a single number,
#' a vector, a list, or a data frame in `.dat`.
#' 
#' @param .dat Data to be converted to row vectors.
#' @param rowname The name of the row of the row vector.
#' @param dimnames The dimension names to be used for creating the row vector, in a list format, or as a data frame column
#'                 containing a list of the dimension names to be used for each observation.
#'
#' @return A row vector, a list of row vectors, or a data frame column of row vectors, depending on the 
#'         value of `.dat`.
#'         
#' @export
#'
#' @examples
#' # Works with single numbers
#' create_rowvec_byname(c(c1 = 1) %>% setrowtype("rt") %>% setcoltype("ct"), rowname = "r1")
#' # Works with vectors
#' create_rowvec_byname(c(c1 = 1, c2 = 2), rowname = "r1")
#' # Works with a list
#' create_rowvec_byname(list(c(c1 = 1, c2 = 2), c(C1 = 3, C2 = 4, C3 = 5)), 
#'                      rowname = list("r1", "R1"))
#' # Works in a tibble, too.
#' # (Must be a tibble, not a data frame, so that names are preserved.)
#' dat <- list(c(c1 = 1),
#'             c(C1 = 2, C2 = 3), 
#'             c(c1 = 1, c2 = 2, c3 = 3, c4 = 4, c5 = 5, c6 = 6))
#' rnms <- list("r1", "R1", "r1")
#' df1 <- tibble::tibble(dat, rnms)
#' df1
#' df1 <- df1 %>%
#'   dplyr::mutate(
#'     rowvec_col = create_rowvec_byname(dat, rowname = rnms)
#'   )
#' df1$rowvec_col[[1]]
#' df1$rowvec_col[[2]]
#' df1$rowvec_col[[3]]
create_rowvec_byname <- function(.dat, dimnames = NA, rowname = NA){

  rowvec_func <- function(a, dimnames_val, rowname_val) {

    # Figure out the column names.
    # The dimnames argument overrides any names present in a.
    # So we check here if dimnames has been set.
    # If not, we just take names from a, if available.
    if (any(is.na(dimnames_val))) {
      dimnames_val <- list(rowname_val, names(a))
    }
    # Create the row vector using the rowtype and coltype of a.
    create_matrix_byname(a, nrow = 1, ncol = length(a), dimnames = dimnames_val)
  }

  unaryapply_byname(FUN = rowvec_func, 
                    a = .dat,
                    .FUNdots = list(dimnames_val = dimnames, rowname_val = rowname), rowcoltypes = "all")
}


#' Create column vectors from data
#' 
#' This function takes data in the `.dat` and creates column vectors.
#' 
#' The row and column names in the resulting column vector are taken from 
#' the names of `.dat` and `colname`.
#' If set, `dimnames` overrides the names of `.dat` and `colname`.
#' 
#' This function is a "byname" function that can accept a single number,
#' a vector, a list, or a data frame in `.dat`.
#' 
#' Row types and column types are taken from the row type and column type attributes of `.dat`.
#' 
#' @param .dat Data to be converted to column vectors.
#' @param colname The name of the column of the colvector.
#' @param dimnames The dimension names to be used for creating the column vector, in a list format, or as a data frame column
#'                 containing a list of the dimension names to be used for each observation.
#'
#' @return A column vector, a list of column vectors, or a data frame column of column vectors, depending on the 
#'         value of `.dat`.
#'         
#' @export
#'
#' @examples
#' # Works with single numbers
#' create_colvec_byname(c(r1 = 1) %>% setrowtype("rt") %>% setcoltype("ct"), 
#'                      colname = "r1")
#' # Works with vectors
#' create_colvec_byname(c(r1 = 1, r2 = 2), colname = "c1")
#' # Works with a list
#' create_colvec_byname(list(c(r1 = 1, r2 = 2), c(R1 = 3, R2 = 4, R3 = 5)), 
#'                      colname = list("c1", "C1"))
#' # Works in a tibble, too.
#' # (Must be a tibble, not a data frame, so that names are preserved.)
#' dat <- list(c(r1 = 1, r2 = 2),
#'             c(R1 = 2, R2 = 3), 
#'             c(r1 = 1, r2 = 2, r3 = 3, r4 = 4, r5 = 5, r6 = 6))
#' cnms <- list("c1", "C1", "c1")
#' df1 <- tibble::tibble(dat, cnms)
#' df1
#' df1 <- df1 %>%
#'   dplyr::mutate(
#'     colvec_col = create_colvec_byname(dat, colname = cnms)
#'   )
#' df1$colvec_col[[1]]
#' df1$colvec_col[[2]]
#' df1$colvec_col[[3]]
create_colvec_byname <- function(.dat, dimnames = NA, colname = NA) {
  
  colvec_func <- function(a, dimnames_val, colname_val) {

    # Figure out the row names.
    # The dimnames argument overrides any names present in a.
    # So we check here if dimnames has been set.
    # If not, we just take names from a, if available.
    if (any(is.na(dimnames_val))) {
      dimnames_val <- list(names(a), colname_val)
    }
    # Create the row vector using the rowtype and coltype of a.
    create_matrix_byname(a, nrow = length(a), ncol = 1, dimnames = dimnames_val)
  }

  unaryapply_byname(FUN = colvec_func,
                    a = .dat,
                    .FUNdots = list(dimnames_val = dimnames, colname_val = colname),
                    rowcoltypes = "all")
}


#' Create a constant vector from matrix `a`
#' 
#' This function creates a vector using `a` as a template
#' and `k` as its value.
#' Row names are taken from the row names of `a`. 
#' The column name of the output is given by `colname`.
#' Row and column types are transferred from `a` to the output, directly.
#' 
#' If `column` is `TRUE`, the output is a column vector with 
#' row names taken from row names of `a` and a column named by `colname`.
#' If `column` is `FALSE`, the output is a row vevtor with 
#' column names taken from column names of `a` and a row named by `colname`.
#'
#' @param a The template matrix for the column vector.
#' @param k The value of the entries in the output column vector.
#' @param colname The name of the output vector's 1-sized dimension 
#'                (the only column if `column` is `TRUE`, the only row otherwise).
#' @param column Tells whether a column vector (if `TRUE`, the default) or a row vector (if `FALSE`) should be created.
#'
#' @return A vector vector formed from `a`.
#' 
#' @export
#'
#' @examples
#' kvec_from_template_byname(matrix(42, nrow = 4, ncol = 2,
#'                                  dimnames = list(c("r1", "r2", "r3", "r4"), c("c1", "c2"))), 
#'                           colname = "new column")
#' kvec_from_template_byname(matrix(42, nrow = 4, ncol = 2,
#'                                  dimnames = list(c("r1", "r2", "r3", "r4"), c("c1", "c2"))), 
#'                           colname = "new row", column = FALSE)
kvec_from_template_byname <- function(a, k = 1, colname = NA, column = TRUE) {
  
  k_from_template_func <- function(a_mat, k_val, colname_val, column_val) {
    # When we get here, a_mat should be a single matrix
    if (column_val) {
      create_colvec_byname(rep(k_val, nrow(a_mat)), dimnames = list(rownames(a_mat), colname_val))
    } else {
      create_rowvec_byname(rep(k_val, ncol(a_mat)), dimnames = list(colname_val, colnames(a_mat)))
    }
  }

  unaryapply_byname(FUN = k_from_template_func, a = a, 
                    .FUNdots = list(k_val = k, colname_val = colname, column_val = column), rowcoltypes = "all")
}



#' Create a vector with labels from a matrix and values from a vector store
#' 
#' When a matrix is multiplied by a vector byname, 
#' naming can be tricky. 
#' There are times when pieces of the vector labels should be matched to 
#' pieces of the matrix labels. 
#' This function helps by performing the matching byname.
#' For this function, vector `v` is considered a store of values 
#' from which the output vector is created
#' using special matching rules between `a` and `v`.
#' 
#' The output of this function is a vector
#' (a column vector if `column` is `TRUE`, the default; 
#' a row vector if `column` is `FALSE`).
#' The label of the size = 1 dimension is taken from `colname`
#' (so named, because the default is to return a column vector).
#' The labels of the long dimension are taken from matrix `a`
#' (the row names of `a` if `column` is `TRUE`; 
#' the column names of `a` if `column` is `FALSE`).
#' The values of the output vector are obtained from v
#' when `a_piece` matches `v_piece` using the `RCLabels` package.
#' The `v_piece`s of `v` must be unique.
#' The default values for `a_piece` and `v_piece` are "all", 
#' meaning that the entire label should be matched.
#' Other options for `a_piece` and `v_piece` are "pref" and "suff",
#' which will match the prefix or suffix of the labels.
#' Alternatively, prepositions can be given such that 
#' objects of prepositions will be matched.
#' Examples include "from" or "in".
#' Row and column types from `v` are applied to the output.
#' If the piece given in `a_piece` is not present in row or column names of `a`, 
#' `NA_real_` is returned.
#' If the piece given in `v_piece` is not present in row or column names of `v`, 
#' `NA_real_` is returned.
#' 
#' Note that `notation` and `prepositions` should be lists if `a` is a list
#' but a single value otherwise. 
#' The default values of `notation` and `prepositions` take care of this requirement,
#' switching on the type of `a` (list or not).
#'
#' @param a A matrix from which row or column labels are taken.
#'          Can also be a list or the name of a column in a data frame.
#' @param v A vector from which values are taken, when `a_piece` matches `v_piece`.
#'          Can also be a list or the name of a column in a data frame.
#' @param a_piece The piece of labels on `a` that is to be matched. Default is "all".
#' @param v_piece The piece of labels on `v` that is to be matched. Default is "all".
#' @param colname The name of the output vector's 1-sized dimension 
#'                (the only column if `column` is `TRUE`, the only row otherwise).
#'                Default is `NULL`, meaning that the name of the 1-sized dimension in `v` 
#'                should be used.
#' @param column Tells whether a column vector (if `TRUE`, the default) or a row vector (if `FALSE`) should be created.
#' @param notation The notation for the row and column labels.
#'                 Default is `RCLabels::bracket_notation`, wrapped as a list if `a` is a list.
#' @param prepositions The strings that will count for prepositions.
#'                     Default is `RCLables::prepositions`, wrapped as a list if `a` is a list..
#'
#' @return A vector with names from `a` and values from `v`.
#' 
#' @export
#'
#' @examples
#' a <- matrix(42, nrow = 3, ncol = 5, 
#'             dimnames = list(c("Electricity [from b in c]", 
#'                               "Coal [from e in f]", 
#'                               "Crude oil [from Production in USA]"), 
#'                             c("Main activity producer electricity plants", 
#'                               "Wind turbines", 
#'                               "Oil refineries", 
#'                               "Coal mines", 
#'                               "Automobiles"))) %>%
#'   setrowtype("Product") %>% setcoltype("Industry")
#' a
#' v <- matrix(1:7, nrow = 7, ncol = 1, 
#'             dimnames = list(c("Electricity", 
#'                               "Peat", 
#'                               "Hydro", 
#'                               "Crude oil",
#'                               "Coal", 
#'                               "Hard coal (if no detail)", 
#'                               "Brown coal"), 
#'                             "phi")) %>%
#'   setrowtype("Product") %>% setcoltype("phi")
#' v
#' vec_from_store_byname(a, v, a_piece = "pref")
#' vec_from_store_byname(a, v, a_piece = "noun")
#' 
#' v2 <- matrix(1:7, nrow = 7, ncol = 1, 
#'              dimnames = list(c("Electricity", 
#'                                "Peat", 
#'                                "USA", 
#'                                "c",
#'                                "Coal", 
#'                                "Hard coal (if no detail)", 
#'                                "f"), 
#'                              "phi")) %>%
#'   setrowtype("Product") %>% setcoltype("phi")
#' vec_from_store_byname(a, v2, a_piece = "in")
#' 
#' # Works with lists
#' v3 <- matrix(1:7, nrow = 7, ncol = 1, 
#'              dimnames = list(c("Electricity [from USA]", 
#'                                "Peat [from nowhere]", 
#'                                "Production [from GHA]", 
#'                                "e [from ZAF]",
#'                                "Coal [from AUS]", 
#'                                "Hard coal (if no detail) [from GBR]", 
#'                                "b [from Nebraska]"), 
#'                              "phi")) %>%
#'   setrowtype("Product") %>% setcoltype("phi")
#' a_list <- list(a, a)
#' v_list <- list(v3, v3)
#' vec_from_store_byname(a_list, v_list, a_piece = "in", v_piece = "from")
#' 
#' # Also works in a data frame
#' df <- tibble::tibble(a = list(a, a, a), 
#'                      v = list(v3, v3, v3))
#' df %>%
#'   dplyr::mutate(
#'     actual = vec_from_store_byname(a = a, v = v, a_piece = "in", v_piece = "from")
#'   )
vec_from_store_byname <- function(a, v, a_piece = "all", v_piece = "all", colname = NULL, column = TRUE, 
                                  notation = if (is.list(a)) {list(RCLabels::bracket_notation)} else {RCLabels::bracket_notation}, 
                                  prepositions = if (is.list(a)) {list(RCLabels::prepositions)} else {RCLabels::prepositions}) {
  
  
  vec_func <- function(a_mat, v_vec, a_piece_val, v_piece_val, colname_val, column_val, 
                       notation_val = notation, 
                       prepositions_val = prepositions) {
    # Get size of v vectors
    v_size <- dim(v_vec)
    # Make sure v is a matrix.
    assertthat::assert_that(length(v_size) == 2, 
                            msg = "v must be a matrix with 2 dimensions in vec_from_store_byname()")
    # Make sure one of the dimensions is size 1
    assertthat::assert_that(v_size[[1]] == 1 | v_size[[2]] == 1,
                            msg = "v must be a vector with one dimension of size 1 in vec_from_store_byname()")
    # Get the names that matter to us from the vector store
    if (v_size[[1]] == 1) {
      # Turn it into a column vector, so we can assume a column vector 
      # for the rest of this function.
      return(vec_func(a_mat, transpose_byname(v_vec), a_piece_val, v_piece_val, colname_val, column_val))
    }
    # If we want a row vector, transpose a_mat so that we want a column vector.
    # By doing this, we can assume we want a column vector in the rest of this function.
    if (!column_val) {
      return(
        a_mat %>%
          transpose_byname() %>%
          vec_func(v_vec, a_piece_val, v_piece_val, colname_val, column_val = TRUE) %>%
          transpose_byname()        
      )
    }
    # At this point, we have a_mat and v_vec such that we want
    # names from the rows of a_mat and the values from the rows of v_vec.

    # Get row names from the matrix and the vector
    a_rownames <- dimnames(a_mat)[[1]]
    v_rownames <- dimnames(v_vec)[[1]]
    a_pieces <- RCLabels::get_piece(a_rownames, piece = a_piece, notation = notation_val, prepositions = prepositions_val)
    v_pieces <- RCLabels::get_piece(v_rownames, piece = v_piece, notation = notation_val, prepositions = prepositions_val)
    # Ensure that v_pieces are unique
    assertthat::assert_that(length(v_pieces) == length(unique(v_pieces)), 
                            msg = "v_pieces must be unique in vec_from_store_byname()")
    # Find the size of the outgoing column vector
    out_size <- dim(a_mat)[[1]]
    if (is.null(colname_val)) {
      # Pick up the column name from v_vec.
      colname_val <- dimnames(v_vec)[[2]]
    }
    # Build the outgoing vector with NA's everywhere
    out <- matrix(NA_real_, nrow = out_size, ncol = 1, 
                  dimnames = list(a_rownames, colname_val)) %>%
      # Be sure to retain the row and column type of v_vec.
      setrowtype(rowtype(v_vec)) %>% setcoltype(coltype(v_vec))
    # Fill the vector
    for (i in 1:out_size) {
      # Get the value we want
      this_piece <- a_pieces[[i]]
      rownum_in_v <- which(v_pieces == this_piece, arr.ind = TRUE)
      
      # We need both this_piece to be something (not "") and
      # rownum_in_v to be different from 0 (i.e. present somewhere)
      # to assign something different from NA_real_, the default value.
      if (this_piece != "" & length(rownum_in_v) != 0) {
        val <- v_vec[[rownum_in_v, 1]]
        out[i, 1] <- val
      }
    }
    return(out)
  }
  
  binaryapply_byname(vec_func, a = a, b = v, .organize = FALSE, set_rowcoltypes = FALSE,
                     .FUNdots = list(a_piece_val = a_piece, v_piece_val = v_piece, 
                                     colname_val = colname, column_val = column, 
                                     notation_val = notation, 
                                     prepositions_val = prepositions))
}

Try the matsbyname package in your browser

Any scripts or data that you put into this service are public.

matsbyname documentation built on April 2, 2022, 1:06 a.m.