R/index_to_sprinkle.R

Defines functions index_to_sprinkle

Documented in index_to_sprinkle

#' @name index_to_sprinkle
#' @title Determine the Indices to Sprinkle
#' 
#' @description The sprinkle methods accept the rows and columns that are
#'   to be modified as matrix coordinates.  The \code{dust} object stores
#'   the table data in a long form.  The tabular coordinates are translated
#'   into row indices using this function.
#'   
#' @param x An object of class \code{dust}.
#' @param rows Either a numeric vector of rows in the tabular object to be 
#'   modified or an object of class \code{call}.  When a \code{call}, 
#'   generated by \code{quote(expression)}, the expression resolves to 
#'   a logical vector the same length as the number of rows in the table.
#'   Sprinkles are applied to where the expression resolves to \code{TRUE}.
#' @param cols Either a numeric vector of columns in the tabular object to
#'   be modified, or a character vector of column names. A mixture of 
#'   character and numeric indices is permissible.
#' @param fixed \code{logical(1)} indicating if the values in \code{rows} and 
#'   \code{cols} should be read as fixed coordinate pairs.  See Details.
#' @param part \code{character} string.  Specifies if the sprinkles are 
#'   being applied to the head, body, foot, or interfoot of the table. Partial
#'   matching is supported.
#' @param recycle \code{character} string. Indicates how recycling is to be
#'   performed.  Partial matching is supported. See Details.
#' @param coll An optional \code{AssertCollection} object. When \code{NULL},
#'   an \code{AssertCollection} object will be created and reported within
#'   the call to this function.  When not \code{NULL}, any failed assertions
#'   will be added to the object in reported in the function that called
#'   \code{index_to_sprinkle}.
#'   
#' @details When \code{fixed = FALSE}, sprinkles are applied at the 
#'   intersection of \code{rows} and \code{cols}, meaning that the arguments 
#'   do not have to share the same length.  When \code{fixed = TRUE}, they must
#'   share the same length.
#'   
#'   The value of \code{recycle} determines how sprinkles are 
#'   managed when the sprinkle input doesn't match the length of the region
#'   to be sprinkled.  By default, recycling is turned off.  Recycling 
#'   may be performed across rows first (left to right, top to bottom), 
#'   or down columns first (top to bottom, left to right).  \code{"cols"} 
#'   and \code{"columns"} have the same effect. The two choices to specify 
#'   are motivated by the fact that I sometimes get confused about which
#'   it should be. :)
#'   
#' @author Benjamin Nutter
#' 
#' @seealso \code{sprinkle}
#' 
#' @section Functional Requirements:
#' \enumerate{
#'  \item Return the indices of the intersection of \code{rows} and \code{cols}
#'  \item If \code{rows = NULL}, assume all rows.
#'  \item If \code{rows} is an expression where no values resolve to 
#'    \code{TRUE}, return \code{x} unchanged.
#'  \item If any value in \code{rows} is not a valid row in the table,
#'    cast an error.
#'  \item If \code{cols = NULL}, assume all columns.
#'  \item If any value in \code{cols} does not identify a column in the table,
#'    cast an error.
#'  \item If \code{fixed = TRUE}, \code{length(rows)} (or \code{sum(rows),
#'    if an expression}) and \code{cols} must have the same length.
#'  \item Cast an error if \code{fixed} is not a \code{logical(1)}
#'  \item Cast an error if \code{part} is not one of \code{"body"}, 
#'    \code{"head"}, \code{"foot"}, or \code{"interfoot"}.
#' }
#'   

index_to_sprinkle <- function(x, rows = NULL, cols = NULL, fixed = FALSE,
                              part = c("body", "head", "foot", "interfoot"),
                              recycle = c("none", "rows", "cols", "columns"),
                              coll = NULL)
{
  report_here <- is.null(coll)
  
  if (report_here) coll <- checkmate::makeAssertCollection()

# First pass at argument validation ---------------------------------
  # The first pass validates the arguments are of the correct type.
  # The second pass will validate characteristics that depend on 
  # the types being correct.
  
  checkmate::assert_class(x = x,
                          classes = "dust",
                          add = coll,
                          .var.name = "x")
  
  if (!is.null(rows))
  {
    if (!is.numeric(rows) & !is.call(rows))
    {
      coll$push("`rows` must be either numeric or a call object (via `quote`)")
    }
  }
  
  if (!is.null(cols))
  {
    if (!is.numeric(cols) & !is.character(cols))
    {
      coll$push("`cols` must be a numeric or character vector")
    }
  }
  
  checkmate::assert_logical(x = fixed,
                            len = 1,
                            add = coll,
                            .var.name = "fixed")
  
  part <- 
    checkmate::matchArg(x = part,
                        choices = c("body", "head", "foot", 
                                    "interfoot", "table"),
                        add = coll,
                        .var.name = "part")
  
  recycle <- 
    checkmate::matchArg(x = recycle,
                        choices = c("none", "rows", "cols", "columns"),
                        add = coll,
                        .var.name = "recycle")
  
  if (report_here) checkmate::reportAssertions(coll)
  else if (!length(part) | 
           !length(recycle) |
           !checkmate::test_logical(x = fixed,
                                    len = 1)) 
  {
    # If there is no match for `part`, there is no need to proceed to 
    # the rest of the function.  If this function is called from 
    # another with a `coll` object, return to that function's execution
    # and report the error there.
    return(invisible(NULL))
  }
  
# Second pass at argument validations -------------------------------

  if (fixed)
  {
    if (length(rows) != length(cols))
    {
      coll$push("When `fixed = TRUE`, rows and cols must have the same length")
    }
  }

  if (is.null(rows)) rows <- unique(x[[part]][["row"]])
  
  if (inherits(rows, "class"))
  {
    rows <- which(eval(rows))
  }

  invalid_row <- which(!rows %in% unique(x[[part]][["row"]]))
  if (length(invalid_row))
  {
    coll$push(sprintf("The following rows given are not valid row indices: %s",
                      paste0(rows[invalid_row], collapse = ", ")))
  }

  if (is.null(cols))
  {
    cols <- unique(x[[part]][["col"]])
  }
  else
  {
    # The cols argument allows character and numeric values to be 
    # given simultaneously. This block matches the character values
    # to numeric column indices
    cols_num <- suppressWarnings(as.numeric(cols))
    cols_num <- cols_num[!is.na(cols_num)]
    
    cols_str <- match(cols, 
                      unique(x[["head"]][["col_name"]]))
    
    # We don't want to restrict ourselves to just the unique 
    # columns if we are doing fixed coordinate pairs
    if (!fixed) cols <- unique(c(cols_num, cols_str))
    
    cols <- cols[!is.na(cols)]
  }

  invalid_col <-  which(!cols %in% unique(x[[part]][["col"]]))
    
  if (length(invalid_col))
  {
    coll$push(sprintf("The following columns given are not valid columns: %s",
                      paste0(cols[invalid_col], collapse = ", ")))
  }
  
  if (report_here) checkmate::reportAssertions(coll)
  
  # There's no point in continuing if there are any errors by now
  # We return a full vector of indices just to maintain the same input.
  if (!coll$isEmpty()) return(1)
  
# Functional Code ---------------------------------------------------

  # Determine the index order for recycling
  
  if (recycle == "columns")
  {
    recycle <- "cols"
  }
  
  recycle_arrange <- 
    if (recycle == "rows")
    {
      c("row", "col")
    }
    else
    {
      c("col", "row")
    }  
  
  # Determine and arrange the indices
  
  if (!fixed)
  {
    indices <- expand.grid(rows = rows,
                           cols = cols)
    indices$i <- rep(TRUE, nrow(indices))
    
    indices <- merge(x[[part]][c("row", "col")], 
                     indices, 
                     by.x = c("col", "row"), 
                     by.y = c("cols", "rows"), 
                     all.x = TRUE)
    
    indices[["index"]] <- seq_len(nrow(indices))

    indices <- indices[do.call("order", indices[recycle_arrange]), ]

    indices[["i"]][is.na(indices[["i"]])] <- FALSE
    indices <- indices[["index"]][indices[["i"]]]
  }
  else
  {
    indices <- 
      which(x[[part]][["row"]] %in% rows & 
              x[[part]][["col"]] %in% cols)
  }

  indices
}
nutterb/pixiedust documentation built on Oct. 17, 2023, 9:20 a.m.