# R/read_timber.R
#
# Defines functions: read_timber
#
# Documented in: read_timber

#' @title
#'   Read timber
#'
#' @description
#'   Reads a timber file from disk. The file type is detected from the file
#'   extension (case-insensitive); `.csv`, `.xls` and `.xlsx` are supported.
#'   Known timber columns are read with the column types given by
#'   `get_raw_timber_specs()`; any other columns in the file are kept and their
#'   types are guessed by the reader.
#'
#' @param timber_path
#'   Path to the timber file, as a single string ending in `.csv`, `.xls` or
#'   `.xlsx`.
#'
#' @return
#'   A tibble of timber, passed through `create_status_cols()`, with the
#'   `sawmill_status` column set to `"OK: timber read successfully."` for every
#'   row.
#'
#' @details
#'   Aborts (via `rlang::abort()`) if `timber_path` is not a single string or
#'   if its file extension is not supported.
#'
#' @importFrom dplyr mutate rowwise
#'
#' @export
read_timber <- function(timber_path) {
  # CHECK: Input ---------------------------------------------------------------

  # The extension test below needs exactly one path; fail early with a clear
  # message instead of an opaque "condition has length > 1" error.
  if (!is.character(timber_path) || length(timber_path) != 1L ||
    is.na(timber_path)) {
    rlang::abort(
      message = "`timber_path` must be a single file path (a string)."
    )
  }


  # CHECK: File Type Support ---------------------------------------------------

  # Get timber file extension (lower case for later string comparisons).
  timber_file_ext <- tolower(tools::file_ext(timber_path))

  # Set supported extensions.
  supported_file_exts <- c("csv", "xls", "xlsx")

  # Abort if timber file extension is not supported.
  if (!timber_file_ext %in% supported_file_exts) {
    supported_list <- paste(supported_file_exts, collapse = ", ")

    # glue() joins its unnamed arguments with no separator, so each fragment
    # carries its own trailing space.
    errmsg <- glue::glue(
      "'.{timber_file_ext}' files are not supported by sawmill. ",
      "Please specify one of the following file types: ",
      "{supported_list}."
    )
    rlang::abort(message = errmsg)
  }

  # The extension is now known to be one of the supported types, so anything
  # that is not CSV is an Excel workbook.
  is_csv <- timber_file_ext == "csv"


  # Read Timber ----------------------------------------------------------------

  # By default, column types read from .CSV or .XLS/X files are guessed by
  # examining the contents of the first rows. Timber are sparse (i.e., many
  # cells are empty) so this guess may fail. To ensure column types are set
  # correctly, they are specified in raw_timber_specs. Passed timber may
  # contain additional non-standard columns that must be preserved (and can be
  # read as a 'guess').

  raw_timber_specs <- get_raw_timber_specs()

  # Read only the header of the raw timber to get the passed column names.
  if (is_csv) {
    timber_col_names <- names(readr::read_csv(
      file = timber_path,
      n_max = 0,
      show_col_types = FALSE
    ))
  } else {
    timber_col_names <- names(readxl::read_excel(
      path = timber_path,
      n_max = 0
    ))
  }

  if (is_csv) {
    # Named vector of column specifications (CSV) for the known raw timber
    # columns. It is passed to readr as-is; columns not named in it are
    # presumably left to readr's type guessing -- TODO confirm.
    raw_timber_colspec <- rlang::set_names(
      raw_timber_specs$col_spec_csv,
      raw_timber_specs$timber_col_name
    )

    # Re-read the timber with column specification.
    timber <- readr::read_csv(
      file = timber_path,
      col_types = raw_timber_colspec,
      show_col_types = FALSE
    )
  } else {
    # Named vector of column specifications (XLS/X) for the known raw timber
    # columns.
    raw_timber_colspec <- rlang::set_names(
      raw_timber_specs$col_spec_xlsx,
      raw_timber_specs$timber_col_name
    )

    # Build a complete specification covering every column in the file:
    # start from "guess" for all columns ...
    timber_colspec <- rlang::set_names(
      rep("guess", length(timber_col_names)),
      timber_col_names
    )

    # ... then overwrite the entries for the known columns that are actually
    # present in the file.
    known_cols <- intersect(names(raw_timber_colspec), names(timber_colspec))
    timber_colspec[known_cols] <- raw_timber_colspec[known_cols]

    # Re-read the timber with column specification.
    timber <- readxl::read_excel(
      path = timber_path,
      col_types = timber_colspec
    )
  }

  # Initialize 'sawmill_pass' and 'sawmill_status' if they do not exist.
  timber <- create_status_cols(timber)

  # Update 'sawmill_status' for every row.
  status <- "OK: timber read successfully."
  timber[, "sawmill_status"] <- status

  timber
}
# iAM-AMR/sawmill documentation built on June 30, 2024, 2:25 a.m.