#' Load and prep elevation data
#'
#' This function is used by the `mapper` functions to load input files and
#' prepare them for analysis. It can also be used to load input files
#' independently for plotting with \code{\link{flow_plot}} and/or
#' trouble-shooting.
#'
#' @param file Character. The location of the file containing elevation data.
#' See details for accepted file types
#' @param nrow Numeric. Number of rows in dem file (required for dbf files with
#' a single column, but can be automatically assessed from files with x and y
#' coordinates.
#' @param ncol Numeric. Number of columns in dem file (required for dbf files
#' with a single column, but can be automatically assessed from files with x
#' and y coordinates.
#' @param missing_value Vector. The number or character string specifying
#' missing data.
#' @param rlim Vector. Two numbers specifying the start and end of a subset of
#' rows to extract
#' @param clim Vector. Two numbers specifying the start and end of a subset of
#' columns to extract
#' @param edge Logical. Whether to add an edge (buffer) around the data.
#'
#' @inheritParams args
#'
#' @return Returns a data frame containing elevation data in a format suitable
#' for analysis
#'
#' @details All x/y data must be in a format such that greater values indicate
#' East or North respectively.
#'
#' This function uses file extensions to guess the file type to be loaded.
#'
#' \strong{dBase files:}
#' These files are loaded via the \code{\link[foreign]{read.dbf}} function from
#' the foreign package. Columns must be named and must have a valid name (case
#' doesn't not matter). X/Y coordinates are optional and must be named as "x",
#' "lon", "long", "longitude", or "y", "lat", "latitude". Elevation columns can
#' be "elev", "elevation", or "z". If no "x" and "y" columns are suppled,
#' \code{nrow} and \code{ncol} arguments must be supplied. Column names matter,
#' column order does not. Extra columns, if present, are ignored.
#' \itemize{
#' \item dBase files (.dbf)
#' }
#'
#' \strong{Grid file types:}
#' These file types are loaded via the \code{\link[raster]{raster}} function.
#' \itemize{
#' \item Surfer grid files (.grd)
#' \item Esri grid files (binary .adf or ascii .asc)
#' \item Geo Tiff (.tif)
#' \item Floating point raster files (.flt) (**Note** the companion header,
#' .hdr, file must also be present)
#' }
#'
#' \strong{Text/Spreadsheet file types:}
#' Data in these files are all assumed to be arranged in three columns
#' reflecting x, y, and z dimensions (z = elevation). Column **order** is
#' important. Column names don't matter, but they should be present.
#' \itemize{
#' \item Text files (.txt, .dat, .csv) are loaded via R base
#' \code{link[utils]{read.table}} function.
#' \item Excel files (.xls, .xlsx) are loaded via the
#' \code{\link[readxl]{read_excel}} function.
#' }
#'
#' @export
load_file <- function(file, nrow = NULL, ncol = NULL, missing_value = -9999,
rlim = NULL, clim = NULL, grid = NULL, edge = TRUE,
verbose = TRUE) {
if(!file.exists(file)) stop("Cannot locate ", file,
" relative to working directory, ", getwd(),
call. = FALSE)
ext <- tolower(tools::file_ext(file))
if(ext %in% c("grd", "tif", "adf", "asc", "flt", "")) {
if(ext == "" & verbose) message(" Assuming folder representing Arc/Info Binary Grid")
db <- load_raster(file)
} else if(ext == "dbf") {
db <- load_dem(file, type = "elev")
} else if(ext %in% c("xlsx", "xls")) {
db <- load_excel(file, type = "elev")
} else if(ext %in% c("txt", "csv", "dat")) {
db <- load_txt(file, type = "elev")
} else {
stop("Unknown file format", call. = FALSE)
}
# Sort if x/y present
if(ncol(db) > 1 && all(c("x", "y", "elev") %in% names(db))) {
db <- dplyr::arrange(db, dplyr::desc(y), x)
nrow <- length(unique(db$y))
ncol <- length(unique(db$x))
if(verbose) message(" Detected ", nrow, " rows and ", ncol, " columns")
} else if(!is.null(nrow) && !is.null(ncol)) {
if(verbose) message(" Using supplied ", nrow, " rows and ", ncol, " columns")
} else {
stop("dbf files with only one column require 'nrow' and 'ncol' arguments.",
call. = FALSE)
}
db_format(db, nrow = nrow, ncol = ncol, grid = grid,
missing_value = missing_value, verbose = verbose) %>%
db_prep(clim = clim, rlim = rlim, edge = edge, verbose = verbose)
}
load_dem <- function(file, type = "elev") {
if(!requireNamespace("foreign", quietly = TRUE)) {
stop("Require package 'foreign' to load .dbf files.\n
Install with \"install.packages('foreign')\", then try again")
}
# Load file
foreign::read.dbf(file, as.is = TRUE) %>%
check_names(type = type)
}
check_names <- function(db, type) {
# Format names
names(db) <- tolower(names(db))
names(db) <- stringr::str_trim(names(db))
all_names <- dplyr::filter(match_names, .data$type == !!type) %>%
dplyr::pull("name")
req_names <- dplyr::filter(match_names,
.data$type == !!type,
.data$required == TRUE) %>%
dplyr::pull("name")
# Fix fixable names
if(!is.null(fix_names[[type]])) {
names(db) <- stringr::str_replace_all(names(db), pattern = fix_names[[type]])
}
# Check if required columns present
if(!all(req_names %in% names(db))) {
stop("Required columns missing from data: '",
paste0(m, collapse = "', '"), "'")
}
if(type == "elev") db <- dplyr::mutate(db, elev = as.numeric(elev))
# Grab names which are present
dplyr::select(db, tidyselect::any_of(all_names))
}
load_excel <- function(file, type = "elev") {
if(!requireNamespace("readxl", quietly = TRUE)) {
stop("Require package 'readxl' to load .xlsx or .xls files.\n
Install with \"install.packages('readxl')\", then try again")
}
h <- readxl::read_excel(file, n_max = 5,
col_names = FALSE, .name_repair = "minimal")
header <- any(stringr::str_detect(h[1,], "[a-zA-Z]+"))
db <- readxl::read_excel(file, col_names = header)
if(!header) {
names(db) <- match_names$name[match_names$type == type]
} else {
db <- check_names(db, type = type)
}
db
}
load_txt <- function(file, type = "elev") {
t <- readLines(file, 10)
header <- any(stringr::str_detect(t[1], "[a-zA-Z]+"))
if(any(stringr::str_detect(t, ","))) sep <- "," else sep <- ""
db <- utils::read.table(file, header = header, sep = sep)
if(!header) {
names(db) <- match_names$name[match_names$type == type]
} else {
db <- check_names(db, type = type)
}
dplyr::mutate_all(db, as.numeric)
}
load_raster <- function(file) {
db <- raster::raster(file) %>%
raster::as.data.frame(xy = TRUE)
names(db) <- match_names$name[match_names$type == "elev"]
db
}
db_format <- function(db, nrow, ncol, grid, missing_value = -9999, verbose) {
if(verbose) message(" Formating grid")
# Check if valid rows/cols
if(nrow * ncol != length(db$elev)){
stop("Number of rows and columns does not match the total number of cells in the data base, Try again!")
}
# Arrange as grid
db <- db %>%
dplyr::mutate(seqno = 1:length(elev),
row = sort(rep(1:nrow, length(elev)/nrow)),
col = rep(1:ncol, length(elev)/ncol),
missing = elev == missing_value,
elev = replace(elev, missing, NA_real_)) %>%
dplyr::mutate(dplyr::across(-"missing", as.numeric))
if(any(!c("x", "y") %in% names(db))) {
if(is.null(grid)) stop("No grid dimensions in data, require 'grid' argument",
call. = FALSE)
if(verbose) message(" No x/y in file, creating x/y from cols/rows/grid")
db <- dplyr::mutate(db,
x = col * grid,
y = rev(row) * grid)
}
db
}
db_prep <- function(db, clim, rlim, edge, verbose) {
# Subset
if(!is.null(clim) || !is.null(rlim)) {
if(verbose) message(" Subsetting data")
if((!is.null(clim) & (!is.numeric(clim) | length(clim) != 2)) |
(!is.null(rlim) & (!is.numeric(rlim) | length(rlim) != 2))) {
stop("clim and rlim must be each be a vector of two numbers (start/end row/col) or NULL")
}
if(any(rlim > max(db$row)) | any(clim > max(db$col))) stop("Subset cannot be bigger than data")
if(length(rlim[1]:rlim[2]) < 2 | length(clim[1]:clim[2]) < 2) stop("Subset is too small (less than 2x2)")
db <- db %>%
dplyr::filter(row >= rlim[1] & row <= rlim[2] & col >= clim[1] & col <= clim[2]) %>%
dplyr::mutate(seqno = 1:length(seqno),
row = row - min(row) + 1,
col = col - min(col) + 1)
}
# Add edges
if(edge) {
if(verbose) message(" Adding buffer")
# Surround in impossible elevation
db <- add_buffer(db) %>%
dplyr::arrange(seqno) %>%
dplyr::mutate(elev_orig = elev) # make a backup of the original elevation data
# Note which cells are edge cells
db1 <- nb_values(db, max_cols = max(db$col), col = c("buffer", "missing"),
format = "wide")
b_cols <- names(db1)[stringr::str_detect(names(db1), "buffer_n")]
m_cols <- names(db1)[stringr::str_detect(names(db1), "missing_n")]
db1$buffer_edge <- rowSums(db1[, b_cols], na.rm = TRUE) > 0
db1$missing_edge <- rowSums(db1[, m_cols], na.rm = TRUE) > 0
db1$edge_map <- rowSums(db1[, c("buffer_edge", "missing_edge")],
na.rm = TRUE) > 0
db <- dplyr::left_join(db, db1[, c("seqno", "edge_map")], by = "seqno")
}
db
}
add_buffer <- function(db, stats = NULL) {
if(!is.null(stats)) {
if(!"buffer" %in% names(db)) stop("If adding buffer to 'stats', 'db' ",
"must already have buffer", call. = FALSE)
index <- dplyr::filter(db, !.data$buffer) %>%
dplyr::select("seqno") %>%
dplyr::mutate(seqno_orig = 1:dplyr::n())
r <- stats %>%
dplyr::mutate(dplyr::across(dplyr::matches("row|col"), ~ . + 1)) %>%
dplyr::mutate(dplyr::across(dplyr::contains("seqno"), ~index$seqno[.])) %>%
dplyr::mutate(dplyr::across(dplyr::contains("drec"), ~index$seqno[.]))
} else {
ncols <- max(db$col)
nrows <- max(db$row)
db <- dplyr::arrange(db, seqno)
if("drec" %in% names(db)) {
drec <- dplyr::select(db, "drec") %>%
dplyr::distinct() %>%
dplyr::filter(!is.na(drec))
}
buffers <- tibble::tibble(row = c(rep(1, ncols+2), #top
1:(nrows+2), #left
1:(nrows+2), #right
rep(nrows+2, ncols+2)), #bottom
col = c(1:(ncols+2), #top
rep(1, nrows+2), #left
rep(ncols+2, nrows+2), #right
1:(ncols+2)), #bottom
buffer = TRUE) %>%
dplyr::distinct()
db <- db %>%
dplyr::mutate(row = row + 1, col = col + 1, buffer = FALSE) %>%
dplyr::bind_rows(buffers) %>%
dplyr::arrange(row, col) %>%
dplyr::mutate(seqno_buffer = 1:length(row))
if("drec" %in% names(db)) {
drec <- dplyr::left_join(drec, dplyr::select(db, "seqno", "seqno_buffer"),
by = c("drec" = "seqno")) %>%
dplyr::rename("drec_buffer" = "seqno_buffer")
db <- dplyr::left_join(db, drec, by = "drec") %>%
dplyr::select(-"drec", "drec" = "drec_buffer")
}
r <- db %>%
dplyr::select(-"seqno", "seqno" = "seqno_buffer") %>%
dplyr::select("seqno", dplyr::everything()) %>%
dplyr::arrange(.data$seqno)
}
r
}
load_extra <- function(file, type) {
if(!type %in% c("arule", "crule", "zone")) {
stop("'type' must be either 'arule', 'crule', or 'zone'", call. = FALSE)
}
ext <- tolower(tools::file_ext(file))
if(ext == "dbf") {
extra <- load_dem(file, type = type)
} else if(ext %in% c("xlsx", "xls")) {
extra <- load_excel(file)
} else if(ext %in% c("txt", "csv", "dat")) {
extra <- load_txt(file)
} else {
t <- dplyr::if_else(type == "zone", "zone", "rule")
stop(paste0("Expecting ", t, " file as data base (dbf), Excel (xlsx, xls)\n",
" or text (txt, csv, dat)"), call. = FALSE)
}
extra
}
format_rule <- function(rule, type, quiet) {
if(!type %in% c("arule", "crule")) {
stop("'type' must be either 'arule' or 'crule'", call. = FALSE)
}
if(!quiet) message("Formatting ", type, " file")
rule <- dplyr::mutate_if(rule, ~!is.numeric(.), tolower)
msg <- vector()
if(type == "arule") {
if(any(rule$attr_in == "slope")) {
msg <- c(msg, " - Renaming 'slope' to 'slope_pct'")
}
rule <- dplyr::mutate(
rule,
attr_in = stringr::str_replace_all(
attr_in,
c("slope" = "slope_pct")))
}
if(!quiet) message(paste0(msg, collapse = "\n"))
if(!"zone" %in% names(rule)) rule <- dplyr::mutate(rule, zone = 0)
rule
}
seqno_to_buffer <- function(seqno, seqno_buffer) {
seqno_buffer[seqno]
}
seqno_as_buffer <- function(seqno, db) {
dplyr::mutate(db, seqno2 = seqno) %>%
add_buffer() %>%
dplyr::filter(.data$seqno2 == !!seqno) %>%
dplyr::pull(.data$seqno)
}
seqno_from_buffer <- function(seqno_buffer, db) {
dplyr::mutate(db, seqno2 = seqno) %>%
add_buffer() %>%
dplyr::filter(.data$seqno == !!seqno_buffer) %>%
dplyr::pull(.data$seqno2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.