#' @title Split Flowlines
#' @description A wrapper for split_lines that works on nhdplus attributes
#' @param flines data.frame with COMID, toCOMID, LENGTHKM
#' and LINESTRING sf column in "meters" projection
#' @param max_length maximum segment length to return
#' @param events data.frame containing events as generated by nhdplusTools::get_flowline_index()
#' if an `identifier` attribute is included, it will be passed through in the output table.
#' @param para numeric how many threads to use in parallel computation
#' @param avoid vector of ids to avoid
#' @return All the flowlines with some split apart. COMIDs are returned as strings
#' with a semantic part number appended. That is .1, .2, ... .10, .11, etc. are
#' appended and must be treated as one would treat a semantic version. .1 is the
#' most upstream and the sequence increases in the downstream direction.
#' @importFrom dplyr group_by ungroup filter select mutate lead n right_join
#' @importFrom nhdplusTools rename_geometry
#' @seealso The \code{\link{refactor_nhdplus}} function implements a complete
#' workflow using `split_flowlines()`.
#' @export
#' @examples
#' source(system.file("extdata", "new_hope_data.R", package = "hyRefactor"))
#'
#' new_hope_flowline <-
#' dplyr::right_join(dplyr::select(new_hope_flowline, COMID, REACHCODE, FromMeas, ToMeas),
#' suppressWarnings(nhdplusTools::prepare_nhdplus(
#' new_hope_flowline, 0, 0, 0, FALSE, warn = FALSE)),
#' by = "COMID")
#'
#' split <- split_flowlines(suppressWarnings(sf::st_cast(sf::st_transform(
#' new_hope_flowline, 5070), "LINESTRING")),
#' max_length = 2000, events = new_hope_events)
split_flowlines <- function(flines, max_length = NULL,
events = NULL, para = 0, avoid = NA) {
check_names(flines, "split_flowlines")
flines <- rename_geometry(flines, "geom")
split <- split_lines(flines, max_length, events,
para = para,
avoid = avoid)
if (nrow(split) > 0) {
split <- left_join(split,
select(sf::st_set_geometry(flines, NULL), COMID, toCOMID, LevelPathI, Hydroseq, TotDASqKM),
by = "COMID")
split$part <- unlist(lapply(strsplit(split$split_fID, "\\."),
function(x) x[2]))
split <- group_by(split, COMID)
split <- mutate(split, part = (ifelse(is.na(part), 0, as.integer(part)) + 1))
# Assume flowdir is with digitized for now -- need to check in prep code.
split <- ungroup(mutate(split,
toCOMID = ifelse(part == max(part),
as.character(toCOMID),
paste(lead(COMID),
lead(part), sep = "."))))
split <- mutate(split, COMID = paste(COMID, part, sep = "."),
LENGTHKM = sf::st_length(sf::st_geometry(split)) / 1000)
split <- sf::st_as_sf(select(split, -part, -split_fID))
attr(split$LENGTHKM, "units") <- NULL
split[["LENGTHKM"]] <- as.numeric(split[["LENGTHKM"]])
remove_comid <- unique(as.integer(split[["COMID"]]))
not_split <- dplyr::filter(select(flines, .data$COMID, .data$toCOMID,
.data$LENGTHKM, .data$LevelPathI,
.data$Hydroseq, .data$TotDASqKM), !(.data$COMID %in% remove_comid))
not_split <- dplyr::mutate(not_split, event_REACHCODE = "", event_REACH_meas = NA, event_identifier = NA)
flines <- rbind(not_split, split)
# Rows with COMID like this need to be updated
redirect_tocomid <- flines$COMID[which(grepl("\\.1$", flines$COMID))]
old_tocomid <- gsub("\\.1$", "", redirect_tocomid)
mutate(flines,
toCOMID = ifelse(toCOMID %in% old_tocomid,
paste0(toCOMID, ".1"),
toCOMID))
} else {
flines %>%
mutate(COMID = as.character(COMID),
toCOMID = as.character(toCOMID),
event_REACHCODE = NA,
event_REACH_meas = NA,
event_identifier = NA)
}
}
check_split_data <- function(input_lines) {
if(!"COMID" %in% names(input_lines)) stop("input lines must containt 'COMID' column.")
if(attr(input_lines, "sf_column") != "geom") stop("sf_column must be 'geom'")
if(!"sfc_LINESTRING" %in% class(st_geometry(input_lines))) stop("lines must be class LINESTRING")
sf::st_crs(input_lines)
}
#' @title split lines
#' @description Splits lines longer than a given threshold into the
#' minimum number of pieces to all be under the given threshold.
#' @param input_lines data.frame of class sf with LINESTRING sfc column.
#' @param max_length maximum segment length to return
#' @param events data.frame containing events as generated by nhdplusTools::get_flowline_index()
#' @param id name of ID column in data.frame
#' @param para how many cores to use
#' @param avoid vector of ids to avoid
#' @return only the split lines.
#' @importFrom dplyr group_by ungroup filter select mutate
#' @noRd
#'
split_lines <- function(input_lines,
max_length = NULL,
events = NULL, para = 0,
avoid = NA, status = FALSE) {
check_split_data(input_lines)
# First split by event and number parts in a seperate column
event_split_points <- split_by_event(input_lines, events)
input_lines <- dplyr::select(input_lines, COMID)
too_long_split_points <- split_by_length(input_lines, max_length, event_split_points, avoid)
if(nrow(event_split_points) > 0) {
split_points <- too_long_split_points %>%
dplyr::left_join(select(event_split_points, e_start = .data$start,
e_end = .data$end, .data$event_split_fID),
by = "event_split_fID") %>%
dplyr::mutate(start = ifelse(!is.na(.data$e_start),
.data$start * (.data$e_end - .data$e_start) + .data$e_start,
.data$start),
end = ifelse(!is.na(.data$e_start),
.data$end * (.data$e_end - .data$e_start) + .data$e_start,
.data$end)) %>%
dplyr::mutate(event_REACHCODE = ifelse((!.data$event_REACHCODE == "" & !is.na(.data$event_REACHCODE)) &
round(.data$end, digits = 5) == round(.data$e_end, digits = 5),
.data$event_REACHCODE,
NA)) %>%
dplyr::mutate(event_REACH_meas = ifelse(is.na(.data$event_REACHCODE), NA, .data$event_REACH_meas)) %>%
dplyr::mutate(event_identifier = ifelse(is.na(.data$event_identifier), NA, .data$event_identifier))
} else {
split_points <- too_long_split_points
if(!"event_identifier" %in% names(split_points)) {
split_points$event_identifier <- seq_len(nrow(split_points))
}
}
input_lines <- split_points %>%
dplyr::group_by(.data$COMID) %>%
dplyr::arrange(.data$end) %>%
dplyr::mutate(split_fID = ifelse(dplyr::row_number() == 1,
as.character(.data$COMID),
paste0(.data$COMID, ".", dplyr::row_number() - 1))) %>%
dplyr::ungroup() %>%
dplyr::arrange(.data$fID, .data$end) %>%
select(.data$COMID, .data$split_fID, .data$start, .data$end,
.data$event_REACHCODE, .data$event_REACH_meas, .data$event_identifier) %>%
split_lines_fun(input_lines, para)
if(is.logical(input_lines$split_fID)) input_lines$split_fID <- as.character(input_lines$split_fID)
return(input_lines)
}
new_line <- function(i, f, t, l) {
lwgeom::st_linesubstring(x = sf::st_geometry(l)[i],
from = f,
to = t)[[1]]
}
# rescale REACH_meas to comid_meas
rf <- function(m, f, t) {
if(any(!sapply(m, function(m, f, t) (m >= f & m <= t), f = f, t = t))) stop("m must be between f and t")
100 * (m - f) / (t - f)
}
add_length <- function(input_lines) {
input_lines$geom_len <- sf::st_length(sf::st_geometry(input_lines))
attr(input_lines$geom_len, "units") <- NULL
input_lines$geom_len <- as.numeric(input_lines$geom_len)
input_lines
}
split_lines_fun <- function(split_points, lines, para) {
cl <- NULL
if(para > 1) {
cl <- parallel::makeCluster(para)
}
pbapply::pboptions("none")
crs <- sf::st_crs(lines)
lines <- pbapply::pblapply(seq_len(nrow(split_points)),
FUN = function(x, s, lines) {
new_line(i = which(lines$COMID == s$COMID[x]),
f = split_points$start[x],
t = split_points$end[x],
l = lines)
}, cl = cl, s = split_points, lines = lines)
if(para > 1) {
parallel::stopCluster(cl)
}
split_points <- dplyr::select(split_points, -.data$start, -.data$end)
sf::st_sf(split_points, geom = sf::st_sfc(lines, crs = crs))
}
split_by_event <- function(input_lines, events) {
if(is.null(events) || nrow(events) == 0) return(data.frame(COMID = integer(0), start = numeric(0), end = numeric(0),
event_REACHCODE = character(0), event_REACH_meas = numeric(0), event_split_fID = integer(0)))
check_names(input_lines, "split_lines_event")
# identify which flowlines need to be split so we can just iterate over those.
to_split_ids <- sapply(seq_len(nrow(events)),
function(x, events, input_lines) {
e <- events[x, ]
# from is downstream -- 0 is the outlet
# to is upstream -- 100 is the inlet
dplyr::filter(input_lines, .data$REACHCODE == e$REACHCODE &
.data$ToMeas > e$REACH_meas &
.data$FromMeas < e$REACH_meas)$COMID
}, events = events, input_lines = input_lines)
# filter down so we could parallelize this without blowing out memory.
to_split <- filter(input_lines, COMID %in% to_split_ids)
if(!"identifier" %in% names(events)) {
events$identifier <- as.character(seq(1, nrow(events)))
} else {
events$identifier <- as.character(events$identifier)
}
# For each catchment to split, find all events that need to be enforced
# split into parts and add a "parts" numbered from upstream (0 measure)
# to downstream (100 measure).
split_l <- lapply(seq_len(nrow(to_split)),
function(x, to_split, events) {
l <- to_split[x, ]
# only work on events in the interval of the current flowline
e <- dplyr::filter(events,
.data$REACHCODE %in% l$REACHCODE &
.data$REACH_meas < l$ToMeas &
.data$REACH_meas > l$FromMeas)
# arrange in upstream downstream order (greater measures are upstream)
# from is downstream -- 0 is the outlet
# to is upstream -- 100 is the inlet
# This is important so things are in the right order for splitting
# and available in that order to be joined in back in.
e <- dplyr::arrange(e, desc(REACH_meas))
# This function is defined elsewhere and tested.
comid_meas <- rf(e$REACH_meas, l$FromMeas, l$ToMeas)
split_points <- data.frame(COMID = l$COMID,
start = c(0, 100 - comid_meas) / 100,
end = c(100 - comid_meas, 100) / 100,
event_REACHCODE = c(e$REACHCODE, ""),
event_REACH_meas = c(e$REACH_meas, NA),
event_identifier = c(e$identifier, ""))
}, to_split = to_split, events = events)
do.call(rbind, split_l) %>%
dplyr::mutate(event_split_fID = seq_len(nrow(.)))
}
split_by_length <- function(lines, max_length, event_split_points, avoid) {
if(is.null(max_length)) return(data.frame())
lines <- sf::st_drop_geometry(add_length(lines))
if(nrow(event_split_points) > 0) {
event_split_points <- dplyr::left_join(event_split_points,
dplyr::select(lines, .data$COMID, .data$geom_len),
by = "COMID") %>%
mutate(geom_len = .data$geom_len * (.data$end - .data$start))
lines <- lines %>%
dplyr::filter(!COMID %in% event_split_points$COMID) %>%
dplyr::bind_rows(event_split_points)
} else {
lines <- lines %>%
dplyr::mutate(event_REACHCODE = NA, event_REACH_meas = NA, event_split_fID = NA)
}
if (max_length < 50) warning(paste("short max length detected,",
"do you have your units right?"))
# pieces is the number of pieces we need to break each too long feature into.
# fID is an identifier for a given geometry which may have already been split!
lines <- lines %>%
filter((.data$geom_len >= max_length & !.data$COMID %in% avoid) | !is.na(.data$event_split_fID)) %>%
mutate(pieces = ceiling(.data$geom_len / max_length),
fID = seq_len(nrow(.))) %>%
select(-.data$geom_len)
if(nrow(lines) == 0) {
return(dplyr::select(event_split_points, .data$COMID, .data$start, .data$end,
.data$event_REACHCODE, .data$event_REACH_meas, .data$event_split_fID) %>%
dplyr::mutate(fID = .data$event_split_fID))
}
# rep here is done by row of lines so we get a row with the correct fID
# for each part we expect in the output.
lines[rep(seq_len(nrow(lines)),lines$pieces), ] %>%
dplyr::select(-.data$pieces) %>%
# Assign featureIDs to the output. start and end are decimal % along catchment
dplyr::group_by(.data$fID) %>%
dplyr::mutate(piece = 1:n()) %>%
dplyr::mutate(start = (.data$piece - 1) / dplyr::n(),
end = .data$piece / dplyr::n()) %>%
dplyr::ungroup() %>%
dplyr::select(-.data$piece)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.