Nothing
## TODO: tests https://github.com/ncss-tech/aqp/issues/180
#' Segmenting of Soil Horizon Data by Depth Interval
#'
#' This function segments or subdivides horizon data from a `SoilProfileCollection` or `data.frame` by depth interval (e.g. `c(0, 10)`, `c(0, 50)`, or `25:100`). This results in horizon records being split at the specified depth intervals, which duplicates the original horizon data but also adds new horizon depths. In addition, labels (i.e. `"segment_id"`) are added to each horizon record that correspond with their depth interval (e.g. `025-100`). This function is intended to harmonize horizons to a common support (i.e. depth interval) for further aggregation or summary. See the examples.
#' @param object either a `SoilProfileCollection` or `data.frame`
#' @param intervals a vector of integers over which to slice the horizon data (e.g. `c(25, 100)` or `25:100`)
#' @param trim logical, when `TRUE` horizons in `object` are truncated to the min/max specified in `intervals`. When `FALSE`, those horizons overlapping an interval are marked as such. Care should be taken when specifying more than one depth interval and `trim = FALSE`.
#' @param hzdepcols a character vector of length 2 specifying the names of the horizon depths (e.g. `c("hzdept", "hzdepb")`), only necessary if `object` is a `data.frame`.
#'
#' @details `segment()` performs no aggregation or resampling of the source data, rather, labels are added to horizon records for subsequent aggregation or summary. This makes it possible to process a very large number of records outside of the constraints associated with e.g. `slice()` or `slab()`.
#'
#' @return Either a `SoilProfileCollection` or `data.frame` with the original horizon data segmented by depth intervals. There are usually more records in the resulting object, one for each time a segment interval partially overlaps with a horizon. A new column called `segment_id` identifying the depth interval is added.
#'
#' @author Stephen Roecker
#'
#' @seealso [dice()], [glom()]
#'
#' @export
#'
#' @examples
#'
#' # example data
#' data(sp1)
#'
#' # upgrade to SPC
#' depths(sp1) <- id ~ top + bottom
#'
#' # segment and trim
#' z <- segment(sp1, intervals = c(0, 10, 20, 30), trim = TRUE)
#'
#' # display segment labels
#' # note that there are new horizon boundaries at segments
#' par(mar = c(0, 0, 3, 1))
#' plotSPC(z, color = 'segment_id', width = 0.3)
#'
#' # highlight new horizon records
#' par(mar = c(0, 0, 2, 1))
#' plotSPC(z, color = NA, default.color = NA, width = 0.3, lwd = 1)
#' plotSPC(sp1, color = NA, default.color = NA,
#' width = 0.3, lwd = 3, add = TRUE, name = NA, print.id = FALSE)
#' legend('top', horiz = TRUE,
#' legend = c('original', 'segmented'),
#' lwd = c(1, 3), cex = 0.85, bty = 'n')
#'
#' \donttest{
#' # same results as slab()
#' # 10 random profiles
#' s <- lapply(1:10, random_profile, n_prop = 1, SPC = TRUE, method = 'random_walk')
#' s <- combine(s)
#'
#' a.slab <- slab(s, fm = ~ p1, slab.structure = c(0, 10, 20, 30), slab.fun = mean, na.rm = TRUE)
#'
#' z <- segment(s, intervals = c(0, 10, 20, 30), trim = TRUE)
#' z <- horizons(z)
#' z$thick <- z$bottom - z$top
#'
#' a.segment <- sapply(split(z, z$segment_id), function(i) {
#' weighted.mean(i$p1, i$thick)
#' })
#'
#'
#' res <- data.frame(
#' slab = a.slab$value,
#' segment = a.segment,
#' diff = a.slab$value - a.segment
#' )
#'
#' print(res)
#' res$diff < 0.001
#'}
#'
#'
#' data(sp5)
#'
#' # segment by upper 25-cm
#' test1 <- segment(sp5, intervals = c(0, 100))
#' print(test1)
#' nrow(test1)
#' print(object.size(test1), units = "Mb")
#'
#' # segment by 1-cm increments
#' test2 <- segment(sp5, intervals = 0:100)
#' print(test2)
#' nrow(test2)
#' print(object.size(test2), units = "Mb")
#'
#'
#' # segment and aggregate
#' test3 <- segment(horizons(sp5),
#' intervals = c(0, 5, 15, 30, 60, 100, 200),
#' hzdepcols = c("top", "bottom")
#' )
#' test3$hzthk <- test3$bottom - test3$top
#' test3_agg <- by(test3, test3$segment_id, function(x) {
#' data.frame(
#' hzID = x$hzID[1],
#' segment_id = x$segment_id[1],
#' average = weighted.mean(x$clay, w = x$hzthk)
#' )
#' })
#' test3_agg <- do.call("rbind", test3_agg)
#'
#' head(test3_agg)
#'
segment <- function(object, intervals, trim = TRUE, hzdepcols = NULL) {
# depth interval rules
dep <- data.frame(
top = intervals[-length(intervals)],
bot = intervals[-1],
stringsAsFactors = FALSE
)
n <- max(nchar(intervals))
dep$id <- paste0(
formatC(dep$top, width = n, flag = 0),
"-",
formatC(dep$bot, width = n, flag = 0)
)
# argument sanity check
test_spc <- inherits(object, 'SoilProfileCollection')
test_df <- inherits(object, 'data.frame')
test_hd <- !is.null(hzdepcols) & length(hzdepcols) == 2
test_dep <- is.numeric(dep$top) & is.numeric(dep$bot) & all(dep$top < dep$bot)
if (!any(test_spc, test_df)) {
stop("the input must be either a SoilProfileCollection or data.frame")
}
if (!test_spc & (!test_df | !test_hd)) {
stop("if the input is a data.frame then hzdepcols must not be NULL and length(hzdepcols) == 2")
}
if (!test_dep) {
stop("intervals should be numeric and sequential (e.g. c(0, 1, 2, 3) or 0:100)")
}
# standardize inputs
if (test_spc) {
peid <- idname(object)
hzid <- hzidname(object)
hzdepcols <- horizonDepths(object)
h <- horizons(object)
names(h)[names(h) %in% c(peid, hzid)] <- c("peid", "hzid")
} else {
h <- object
}
names(h)[names(h) %in% hzdepcols] <- c("hzdept", "hzdepb")
## TODO: consider using dice()
# filter horizons and trim
.slice <- function(h, top = NULL, bot = NULL) {
idx <- h$hzdept <= bot & h$hzdepb >= top
h <- h[idx, ]
# causing errors when FALSE
if (trim == TRUE) {
h <- within(h, {
hzdept = ifelse(hzdept < top, top, hzdept)
hzdepb = ifelse(hzdepb > bot, bot, hzdepb)
})
h <- h[(h$hzdepb - h$hzdept) > 0, ]
}
return(h)
}
# slice spc by intervals
# dep$df <- lapply(1:nrow(dep), function(x) h[0, ]) # pre-allocate memory
dep$df <- list(h[0, ])[rep(1, nrow(dep))] # pre-allocate memory faster
h <- {
split(dep, dep$id) ->.;
lapply(., function(x) {
x$df[[1]] <- cbind(.slice(h, top = x$top, bot = x$bot), segment_id = x$id)
return(x$df[[1]])
}) ->.;
do.call("rbind", .) ->.;
}
names(h)[names(h) %in% c("hzdept", "hzdepb")] <- hzdepcols
if (test_spc) {
h <- h[order(h$peid, h[[hzdepcols[1]]]), ]
# merge to re-add spc with NA
h_orig <- data.frame(peid = names(table(horizons(object)[peid])), stringsAsFactors = FALSE)
h <- merge(h_orig, h, by = "peid", all.x = TRUE, sort = FALSE)
rm(h_orig)
## TODO: consider adding a flag to indicate "new" horizon records that have been added
# rebuild SPC
names(h)[names(h) == "peid"] <- peid
names(h)[names(h) == "hzid"] <- hzid
h$hzID <- 1:nrow(h)
replaceHorizons(object) <- h
suppressMessages(hzidname(object) <- "hzID")
}
# return
if(test_spc){
return(suppressMessages(rebuildSPC(object))) # TODO: this is protection from missing-data/ID offset
} else {
return(h)
}
}
#' @title Dissolving horizon boundaries by grouping variables
#'
#' @description This function dissolves or combines horizons that have a common set of grouping variables. It only combines those horizon records that are sequential (e.g. share a horizon boundary). Thus, it can be used to identify discontinuities in the grouping variables along a profile and their unique depths. It is particularly useful for determining the depth to the top or bottom of horizons with a specific category, and should be simpler than previous methods that require aggregating over profiles.
#'
#' @param object a \code{data.frame}
#' @param by character: column names, to be used as grouping variables, within the object.
#' @param id character: column name of the pedon ID within the object.
#' @param hztop character: column name of the horizon top depth within the object.
#' @param hzbot character: column name of the horizon bottom depth in the object.
#' @param collapse logical: indicating whether to not combine grouping variables before dissolving.
#' @param order logical: indicating whether or not to order the object by the id, hztop, and hzbot columns.
#' #'
#' @details This function assumes the profiles and horizons within the object follow the logic defined by \code{checkHzDepthLogic} (e.g. records are ordered sequentially by id, hztop, and hzbot and without gaps). If the records are not ordered, set the \code{order = TRUE}.
#'
#' @return A \code{data.frame} with the original id, by grouping variables, and non-consecutive horizon depths.
#'
#' @author Stephen Roecker
#'
#' @seealso \code{\link{checkHzDepthLogic}}
#'
#' @export
#'
#' @examples
#'
#' # example 1
#' data(jacobs2000)
#' spc <- jacobs2000
#'
#' spc$dep_5 <- spc$depletion_pct >=5
#' spc$genhz <- generalize.hz(spc$name, c("A", "E", "B", "C"), c("A", "E", "B", "C"))
#' h <- horizons(spc)
#'
#' test <- dissolve_hz(h, by = c("genhz", "dep_5"), id = "id", hztop = "top", hzbot = "bottom")
#'
#' vars <- c("id", "top", "bottom", "genhz", "dep_5")
#' h[h$id == "92-1", vars]
#' test[test$id == "92-1", ]
#'
#'
#' # example 2
#' df <- data.frame(
#' peiid = 1,
#' hzdept = c(0, 5, 10, 15, 25, 50),
#' hzdepb = c(5, 10, 15, 25, 50, 100),
#' hzname = c("A1", "A2", "E/A", "2Bt1", "2Bt2", "2C"),
#' genhz = c("A", "A", "E", "2Bt", "2Bt", "2C"),
#' texcl = c("sil", "sil", "sil", "sl", "sl", "s")
#' )
#'
#' df
#'
#' dissolve_hz(df, c("genhz", "texcl"))
#' dissolve_hz(df, c("genhz", "texcl"), collapse = TRUE)
#'
#' test <- dissolve_hz(df, "genhz")
#' subset(test, value == "2Bt")
#'
dissolve_hz <- function(object, by, id = "peiid", hztop = "hzdept", hzbot = "hzdepb", collapse = FALSE, order = FALSE) {
# id = "peiid"; hztop = "hzdept"; hzbot = "hzdepb", collapse = FALSE, order = FALSE
# test inputs ----
# argument sanity check
# test_spc <- inherits(object, 'SoilProfileCollection')
# check that object & by are the right class
test_object <- inherits(object, "data.frame")
test_by <- inherits(by, "character")
if (!any(test_object | test_by)) {
stop("the object argument must be a data.frame, and by a character", call. = FALSE)
}
# check that by is not NULL
if (is.null(by)) stop("the by argument must not be NULL")
# check that collapse is a logical of length 1
if (!inherits(collapse, "logical") || length(collapse) != 1) {
stop("the collapse argument must be logical and a length of one", call. = FALSE)
}
# check that the column names exisit within the object
var_names <- c(id = id, top = hztop, bot = hzbot, by)
if (!all(var_names %in% names(object))) {
stop("all arguments must match object names")
}
# check that "by" are characters or convert
if (any(!"character" %in% sapply(object[by], class))) {
message("non-character grouping variables are being converted to characters")
object[by] <- lapply(object[by], as.character)
}
# standardize inputs ----
df <- object
idx_names <- sapply(var_names[1:3], function(x) which(names(df) == x))
names(df)[idx_names] <- names(var_names)[1:3]
# valid
# vd_idx <- validate_depths(df, id = "id", hztop = "hzdept", bot = "hzdepb")
if (order == TRUE) {
df <- df[order(df$id, df$top, df$bot), ]
}
if (collapse == TRUE) {
by_co <- paste(by, collapse = " & ")
df[by_co] <- apply(df[by], 1, paste, collapse = " & ")
by <- by_co
}
# var thickness ----
var_dep <- lapply(by, function(x) {
con_bot <- rle( paste(df$id, df[, x]))$length
con_top <- rle(rev(paste(df$id, df[, x])))$length
bot_idx <- cumsum(con_bot)
top_idx <- cumsum(con_top)
vd <- data.frame(
id = df[bot_idx, "id"],
top = rev(rev(df$top)[top_idx]),
bot = df[bot_idx, "bot"],
variable = x,
value = df[bot_idx, x]
)
return(vd)
})
var_dep <- do.call("rbind", var_dep)
# undo standardization ----
names(var_dep)[1:3] <- var_names[1:3]
return(var_dep)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.