Nothing
#' @title Hydrologic Ordering of a Geomorphic Proportion Matrix
#'
#' @param x x `data.frame`, geomorphic proportion matrix, as created by `soilDB::fetchOSD(..., extended=TRUE)`
#' @param g character, name of geomorphic summary table, one of: `c('geomcomp', 'hillpos', 'flats', 'terrace', 'mtnpos', 'shape')`
#' @param clust logical, perform clustering of geomorphic proportion matrix
#' @param j.amount amount of noise applied to rows having a duplicate proportion vector, passed to `jitter()`
#'
#' @author D.E. Beaudette
#'
#' @return when `clust = FALSE` a vector of series names, in hydrologic ordering, otherwise a `list` with the following elements:
#' * `clust`: rotated `hclust` object
#' * `hyd.order`: vector of series names, in hydrologic ordering
#' * `clust.hyd.order`: vector of series names, after clustering + rotation, approximate hydrologic ordering
#' * `match.rate`: fraction of series matching target hydrologic ordering, after clustering + rotation
#' * `obj`: objective function value (sum of squared rank differences), used by [`iterateHydOrder()`]
#'
#' @export
#'
#' @examples
#'
#' # example data, similar to results from soilDB::fetchOSD(..., extended = TRUE)
#' data("OSDexamples")
#'
#' # no clustering of the geomorphic proportion matrix
#' h <- hydOrder(OSDexamples$hillpos, g = 'hillpos', clust = FALSE)
#'
#' # compare with original order
#'
#' data.frame(
#' original = OSDexamples$hillpos$series,
#' ordered = h
#' )
#'
#' # cluster results
#' h <- hydOrder(OSDexamples$hillpos, g = 'hillpos', clust = TRUE)
#' str(h)
hydOrder <- function(x, g, clust = TRUE, j.amount = 0) {
# sanity checks
stopifnot(g %in% c('geomcomp', 'hillpos', 'flats', 'terrace', 'mtnpos', 'shape'))
stopifnot(inherits(x, 'data.frame'))
# scoring used to create hydrologic ordering
g.stats <- switch(
g,
'geomcomp' = c(4, 2, 1, 1, -2, -4),
'hillpos' = c(-4, -2, 0.1, 2, 4),
'flats' = c(-2, 0, 0, 2),
'terrace' = c(-1, 1),
'mtnpos' = c(4, 2, 1, 1, -2, -4),
'shape' = c(-4, 1, 4, 5, 6)
)
# series name is always in the first column
# there may be extra columns `n` and `shannon_entropy`, these will be ignored
# local vars
n.prop <- length(g.stats)
n.series <- nrow(x)
# just geomorphic proportions
x.prop <- x[, 2:(n.prop + 1)]
row.names(x.prop) <- x$series
# compute hydrologic ordering scores: geomorphic proportions * score vector
# ranking this vector will always produce a reasonable ordering
# exact rotation of a dendrogram is not always possible
.rs <- rowSums(
sweep(x.prop, 2, STATS = g.stats, FUN = '*')
)
# create ordering index for labels by ordering hydrologic ordering scores
hyd.order <- order(.rs, decreasing = TRUE)
# optionally perform clustering and rotation of dendrogram
if(clust) {
# "ties" in the proportion matrix will result in problematic dendrograms which cannot be
# rotated as close to hyd.order is would otherwise be possible
# find approximate ties at proportion threshold of 0.001
.hash <- apply(round(x.prop, 3), 1, digest)
.tab <- table(.hash)
.nm <- names(which(.tab > 1))
# flag those series (rows)
# that have ties in the proportion matrix
problem.idx <- which(.hash %in% .nm)
# if there are too few unique values
# add some noise to flagged rows
if(length(problem.idx) > 0) {
# add noise to affected rows
for(i in problem.idx) {
x.prop[i, ] <- jitter(unlist(x.prop[i, ]), amount = j.amount)
}
}
# cluster proportions
# results will be close to "hydrologic" order
# force interpretation as interval-scale
x.d <- as.hclust(
diana(
daisy(x.prop, type = list(numeric = 1:n.prop))
)
)
# attempt rotate clustering according to hydrologic ordering
# perfect ordering not possible when there are many ties
x.d.hydro <- dendextend::rotate(x.d, order = x$series[hyd.order])
## TODO: consider using ape pkg since we are already importing from it
# x.d.hydro <- as.hclust(ape::rotateConstr(as.phylo(x.d), constraint = x$series[hyd.order]))
# number of exact matches
match.rate <- length(
which(
x$series[hyd.order] == x$series[x.d.hydro$order]
)
) / n.series
# objective function: sum of squared rank distances
# this is used by iterateHydOrder() when maxIter is exceeded
obj <- sum((hyd.order - x.d.hydro$order)^2)
# clustering + stats on rotation
.res <- list(
clust = x.d.hydro,
hyd.order = x$series[hyd.order],
clust.hyd.order = x$series[x.d.hydro$order],
match.rate = match.rate,
obj = obj
)
} else {
# no clustering, just vector of series names in hydrologic ordering
.res <- x$series[hyd.order]
}
return(.res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.