#' Spatial and temporal diaggregattion of paleo flow data
#'
#' The default parameter values are setup to perform the typical disaggregation
#' for CRSS, based on scaling the Upper Basin inflows, and performing a
#' direct resampling of Lower Basin tributaries, i.e., no scale factor is
#' applied to the LB tributaries. Sites 1-20 are scaled (`sf_sites = 1:20`);
#' therefore the remaining sites (21:29) are not scaled.
#'
#' @param x The annual paleo data to disaagregate.
#' @param ann_index_flow Observed annual flow data used for picking index year.
#' @param mon_flw Intervening monthly natural flow. Used for spatially and
#' temporaly disaggregating paleo data based on the index year.
#' @param nsite The number of sites to diaggregate the data too.
#' @param sf_sites The site numbers (indeces), that will scale the index year's
#' volume based on the annual flow being disaggregated. The remaining sites
#' will select the index year directly. See **Details**.
#' @param nsim Number of times to repeat the space/time disaggregation.
#' @param ofolder Optional. If specified, the disaggregated flow data and the
#' selected index years are saved to this folder as csv files. There will be
#' one csv file for each time the disaggregation is repeated (`nsim`). This
#' file will contain one column for each site (`nsite`), and one row for each
#' month of data (12 * number of years in `x`).
#' @param index_years Optional. If specified, these index years will be used
#' instead of selecting years based on weights and sampling.
#' @param k_weights If `NULL`, parameters are set based on definitions in Nowak
#' et al. (2010). Users may force `k` and the `weights` by specifiying this
#' argument. It should be a list with two named entries: `k` and `weights`.
#'
#' @author Ken Nowak
#'
#' @references Nowak, K., Prairie, J., Rajagopalan, B., Lall, U. (2010).
#' A nonparametric stochastic approach for multisite disaggregation of
#' annual to daily streamflow. *Water Resources Research.*
#'
#' @examples
#' \dontrun{
#' # read in annual synthetic mon_flw for disag
#' x <- matrix(scan("data-raw/Meko.txt"), ncol = 2, byrow = TRUE)
#' # intervening natural flow mon_flw - monthly CY text file
#' mon_flw <- as.matrix(read.table(
#' "tests/dp/NFfullbasinWY0608intervening.txt",
#' sep = "\t"
#' ))
#'
#' # observed annual flow for picking analog disag yr
#' ann_index_flow <- as.matrix(read.table("tests/dp/LFWYTotal.txt"))
#' zz <- paleo_disagg(x, ann_index_flow, mon_flw, 29, 1)
#' }
#'
#' @export
paleo_disagg <- function(x,
ann_index_flow,
mon_flw,
nsite = 29,
sf_sites = 1:20,
nsim = 1,
ofolder = NULL,
index_years = NULL,
k_weights = NULL)
{
n_paleo_yrs <- nrow(x) # 1244 for meko; length of each simulation (yrs)
# how many yrs of observed mon_flw
assert_that(
nrow(mon_flw) %% 12 == 0,
msg = "`mon_flw` needs to have an even year's worth of data"
)
n_obs_yrs <- nrow(mon_flw)/12
assert_that(
n_obs_yrs == nrow(ann_index_flow),
msg = "`ann_index_flow` and `mon_flw` must have the same number of years."
)
assert_that(ncol(ann_index_flow) == 2)
assert_that(ncol(x) == 2)
assert_that(
nsite == ncol(mon_flw),
msg = "`mon_flow` needs to have `nsite` columns."
)
if (!is.null(index_years)) {
if (ncol(index_years) != nsim)
stop(
"`index_years` must be specified for all simulations.\n",
" So, `nsim` must equal the number of columns in `index_years`.",
call. = FALSE
)
if (nrow(index_years) != n_paleo_yrs)
stop(
"`index_years` must be specified for every year in the paleo record.",
call. = FALSE
)
}
if (!is.null(index_years) && !is.null(k_weights)) {
stop(
"If specifying `index_years`, there is no need to specify `k_weights`",
call. = FALSE
)
}
if (max(sf_sites) > nsite) {
stop(
"max(`sf_sites`), must be <= the number of sites (`site`).",
call. = FALSE
)
}
if (!all(1:nsite %in% sf_sites)) {
# set ind_sites to the remaining sites
# ind_sites are selected directly
ind_sites <- 1:nsite
ind_sites <- ind_sites[!(1:nsite %in% sf_sites)]
message(
"Sites ", toString(ind_sites), "\n",
"will be selected directly from the index years, i.e., not scaled."
)
} else {
ind_sites <- NULL
}
# matrix for observed values - row(yr), col (month), index (site)
dat_a <- array(data = NA, dim=c(n_obs_yrs, 12, nsite))
# matrix for disag values - row(yr), col (month), index1 (site), index2(sim #)
disag <- array(data = NA, dim = c(n_paleo_yrs, 12, nsite, nsim))
# matrix for recording yr index for disag (optional)
index_mat <- matrix(ncol = nsim, nrow = n_paleo_yrs)
# matrix for recording scale factors used (optional)
sf_mat <- matrix(ncol = nsim, nrow = n_paleo_yrs)
# this loop moves observed monthly mon_flw from 2d matrix to 3d array
mgn <- length(dat_a[1,1,])
for (j in seq_len(mgn)) {
s <- 1
e <- 12
for (i in seq_len(n_obs_yrs)) {
dat_a[i, , j] <- mon_flw[s:e, j]
s <- s + 12
e <- e + 12
}
}
# loop through the number of simulations ---------------------
for(j in seq_len(nsim)){
# this picks the 1st year for disag based only on the annual flow
if (is.null(index_years)) {
ind_yrs <- knn_get_index_year(x, ann_index_flow, k_weights)
index_mat[, j] <- ind_yrs[, 1]
} else {
ind_yrs <- index_years[, j, drop = FALSE]
index_mat[, j] <- ind_yrs[, 1]
}
# loop through all years that need disaggregated ---------------
for(h in seq_len(n_paleo_yrs)) {
# select the index year and scaling factor
index_atts <- get_scale_factor(ind_yrs[h, 1], x[h, 2], ann_index_flow)
sf_mat[h, j] <- index_atts$SF
disag[h, , sf_sites, j] <- dat_a[index_atts$pos, , sf_sites] *
index_atts$SF
disag[h, , ind_sites, j] <- dat_a[index_atts$pos, , ind_sites]
}
}
# convert from 4-d array to list of 2-d arrays
disag_out <- lapply(seq_len(nsim), function(ii) {
do.call(
cbind,
lapply(seq_len(nsite), function(jj) as.vector(t(disag[,,jj,ii])))
)
})
# output to "flat" file
if (!is.null(ofolder)) {
write_knn_disagg(disag_out, index_mat, ofolder = ofolder)
}
invisible(list(paleo_disagg = disag_out, index_years = index_mat))
}
#' Compute the scaling factor for the current year's flow from the index year
#'
#' Selects the magnitude of flow from `ann_index_flow` for the `index_year`.
#' Then scaling factor is computed as `flow` / `ann_index_flow[index_year]`.
#'
#' @param index_year The index year to select from ann_index_flow
#' @param flow The current flow to disaggregate (length == 1)
#' @param ann_index_flow Matrix of index years and flow. n x 2 matrix. First
#' column is years.
#'
#' @noRd
#'
get_scale_factor <- function(index_year, flow, ann_index_flow)
{
assert_that(length(index_year) == 1)
assert_that(length(flow) == 1)
assert_that(ncol(ann_index_flow) == 2)
assert_that(index_year %in% ann_index_flow[,1])
# index for selected yr
pos <- match(index_year, ann_index_flow[,1])
# scaling factor to apply for disag
SF <- flow/(ann_index_flow[pos, 2])
list(pos = pos, SF = SF)
}
write_knn_disagg <- function(disag_out, index_mat, ofolder = ".")
{
nsim <- length(disag_out)
lapply(seq_len(nsim), function(ii)
utils::write.csv(
disag_out[[ii]],
file = file.path(ofolder, paste0("paleo_disagg_", ii, ".csv")),
row.names = FALSE
)
)
utils::write.csv(
index_mat,
file = file.path(ofolder, "index_years.csv"),
row.names = FALSE
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.