#' Generate a coherent list of model specs
#'
#' Generates an internally consistent list of model specifications that may be
#' passed to \code{metab_bayes}, \code{metab_mle}, etc. via the \code{specs}
#' argument. This help file gives the definitive list of all possible model
#' specs, but only a subset of these are relevant to any given
#' \code{model_name}. See the 'Relevant arguments' section below. Irrelevant
#' arguments for the given \code{model_name} should not be explicitly passed
#' into this function (but don't worry - we'll just stop and tell you if you
#' make a mistake). Relevant arguments for the given \code{model_name} either
#' have default values or do not (see Usage). Relevant arguments without a
#' default should rarely be overridden, because their values will be determined
#' based on other arguments. Relevant arguments that do have a default can, and
#' often should, be overridden to tailor the model to your needs.
#'
#' @section Relevant arguments:
#'
#' * metab_bayes: Always relevant: \code{model_name, engine, split_dates,
#' keep_mcmcs, keep_mcmc_data, day_start, day_end, day_tests, ER_daily_mu,
#' ER_daily_sigma, params_in, params_out, n_chains, n_cores, burnin_steps,
#' saved_steps, thin_steps, verbose}. The need for other arguments depends on
#' features of the model structure, as from \code{mm_parse_name(model_name)}:
#' \itemize{ \item If \code{GPP_fun=='linlight'} then \code{GPP_daily_mu,
#' GPP_daily_sigma}, while if \code{GPP_fun=='satlight'} then
#' \code{alpha_meanlog, alpha_sdlog, Pmax_mu, Pmax_sigma}. \item If
#' \code{pool_K600=='none'} then \code{K600_daily_meanlog, K600_daily_sdlog}.
#' \item If \code{pool_K600=='normal'} then \code{K600_daily_meanlog_meanlog,
#' K600_daily_meanlog_sdlog, K600_daily_sdlog_sigma}. \item If
#' \code{pool_K600=='linear'} then \code{lnK600_lnQ_intercept_mu,
#' lnK600_lnQ_intercept_sigma, lnK600_lnQ_slope_mu, lnK600_lnQ_slope_sigma,
#' K600_daily_sigma_sigma}. \item If \code{pool_K600=='binned'} then
#' \code{K600_lnQ_nodes_centers, K600_lnQ_nodediffs_sdlog,
#' K600_lnQ_nodes_meanlog, K600_lnQ_nodes_sdlog, K600_daily_sigma_sigma}.
#' \item If \code{err_obs_iid} then \code{err_obs_iid_sigma_scale}. \item If
#' \code{err_proc_acor} then \code{err_proc_acor_phi_alpha,
#' err_proc_acor_phi_beta, err_proc_acor_sigma_scale}. \item If
#' \code{err_proc_iid} then \code{err_proc_iid_sigma_scale}. \item If
#' \code{err_proc_GPP} then \code{err_mult_GPP_sdlog_sigma}.}
#'
#' * metab_mle: \code{model_name, day_start, day_end, day_tests,
#' init.GPP.daily, init.Pmax, init.alpha, init.ER.daily, init.ER20,
#' init.K600.daily}
#'
#' * metab_night: \code{model_name, day_start, day_end, day_tests}
#'
#' * metab_Kmodel: \code{model_name, engine, day_start, day_end, day_tests,
#' weights, filters, predictors, transforms, other_args}. Note that the
#' defaults for \code{weights}, \code{predictors}, \code{filters}, and
#' \code{transforms} are adjusted according to the \code{engine} implied by
#' \code{model_name}.
#'
#' * metab_sim: \code{model_name, day_start, day_end, day_tests,
#' err_obs_sigma, err_obs_phi, err_proc_sigma, err_proc_phi, sim_seed}. Those
#' arguments whose period-separated name occurs in the default data_daily
#' argument to metab(sim) can be specified here as NULL, numeric, or a
#' function to be called each time \code{predict_DO} or \code{predict_metab}
#' is called on the model. If given as a function, an argument will be called
#' with any already-evaluated parameters (including the contents of data_daily
#' and n, the number of dates) passed in as arguments; for example, K600_daily
#' can see n, discharge.daily, and GPP_daily can see n, discharge.daily, and
#' K600.daily.
#'
#' @section MLE Initial Values:
#'
#' For metab_mle models (maximum likelihood estimation), specification
#' arguments whose names begin with \code{init} are applicable. Which
#' arguments are required depends on the value of model_name and can be
#' determined by calling \code{grep('^init.', names(specs(mname)),
#' value=TRUE)} once for your model name \code{mname} before supplying any
#' arguments.
#'
#' @param model_name character string identifying the model features. Use
#' \code{\link{mm_name}} to create a valid name based on desired attributes,
#' or \code{\link{mm_valid_names}} to see all valid names. Two alternatives to
#' the names given by \code{mm_valid_names()} are also accepted: (1) a model
#' type as accepted by the \code{type} argument to \code{mm_name}, which will
#' be used to create the default model name for that model type, or (2) a full
#' model file path for custom Bayesian models, as long as basename(model_name)
#' can still be parsed correctly with \code{mm_parse_name()} and the file
#' exists. In that case the file may be specified either as a file path
#' relative to the streamMetabolizer models directory (the first assumption;
#' this directory can be found with \code{system.file("models",
#' package="streamMetabolizer")}) or as an absolute path or a path relative to
#' the current working directory (the second assumption, if the first
#' assumption turns up no files of the given name).
#' @param engine The software or function to use in fitting the model. Should be
#' specified via \code{mm_name} rather than here. For \code{type='bayes'},
#' always \code{'stan'} indicating the software package to use for the MCMC
#' process (see http://mc-stan.org/). For types in
#' \code{c('mle','night','sim')} there's again only one option per model (R
#' functions; these need not be named here but will be noted in the suffix of
#' the model name, e.g., \code{"m_np_oi_tr_plrckm.nlm"} uses \code{nlm()} for
#' model fitting). For type='Kmodel', the name of an interpolation or
#' regression method relating K to the predictor[s] of choice. One of
#' \code{c("mean", "lm", "loess")}.
#' @inheritParams mm_model_by_ply
#' @inheritParams mm_is_valid_day
#'
#' @param init.GPP.daily the inital value of daily mean GPP (gO2 d^-1 m^-2) to
#' use in the NLM fitting process. See the MLE Initial Values section under
#' Details.
#' @param init.Pmax the initial value of Pmax (gO2 d^-1 m^-2) to use in the GPP
#' versus light relationship in the NLM fitting process. Pmax is the maximum
#' GPP value of the GPP-light curve. See the MLE Initial Values section under
#' Details.
#' @param init.alpha the inital value of alpha (gO2 s d^-1 umol^-1, i.e., units
#' of GPP/light) to use in the GPP versus light relationship in the NLM
#' fitting process. alpha is the initial slope of the GPP-light curve. See the
#' MLE Initial Values section under Details.
#' @param init.ER.daily the inital value of daily mean ER (gO2 d^-1 m^-2) to use
#' in the NLM fitting process. See the MLE Initial Values section under
#' Details.
#' @param init.ER20 the initial value of ER20 (gO2 d^-1 m^-2) to use in the ER
#' versus temperature relationship in the NLM fitting process. ER20 is the
#' respiration rate at 20 degrees C. See the MLE Initial Values section under
#' Details.
#' @param init.K600.daily the inital value of daily mean K600 (d^-1) to use in
#' the NLM fitting process. Ignored if K600 is supplied in data_daily, except
#' for those dates where K600 is NA. If there are any such dates, K600_init
#' must have a numeric (non-NA) value, as this will be used to estimate K600
#' for those dates. See the MLE Initial Values section under Details.
#'
#' @param split_dates logical indicating whether the data should be split into
#' daily chunks first (TRUE) or processed within one big model (FALSE). If
#' valid days differ in their timestep length, split_dates will need to be
#' TRUE; otherwise, FALSE is generally more efficient. FALSE is also the only
#' appropriate solution for a hierarchical model that pools information on
#' error, K600, etc. across days.
#' @param keep_mcmcs TRUE, FALSE, or (for nopool models) a vector of dates
#' (coerced with as.Date if character, etc.) indicating whether to keep all of
#' the mcmc model objects (TRUE), none of them (FALSE), or specific dates. The
#' default is TRUE because these objects often need inspecting.
#' @param keep_mcmc_data FALSE, TRUE, or (for nopool models) a vector of dates
#' (coerced with as.Date if character, etc.) indicating whether to keep all of
#' the mcmc model objects (TRUE), none of them (FALSE), or specific dates. The
#' default is FALSE because these objects can be very large.
#'
#' @param GPP_daily_mu The mean of a dnorm distribution for GPP_daily, the daily
#' rate of gross primary production
#' @param GPP_daily_lower The lower bound on every fitted value of GPP_daily,
#' the daily rate of gross primary production. Use values other than -Inf with
#' caution, recognizing that sometimes the input data are unmodelable and that
#' a negative estimate of GPP_daily (when unconstrained) could be your only
#' indication.
#' @param GPP_daily_sigma The standard deviation of a dnorm distribution for
#' GPP_daily, the daily rate of gross primary production
#' @param alpha_meanlog The mean of a dlnorm (lognormal) distribution for alpha,
#' the daily initial slope of the Jassby-Platt saturating curve relating GPP
#' to light
#' @param alpha_sdlog The standard deviation parameter of a dlnorm (lognormal)
#' distribution for alpha, the daily initial slope of the Jassby-Platt
#' saturating curve relating GPP to light.
#' @param Pmax_mu The mean of a dnorm (normal) distribution for Pmax, the daily
#' maximum GPP value of a Jassby-Platt saturating curve relating GPP to light.
#' @param Pmax_sigma The standard deviation of a dnorm (normal) distribution for
#' Pmax, the daily maximum GPP value of a Jassby-Platt saturating curve
#' relating GPP to light.
#'
#' @param ER_daily_mu The mean of a dnorm distribution for ER_daily, the daily
#' rate of ecosystem respiration
#' @param ER_daily_upper The upper (less negative) bound on every fitted value
#' of ER_daily, the daily rate of ecosystem respiration. Use values other than
#' Inf with caution, recognizing that sometimes the input data are unmodelable
#' and that a positive estimate of ER_daily (when unconstrained) could be your
#' only indication.
#' @param ER_daily_sigma The standard deviation of a dnorm distribution for
#' ER_daily, the daily rate of ecosystem respiration
#'
#' @param K600_daily_meanlog Applies when pool_K600 is 'none'. The mean of a
#' dlnorm distribution for K600_daily, the daily rate of reaeration
#' @param K600_daily_sdlog The lognormal scale parameter (standard deviation) of
#' a dlnorm distribution having meanlog equal to \code{K600_daily_meanlog}
#' (when pool_K600 is 'none') or \code{K600_daily_predlog} (when pool_K600 is
#' 'normal_sdfixed') for K600_daily, the daily rate of reaeration as corrected
#' for temperature and the diffusivity of oxygen
#' @param K600_daily_sigma The standard deviation of a dnorm distribution having
#' mean equal to \code{exp(K600_daily_predlog)} (applicable when pool_K600 is
#' 'linear_sdfixed' or 'binned_sdfixed') for K600_daily, the daily rate of
#' reaeration as corrected for temperature and the diffusivity of oxygen
#' @param K600_daily_sdlog_sigma hyperparameter for pool_K600 in c('normal').
#' The scale (= sigma) parameter of a half-normal distribution of sdlog in K ~
#' lN(meanlog, sdlog), sdlog ~ halfnormal(0, sigma=sdlog_sigma). Visualize the
#' PDF of K600_daily_sdlog with \code{\link{plot_distribs}}.
#' @param K600_daily_sigma_sigma hyperparameter for pool_K600 in
#' c('linear','binned'). The scale (= sigma) parameter of a half-normal
#' distribution of sigma in K ~ lN(meanlog, sigma), sigma ~ halfnormal(0,
#' sigma=sigma_sigma). Visualize the PDF of K600_daily_sdlog with
#' \code{\link{plot_distribs}}.
#'
#' @param K600_daily_meanlog_meanlog hyperparameter for pool_K600='normal'. The
#' mean parameter (meanlog_meanlog) of a lognormal distribution of meanlog in
#' K ~ lN(meanlog, sdlog), meanlog ~ lN(meanlog_meanlog, meanlog_sdlog)
#' @param K600_daily_meanlog_sdlog hyperparameter for pool_K600='normal'. The
#' standard deviation parameter (meanlog_sdlog) of a lognormal distribution of
#' meanlog in K ~ lN(meanlog, sdlog), meanlog ~ lN(meanlog_meanlog,
#' meanlog_sdlog)
#'
#' @param lnK600_lnQ_intercept_mu hyperparameter for pool_K600 == 'linear'. The
#' mean of the prior distribution for the intercept parameter in
#' \code{log(K600) ~ lnK600_lnQ_intercept + lnK600_lnQ_slope*log(Q)}
#' @param lnK600_lnQ_intercept_sigma hyperparameter for pool_K600 == 'linear'.
#' The standard deviation of the prior distribution for the intercept
#' parameter in \code{log(K600) ~ lnK600_lnQ_intercept +
#' lnK600_lnQ_slope*log(Q)}
#' @param lnK600_lnQ_slope_mu hyperparameter for pool_K600='linear'. The mean of
#' the prior distribution for the slope parameter in \code{log(K600) ~
#' lnK600_lnQ_intercept + lnK600_lnQ_slope*log(Q)}
#' @param lnK600_lnQ_slope_sigma hyperparameter for pool_K600='linear'. The
#' standard deviation of the prior distribution for the slope parameter in
#' \code{log(K600) ~ lnK600_lnQ_intercept + lnK600_lnQ_slope*log(Q)}
#'
#' @param K600_lnQ_nodes_centers data configuration argument for
#' pool_K600='binned'. numeric vector giving the natural-log-space centers of
#' the discharge bins. See also \code{\link{calc_bins}}
#' @param K600_lnQ_nodediffs_sdlog hyperparameter for pool_K600='binned'. The
#' standard deviations of the differences in estimated K600 between successive
#' lnQ_nodes (bins), where the means of those differences are always zero
#' @param K600_lnQ_nodes_meanlog hyperparameter for pool_K600='binned'. The
#' means of lognormal prior distributions for the K600_lnQ_nodes parameters.
#' @param K600_lnQ_nodes_sdlog hyperparameter for pool_K600='binned'. The
#' standard deviations of lognormal prior distributions for the K600_lnQ_nodes
#' parameters.
#'
#' @param err_obs_iid_sigma_scale The scale (= sigma) parameter of a half-Cauchy
#' distribution for err_obs_iid_sigma, the standard deviation of the
#' observation error. Visualize the PDF of err_obs_iid_sigma with
#' \code{\link{plot_distribs}}.
#' @param err_proc_acor_phi_alpha The alpha (= shape1) parameter on a beta
#' distribution for err_proc_acor_phi, the autocorrelation coefficient for the
#' autocorrelated component of process [& sometimes observation] error.
#' Visualize the PDF of err_proc_acor_phi with \code{\link{plot_distribs}}.
#' @param err_proc_acor_phi_beta The beta (= shape2) parameter on a beta
#' distribution for err_proc_acor_phi, the autocorrelation coefficient for the
#' autocorrelated component of process [& sometimes observation] error.
#' Visualize the PDF of err_proc_acor_phi with \code{\link{plot_distribs}}.
#' @param err_proc_acor_sigma_scale The scale (= sigma) parameter of a
#' half-Cauchy distribution for err_proc_acor_sigma, the standard deviation of
#' the autocorrelated component of process [& sometimes observation] error.
#' Visualize the PDF of err_proc_acor_sigma with \code{\link{plot_distribs}}.
#' @param err_proc_iid_sigma_scale The scale (= sigma) parameter of a
#' half-Cauchy distribution for err_proc_iid_sigma, the standard deviation of
#' the uncorrelated (IID) component of process [& sometimes observation]
#' error. Visualize the PDF of err_proc_iid_sigma with
#' \code{\link{plot_distribs}}.
#' @param err_mult_GPP_sdlog_sigma The scale parameter of a half-normal
#' distribution for err_mult_GPP_sdlog, the scale parameter of the lognormal
#' distribution of err_mult_GPP. err_mult_GPP is multiplied by light and then
#' normalized to a daily mean of 1 before being multiplied by GPP_daily to
#' estimate GPP_inst. The effect is a special kind of process error that is
#' proportional to light (with noise) and is applied to GPP rather than to
#' dDO/dt.
#'
#' @param params_in Character vector of hyperparameters to pass from the specs
#' list into the data list for the MCMC run. Will be automatically generated
#' during the specs() call; need only be revised if you're using a custom
#' model that requires different hyperparameters.
#'
#' @inheritParams prepdata_bayes
#' @inheritParams runstan_bayes
#'
#' @inheritParams prepdata_Kmodel
#' @inheritParams Kmodel_allply
#'
#' @param K600_lnQ_cnode_meanlog For a sim model with pool_K600='binned'. The
#' mean of a lognormal distribution describing the y=K600 value of the middle
#' (or just past middle) node in the piecewise lnK ~ lnQ relationship
#' @param K600_lnQ_cnode_sdlog For a sim model with pool_K600='binned'. The sd
#' of a lognormal distribution describing the y=K600 value of the middle (or
#' just past middle) node in the piecewise lnK ~ lnQ relationship
#' @param K600_lnQ_nodediffs_meanlog For a sim model with pool_K600='binned'.
#' The average (in log space) difference between ln(K) values of successive
#' nodes. A non-zero value introduces a trend in K ~ Q.
#' @param lnK600_lnQ_nodes For a sim model with pool_K600='binned'. The values
#' of lnK600 at each node. The default value of this spec is a function that
#' computes lnK600s based on simulated K~Q relationships.
#'
#' @param discharge_daily Daily values, or a function to generate daily values,
#' of mean daily discharge in m^3 s^-1. Fixed values may alternatively be
#' specified as discharge.daily in the data_daily passed to
#' \code{\link{metab}}.
#' @param DO_mod_1 Daily values, or a function to generate daily values, of the
#' first DO.mod value on each date. Fixed values may alternatively be
#' specified as \code{DO.mod.1} in the \code{data_daily} passed to
#' \code{\link{metab}}. Or may be implied by a \code{DO.obs} column in
#' \code{data}, from which the first values on each date will be extracted by
#' \code{metab()}.
#' @param K600_daily Daily values, or a function to generate daily values, of
#' the reaeration rate constant K600. Fixed values may alternatively be
#' specified as \code{K600.daily} in the data_daily passed to
#' \code{\link{metab}}.
#' @param GPP_daily Daily values, or a function to generate daily values, of the
#' photosynthesis parameter GPP_daily. Fixed values may alternatively be
#' specified as \code{GPP.daily} in the data_daily passed to
#' \code{\link{metab}}.
#' @param Pmax Daily values, or a function to generate daily values, of the
#' photosynthesis parameter Pmax. Fixed values may alternatively be specified
#' as \code{Pmax} in the data_daily passed to \code{\link{metab}}.
#' @param alpha Daily values, or a function to generate daily values, of the
#' photosynthesis parameter alpha. Fixed values may alternatively be specified
#' as \code{alpha} in the data_daily passed to \code{\link{metab}}.
#' @param ER_daily Daily values, or a function to generate daily values, of the
#' respiration parameter ER_daily. Fixed values may alternatively be specified
#' as \code{ER.daily} in the data_daily passed to \code{\link{metab}}.
#' @param ER20 Daily values, or a function to generate daily values, of the
#' respiration parameter ER20. Fixed values may alternatively be specified as
#' \code{ER20} in the data_daily passed to \code{\link{metab}}.
#'
#' @param err_obs_sigma Daily values, or a function to generate daily values, of
#' the sd of observation error, or 0 for no observation error. Observation
#' errors are those applied to DO.mod after generating the full time series of
#' modeled values.
#' @param err_obs_phi Daily values, or a function to generate daily values, of
#' the autocorrelation coefficient of the observation errors, or 0 for
#' uncorrelated errors.
#' @param err_proc_sigma Daily values, or a function to generate daily values,
#' of the sd of process error, or 0 for no process error. Process errors are
#' applied at each time step, and therefore propagate into the next timestep.
#' @param err_proc_phi Daily values, or a function to generate daily values, of
#' the autocorrelation coefficient of the process errors, or 0 for
#' uncorrelated errors.
#' @param err_round A single value indicating whether simulated DO.obs should be
#' rounded to simulate the common practice of only reporting a few significant
#' figures for DO. Use NA for no effect, or an integer as in the \code{digits}
#' argument to \code{\link{round}} if simulated DO.obs should be rounded to
#' the given number of digits beyond \code{.}.
#' @param sim_seed NA to specify that each call to predict_DO should generate
#' new values, or an integer, as in the \code{seed} argument to
#' \code{\link{set.seed}}, specifying the seed to set before every execution
#' of predict_DO and/or predict_metab.
#'
#' @return an internally consistent list of arguments that may be passed to
#' \code{metab} as the \code{specs} argument
#'
#' @importFrom stats rnorm rlnorm
#' @examples
#' specs(mm_name(type='mle', err_obs_iid=FALSE, err_proc_iid=TRUE))
#' specs(mm_name(type='bayes', pool_K600='normal'))
#' @export
specs <- function(
## All or several models
model_name = mm_name(),
engine,
# inheritParams mm_model_by_ply
day_start = 4,
day_end = 28,
# inheritParams mm_is_valid_day
day_tests=c('full_day', 'even_timesteps', 'complete_data', 'pos_discharge', 'pos_depth'),
required_timestep=NA,
## MLE
# initial values
init.GPP.daily = 8,
init.Pmax = 10,
init.alpha = 0.0001,
init.ER.daily = -10,
init.ER20 = -10,
init.K600.daily = 10,
## Bayes
# model setup
split_dates,
keep_mcmcs = TRUE,
keep_mcmc_data = TRUE,
# hyperparameters for non-hierarchical GPP & ER
GPP_daily_mu = 3.1,
GPP_daily_lower = -Inf,
GPP_daily_sigma = 6.0,
alpha_meanlog = -4.6,
alpha_sdlog = 0.5,
Pmax_mu = 10,
Pmax_sigma = 7,
ER_daily_mu = -7.1,
ER_daily_upper = Inf,
ER_daily_sigma = 7.1,
# hyperparameters for non-hierarchical K600
K600_daily_meanlog = log(12),
# hyperparameters for hierarchical K600 - normal
K600_daily_meanlog_meanlog = log(12),
K600_daily_meanlog_sdlog = 1.32,
# hyperparameters for hierarchical K600 - linear. defaults should be
# reasonably constrained, not too wide
lnK600_lnQ_intercept_mu = 2,
lnK600_lnQ_intercept_sigma = 2.4,
lnK600_lnQ_slope_mu = 0,
lnK600_lnQ_slope_sigma = 0.5,
# hyperparameters for hierarchical K600 - binned. K600_daily ~
# lognormal(K600_daily_nodes_meanlog[lnQ_bin],
# K600_daily_nodes_sdlog[lnQ_bin]) with linear interpolation among bins before
# exponentiating. nodes_meanlog and nodes_sdlog may be length b =
# length(K600_daily_lnQ_nodes) or length 1 (to be replicated to length b).
# -8:6 covers almost all points in Raymond et al. 2012 and will therefore
# always be too broad a range for a single stream. -3:3 will catch some
# streams to rivers as a first cut, though users should still modify
K600_lnQ_nodes_centers = -3:3, # the x=lnQ values for the nodes
K600_lnQ_nodediffs_sdlog = 0.5, # for centers 1 apart; for centers 0.2 apart, use 1/5 of this
K600_lnQ_nodes_meanlog = rep(log(12), length(K600_lnQ_nodes_centers)), # distribs for the y=K600 values of the nodes
K600_lnQ_nodes_sdlog = rep(1.32, length(K600_lnQ_nodes_centers)),
# hyperparameters for any K pooling or non-pooling strategy
K600_daily_sdlog = switch(mm_parse_name(model_name)$pool_K600, none=1, normal_sdfixed=0.05, NA),
K600_daily_sigma = switch(mm_parse_name(model_name)$pool_K600, linear_sdfixed=10, binned_sdfixed=5, NA),
K600_daily_sdlog_sigma = switch(mm_parse_name(model_name)$pool_K600, normal=0.05, NA),
K600_daily_sigma_sigma = switch(mm_parse_name(model_name)$pool_K600, linear=1.2, binned=0.24, NA),
# normal_sdzero, linear_sdzero, and binned_sdzero all have no parameters for this
# hyperparameters for error terms
err_obs_iid_sigma_scale = 0.03,
err_proc_iid_sigma_scale = 5,
err_proc_acor_phi_alpha = 1,
err_proc_acor_phi_beta = 1,
err_proc_acor_sigma_scale = 1,
err_mult_GPP_sdlog_sigma = 1,
# vector of hyperparameters to include as MCMC data
params_in,
# inheritParams runstan_bayes
params_out,
n_chains = 4,
n_cores = 4,
burnin_steps = 500,
saved_steps = 500,
thin_steps = 1,
verbose = FALSE,
## Kmodel
#inheritParams prepdata_Kmodel
weights = c("K600/CI"), # 'K600/CI' is argued for in stream_metab_usa issue #64
filters = c(CI.max=NA, discharge.daily.max=NA, velocity.daily.max=NA),
#inheritParams Kmodel_allply
predictors = c("discharge.daily"),
transforms = c(K600='log', date=NA, velocity.daily="log", discharge.daily="log"),
other_args = c(),
## Sim
# multi-day simulation parameters. already above for bayes:
# K600_lnQ_nodes_centers, K600_lnQ_nodediffs_sdlog
K600_lnQ_cnode_meanlog = log(6), # distrib for the y=K600 values of the middle (or just past middle) node
K600_lnQ_cnode_sdlog = 1, # distrib for the y=K600 values of the middle (or just past middle) node
K600_lnQ_nodediffs_meanlog = 0.2, # non-zero introduces a trend in K ~ Q
lnK600_lnQ_nodes = function(K600_lnQ_nodes_centers, K600_lnQ_cnode_meanlog, K600_lnQ_cnode_sdlog,
K600_lnQ_nodediffs_meanlog, K600_lnQ_nodediffs_sdlog, ...) {
sim_Kb(K600_lnQ_nodes_centers, K600_lnQ_cnode_meanlog, K600_lnQ_cnode_sdlog,
K600_lnQ_nodediffs_meanlog, K600_lnQ_nodediffs_sdlog)
},
# daily simulation parameters
discharge_daily = function(n, ...) rnorm(n, 20, 3),
DO_mod_1 = NULL,
K600_daily = function(n, K600_daily_predlog=log(10), ...) pmax(0, rnorm(n, K600_daily_predlog, 4)),
GPP_daily = function(n, ...) pmax(0, rnorm(n, 8, 4)),
Pmax = function(n, ...) pmax(0, rnorm(n, 10, 2)),
alpha = function(n, ...) pmax(0, rnorm(n, 0.0001, 0.00002)),
ER_daily = function(n, ...) pmin(0, rnorm(n, -10, 5)),
ER20 = function(n, ...) pmin(0, rnorm(n, -10, 4)),
# sub-daily simulation parameters
err_obs_sigma = 0.01,
err_obs_phi = 0,
err_proc_sigma = 0.2,
err_proc_phi = 0,
err_round = NA,
# simulation replicability
sim_seed = NA
) {
# make it easier to enter custom specs by creating the type-specific default if model_name %in% 'mle', etc.
if(model_name %in% eval(formals(mm_name)$type))
model_name <- mm_name(type=model_name)
# check the validity of the model_name against the list of officially accepted model names
mm_validate_name(model_name)
# parse the model_name
features <- mm_parse_name(model_name, expand=TRUE)
# collect info about the arguments
required <- 'model_name'
all_possible <- names(formals(specs))
not_missing <- names(as.list(match.call())[-1]) # the arguments that were given explicitly
yes_missing <- all_possible[!(all_possible %in% not_missing)]
prefer_missing <- setdiff(all_possible[sapply(formals(specs), is.symbol)], 'params_out') # the arguments w/o defaults, mostly
prefer_not_missing <- if(features$type == 'bayes' && features$GPP_fun == 'satlight') {
c('alpha_meanlog', 'alpha_sdlog', 'Pmax_mu', 'Pmax_sigma')
} else {
c() # could be made more extensive
}
# argument checks
if(any(required %in% yes_missing))
stop("missing and required argument: ", paste(required[required %in% yes_missing], collapse=", "))
if(any(prefer_not_missing %in% yes_missing)) {
warn_about <- prefer_not_missing[prefer_not_missing %in% yes_missing]
warning("you should specify site-appropriate values for all parameters and especially ", paste(warn_about, collapse=", "))
}
redundant <- not_missing[not_missing %in% prefer_missing]
if('engine' %in% redundant) {
warning("'engine' should be specified in mm_name() rather than specs()")
redundant <- redundant[redundant != 'engine']
}
if(length(redundant) > 0) {
warning("argument[s] that should usually be specified in revise() rather than specs(): ", paste(redundant, collapse=", "))
}
# collect the defaults + directly specified arguments
all_specs <- as.list(environment())
# copy/calculate arguments as appropriate to the model
specs <- list()
switch(
features$type,
'bayes' = {
# list the specs that will make it all the way to the Stan model as data
all_specs$params_in <- c(
switch(
features$GPP_fun,
linlight=c('GPP_daily_mu','GPP_daily_lower','GPP_daily_sigma'),
satlight=c('alpha_meanlog', 'alpha_sdlog', 'Pmax_mu', 'Pmax_sigma')),
c('ER_daily_mu','ER_daily_upper','ER_daily_sigma'),
switch(
features$pool_K600_type,
none=c('K600_daily_meanlog'),
normal=c('K600_daily_meanlog_meanlog', 'K600_daily_meanlog_sdlog'),
linear=c('lnK600_lnQ_intercept_mu', 'lnK600_lnQ_intercept_sigma', 'lnK600_lnQ_slope_mu', 'lnK600_lnQ_slope_sigma'),
binned=c('K600_lnQ_nodediffs_sdlog', 'K600_lnQ_nodes_meanlog', 'K600_lnQ_nodes_sdlog')),
switch(
features$pool_K600_sd,
zero=c(),
fixed=switch(features$pool_K600_type, none=, normal='K600_daily_sdlog', linear=, binned='K600_daily_sigma'),
fitted=switch(features$pool_K600_type, normal='K600_daily_sdlog_sigma', linear=, binned='K600_daily_sigma_sigma')
),
if(features$err_obs_iid) 'err_obs_iid_sigma_scale',
if(features$err_proc_acor) c('err_proc_acor_phi_alpha', 'err_proc_acor_phi_beta', 'err_proc_acor_sigma_scale'),
if(features$err_proc_iid) 'err_proc_iid_sigma_scale',
if(features$err_proc_GPP) 'err_mult_GPP_sdlog_sigma'
)
# list all needed arguments
included <- c(
# model setup
'model_name', 'engine', 'split_dates', 'keep_mcmcs', 'keep_mcmc_data',
# date ply day_tests
'day_start', 'day_end', 'day_tests', 'required_timestep',
# discharge binning parameters are not params_in, though they're
# conceptually related and therefore colocated in formals(specs)
if(features$pool_K600_type == 'binned') c('K600_lnQ_nodes_centers'),
# params_in is both a vector of specs to include and a vector to include in specs
all_specs$params_in, 'params_in',
# inheritParams runstan_bayes
'params_out', 'n_chains', 'n_cores',
'burnin_steps', 'saved_steps', 'thin_steps', 'verbose'
)
# compute some arguments
if('engine' %in% yes_missing) {
all_specs$engine <- features$engine
}
if('split_dates' %in% yes_missing) {
all_specs$split_dates <- switch(
features$pool_K600_type,
'none' = FALSE, # pretty sure FALSE is faster. also allows hierarchical error terms
'normal'=, 'linear'=, 'binned' = FALSE,
stop("unknown pool_K600; unsure how to set split_dates"))
}
if(features$pool_K600_type == 'binned') {
# defaults are for linear pool_K600 & need adjustment for binned method
all_specs$K600_daily_beta_mu <- rep(10, length(all_specs$K600_daily_lnQ_nodes))
all_specs$K600_daily_beta_sigma <- rep(10, length(all_specs$K600_daily_lnQ_nodes))
}
if('params_out' %in% yes_missing) {
all_specs$params_out <- c(
c('GPP', 'ER', 'DO_R2'),
switch(
features$GPP_fun,
linlight=c('GPP_daily'),
satlight=c('alpha', 'Pmax')),
c('ER_daily', 'K600_daily'),
switch(
features$pool_K600_type,
none=c(),
normal=c('K600_daily_predlog'),
linear=c('K600_daily_predlog', 'lnK600_lnQ_intercept', 'lnK600_lnQ_slope'),
binned=c('K600_daily_predlog', 'lnK600_lnQ_nodes')),
if(features$pool_K600_sd == 'fitted')
switch(
features$pool_K600_type,
normal='K600_daily_sdlog',
linear=, binned='K600_daily_sigma'),
if(features$err_obs_iid) c('err_obs_iid_sigma', 'err_obs_iid'),
if(features$err_proc_acor) c('err_proc_acor', 'err_proc_acor_phi', 'err_proc_acor_sigma'),
if(features$err_proc_iid) c('err_proc_iid_sigma', 'err_proc_iid'),
if(features$err_proc_GPP) c('err_proc_GPP', 'GPP_pseudo_R2'))
}
# check for errors/inconsistencies
model_path <- tryCatch(
mm_locate_filename(model_name),
error=function(e) {
warning(e)
return(model_name)
})
if(features$engine == "NA")
stop('engine must be specified for Bayesian models')
},
'mle' = {
# determine which init values will be needed
. <- '.dplyr.var'
init.needs <- paste0('init.', get_param_names(model_name)$required)
# list all needed arguments
included <- c('model_name', 'day_start', 'day_end', 'day_tests', 'required_timestep', init.needs)
},
'night' = {
# list all needed arguments
included <- c('model_name', 'day_start', 'day_end', 'day_tests', 'required_timestep')
# some different defaults for night relative to other models
if('day_start' %in% yes_missing) {
all_specs$day_start <- 12
}
if('day_end' %in% yes_missing) {
all_specs$day_end <- 36
}
if('day_tests' %in% yes_missing) {
all_specs$day_tests <- c(day_tests, 'include_sunset')
}
},
'Kmodel' = {
# list all needed arguments
included <- c(
'model_name', 'engine', 'day_start', 'day_end', 'day_tests', 'required_timestep',
'weights', 'filters', 'predictors', 'transforms', 'other_args')
if('engine' %in% yes_missing) {
all_specs$engine <- features$engine
}
# some different defaults for each engine, because no one set of defaults
# makes sense for all engines
#if('weights' %in% yes_missing) all_specs$weights <- c("K600/CI") # same for all, so use default as in Usage
switch(
all_specs$engine,
mean={
if('filters' %in% yes_missing) all_specs['filters'] <- list(c()) # need special syntax to assign c(). see https://stackoverflow.com/a/7945259/3203184
if('predictors' %in% yes_missing) all_specs['predictors'] <- list(c())
if('transforms' %in% yes_missing) all_specs$transforms <- c(K600='log')
if('other_args' %in% yes_missing) all_specs$other_args <- list(possible_args=NULL)
},
lm={
if('filters' %in% yes_missing) all_specs$filters <- c(CI.max=NA, discharge.daily.max=NA)
if('predictors' %in% yes_missing) all_specs$predictors <- c("discharge.daily")
if('transforms' %in% yes_missing) all_specs$transforms <- c(K600='log', discharge.daily="log")
if('other_args' %in% yes_missing) all_specs$other_args <- list(possible_args=names(formals(lm))[-which(names(formals(lm)) %in% c('formula','data','weights'))])
},
loess={
if('filters' %in% yes_missing) all_specs$filters <- c(CI.max=NA, discharge.daily.max=NA, velocity.daily.max=NA)
if('predictors' %in% yes_missing) all_specs$predictors <- c("date", "discharge.daily")
if('transforms' %in% yes_missing) all_specs$transforms <- c(K600='log', date=NA, velocity.daily="log", discharge.daily="log")
if('other_args' %in% yes_missing) all_specs$other_args <- list(possible_args=names(formals('loess'))[-which(names(formals('loess')) %in% c('formula','data','weights'))])
}
)
},
'sim' = {
# determine which daily parameters will be needed
par_needs <- gsub('\\.', '_', unlist(get_param_names(model_name)[c('optional','required')]))
# list all needed arguments
included <- c(
'model_name', 'day_start', 'day_end', 'day_tests', 'required_timestep',
switch(
features$pool_K600,
none=c(),
normal=stop("pool_K600='normal' unavailable for now; try 'binned' instead"),
linear=stop("pool_K600='linear' unavailable for now; try 'binned' instead"), # 'discharge_daily', etc.
binned=c('K600_lnQ_nodes_centers',
'K600_lnQ_cnode_meanlog', 'K600_lnQ_cnode_sdlog', 'K600_lnQ_nodediffs_meanlog', 'K600_lnQ_nodediffs_sdlog',
'lnK600_lnQ_nodes')),
par_needs, 'err_round', 'sim_seed')
if(features$pool_K600 == 'binned') {
if('K600_lnQ_nodes_centers' %in% yes_missing) # override the default, which is for 'bayes' rather than 'sim'
all_specs$K600_lnQ_nodes_centers <- function(discharge.daily, ...) calc_bins(log(discharge.daily), 'width', width=0.2)$bounds
}
}
)
# stop if truly irrelevant arguments were given
if(length(irrelevant <- not_missing[!(not_missing %in% included)]) > 0)
stop("irrelevant argument: ", paste(irrelevant, collapse=", "))
# return just the arguments we actually need
add_specs_class(all_specs[included])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.