#' Fit stream metabolism models
#'
#' Passes data and model parameters to \code{streamMetabolizer} or
#' \code{BASE} according to arguments passed to \code{\link{prep_metabolism}}.
#' NOTE: support for modeling with
#' \code{BASE} is currently in development. Please use \code{streamMetabolizer}
#' in the meantime.
#'
#' This function is a wrapper for \code{streamMetabolizer}'s model
#' specification (\code{mm_name}, \code{specs}), fitting (\code{metab}), and
#' prediction (\code{predict_metab}) functions. All parameters except
#' \code{d} and \code{skip_prompts} are \code{streamMetabolizer} parameters.
#' The default arguments specify a recommended starting point for fitting a
#' metabolism model to data from a typical stream or river. In addition to
#' the parameters documented here, this function calculates
#' \code{K600_lnQ_nodes_centers} (a parameter of \code{streamMetabolizer}'s
#' \code{specs} function) as a sequence of length 7 spanning the minimum to
#' the maximum daily mean of natural-log discharge.
#'
#' To access the full model specification interface of \code{streamMetabolizer},
#' please call its corresponding functions
#' (\code{mm_name, specs, metab, predict_metab}) directly.
#' See the example section below for more.
#'
#' @author Mike Vlah, \email{vlahm13@gmail.com}
#' @author Aaron Berdanier
#' @param d the output of \code{\link{prep_metabolism}}.
#' @return A list containing the output of \code{streamMetabolizer}'s
#' \code{metab} function (the model fit object), the output of its
#' \code{predict_metab} function (metabolism predictions), and additional
#' information about model performance and specifications.
#' @param pool_K600 character. Should the model pool information among
#' days to get more consistent daily estimates for K600? Options (see Details
#' section of \code{streamMetabolizer}'s \code{mm_name} function for more):
#' \itemize{
#' \item \code{none}: no pooling of K600
#' \item \code{binned}: \eqn{K600 ~ N(B[Q_bin], sigma)} where \eqn{mu ~
#' N(mu_mu, mu_sigma)} and \eqn{sigma ~ N(sigma_mu, sigma_sigma)}
#' }
#' @param err_obs_iid logical. Should IID observation error be included? If not,
#' the model will be fit to the differences in successive DO measurements,
#' rather than to the DO measurements themselves.
#' @param err_proc_acor logical. Should autocorrelated process error (with the
#' autocorrelation term phi fitted) be included?
#' @param err_proc_iid logical. Should IID process error be included?
#' @param ode_method character. The method to use in solving the ordinary
#' differential equation for DO. Options:
#' \itemize{
#' \item \code{euler}, formerly \code{Euler}: the final change in DO from
#' t=1 to t=2 is solely a function of GPP, ER, DO, etc. at t=1
#' \item \code{trapezoid}, formerly \code{pairmeans}: the final change in DO
#' from t=1 to t=2 is a function of the mean values of GPP, ER, etc. across
#' t=1 and t=2.
#' \item for \code{type='mle'}, options also include \code{rk2} and any
#' character method accepted by \code{\link[deSolve]{ode}} in the
#' \code{deSolve} package (\code{lsoda}, \code{lsode}, \code{lsodes},
#' \code{lsodar}, \code{vode}, \code{daspk}, \code{rk4}, \code{ode23},
#' \code{ode45}, \code{radau}, \code{bdf}, \code{bdf_d}, \code{adams},
#' \code{impAdams}, and \code{impAdams_d}; note that many of these have not
#' been well tested in the context of \code{streamMetabolizer} models)
#' }
#' @param deficit_src character. From what DO estimate (observed or modeled)
#' should the DO deficit be computed? Options:
#' \itemize{
#' \item \code{DO_mod}: the DO deficit at time t will be {(DO.sat(t) -
#' DO_mod(t))}, the difference between the equilibrium-saturation value and
#' the current best estimate of the true DO concentration at that time
#' \item \code{DO_obs}: the DO deficit at time t will be {(DO.sat(t) -
#' DO.obs(t))}, the difference between the equilibrium-saturation value and
#' the measured DO concentration at that time
#' \item \code{DO_obs_filter}: applicable only to \code{type='night'}: a
#' smoothing filter is applied over the measured DO.obs values before
#' applying nighttime regression
#' \item \code{NA}: applicable only to \code{type='Kmodel'}, for which DO deficit is not estimated
#' }
#' @param skip_prompts logical. If TRUE, you will not be prompted with questions
#' about sending your model results to the StreamPULSE server. Prompting occurs
#' when your model outperforms the current best model stored by StreamPULSE for
#' the site and year modeled. When \code{skip_prompts} is TRUE, your results are
#' pushed to the server automatically only if no results are currently stored
#' for the site and year modeled; otherwise they are never pushed. This
#' parameter is useful when fitting models inside a loop (see the final
#' example below).
#' @seealso \code{\link{request_data}} for acquiring StreamPULSE data;
#' \code{\link{prep_metabolism}} for organizing data and acquiring additional
#' variables.
#' @export
#' @examples
#' query_available_data(region='all')
#'
#' streampulse_data = request_data(sitecode='NC_Eno',
#' startdate='2016-06-10', enddate='2016-10-23')
#'
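#' ## Z_data and Q_data below stand in for user-supplied level (Z) and
#' ## discharge (Q) measurements used to fit a rating curve; substitute your
#' ## own data, or omit the zq_curve argument if discharge was recorded directly.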
#' fitdata = prep_metabolism(d=streampulse_data, type='bayes',
#' model='streamMetabolizer', interval='15 min',
#' rm_flagged=list('Bad Data', 'Questionable'), fillgaps='interpolation',
#' zq_curve=list(sensor_height=NULL, Z=Z_data, Q=Q_data,
#' fit='power', plot=TRUE), estimate_areal_depth=TRUE)
#'
#' ## fit model with default parameters
#' modelfit = fit_metabolism(fitdata)
#'
#' ## fit model with custom parameters
#' class(fitdata) = 'data.frame' #reset the class assigned by prep_metabolism
#' #before calling streamMetabolizer functions directly
#' modname = mm_name(type='bayes', pool_K600='binned',
#' err_obs_iid=TRUE, err_proc_acor=FALSE, err_proc_iid=TRUE,
#' ode_method = 'trapezoid', deficit_src='DO_mod', engine='stan')
#' modspecs = specs(modname)
#'
#' #overwrite default ln(Q) node centers
#' addis = tapply(log(fitdata$discharge), substr(fitdata$solar.time,1,10), mean)
#' modspecs$K600_lnQ_nodes_centers = seq(from=min(addis),
#' to=max(addis), length.out=7)
#'
#' modelfit = metab(specs=modspecs, data=fitdata)
#' predictions = predict_metab(modelfit)
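#'
#' ## a minimal sketch (not run): disable pooling of K600 across days, e.g.
#' ## for a site without discharge data (pool_K600='binned' requires discharge).
#' ## fitdata here is the object returned by prep_metabolism above.
#' modelfit_nopool = fit_metabolism(fitdata, pool_K600='none')
#'
#' ## a minimal sketch (not run): fit several sites in a loop. skip_prompts=TRUE
#' ## suppresses the interactive prompts about pushing results to the
#' ## StreamPULSE server. the sitecodes and dates here are illustrative.
#' fits = list()
#' for(s in c('NC_Eno', 'NC_NHC')){
#'     sp_data = request_data(sitecode=s, startdate='2017-01-01',
#'         enddate='2017-12-31')
#'     fitdata_s = prep_metabolism(d=sp_data, type='bayes',
#'         model='streamMetabolizer', interval='15 min')
#'     fits[[s]] = fit_metabolism(fitdata_s, skip_prompts=TRUE)
#' }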
fit_metabolism = function(d, pool_K600='binned', err_obs_iid=TRUE,
err_proc_acor=FALSE, err_proc_iid=TRUE, ode_method='trapezoid',
deficit_src='DO_mod', skip_prompts=FALSE){
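#Powell Center (USGS) sites have codes like 'nwis-' followed by a gage number;
#models fit to Powell Center data are not pushed to the StreamPULSE server (see below)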
is_powell = grepl('nwis-[0-9]+', d$specs$site)
fitdata = d$data
# check class of fitdata to determine which model to fit
model = class(fitdata)
if(model=="streamMetabolizer"){
model_type = fitdata@type
} else if(model=="BASE"){
stop(paste("Modeling with BASE is not currently supported.\n\t",
"Please rerun prep_metabolism with model='streamMetabolizer'."),
call.=FALSE)
}
# then, reset class of fitdata to data.frame, may not be necessary?
class(fitdata) = "data.frame"
if(model=="streamMetabolizer"){
#choose appropriate model specifications based on model type
if(model_type == 'bayes'){
# if(pool_K600 == 'none'){
# message(paste0("pool_K600 can't be 'none' in Bayesian ",
# "framework.\n\tSetting it to 'binned'."))
# pool_K600 = 'binned'
# }
engine = 'stan'#; pool_K600 = 'binned'; proc_err = TRUE
if(! 'discharge' %in% colnames(fitdata) && pool_K600 == 'binned'){
stop(paste0("Discharge data required to use pool_K600='binned'.",
"\n\tTry using pool_K600='none'."), call.=FALSE)
}
} else {
if(pool_K600 == 'binned'){
stop(paste0("pool_K600 can't be 'binned' in MLE ",
"framework.\n\tTry setting it to 'none'."), call.=FALSE)
# pool_K600 = 'none'
}
if(err_proc_iid == TRUE){
stop(paste0("err_proc_iid can't be TRUE in MLE ",
"framework.\n\tTry setting it to FALSE."), call.=FALSE)
# err_proc_iid = FALSE
}
engine = 'nlm'#; pool_K600 = 'none'; proc_err = FALSE
}
# set most model specs
modname = streamMetabolizer::mm_name(type=model_type, pool_K600=pool_K600,
err_obs_iid=err_obs_iid, err_proc_acor=err_proc_acor,
err_proc_iid=err_proc_iid,
ode_method=ode_method, deficit_src=deficit_src, engine=engine)
modspecs = streamMetabolizer::specs(modname)
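#modspecs is a list of model specifications that can be further customized
#before fitting (K600_lnQ_nodes_centers is overridden below when pool_K600='binned')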
if(engine == 'stan' && pool_K600 == 'binned'){
#get average log daily discharge; set K v. Q node centers
n_KQ_nodes = 7 #should be 7 <= x <= 17; increase with discharge range
addis = tapply(log(fitdata$discharge),
substr(fitdata$solar.time,1,10), mean)
modspecs$K600_lnQ_nodes_centers = seq(from=min(addis, na.rm=TRUE),
to=max(addis, na.rm=TRUE), length.out=n_KQ_nodes)
# #set mean log K for K v. Q relationship (from empirical relationship)
# av_veloc = 0.27
# av_slope = 0.005
# av_depth = 1.95
# av_disch = 1.94
# logK = log(5.62) + 0.504*log(av_veloc * av_slope) -
# 0.575*log(av_depth) - 0.0892*log(av_disch)
# modspecs$K600_lnQ_nodes_meanlog = rep(logK, n_KQ_nodes)
#
# #set sd for K prior (0.1 if you've measured K, 0.7 if using above
# #equation, 1 if agnostic)
# modspecs$K600_lnQ_nodes_sdlog = rep(0.8, n_KQ_nodes)
#
# #set daily bounce (how much K can vary by day; 0.7 <= x <= 2.4)
# modspecs$K600_daily_sigma_sigma = 1.5
}
#fit model
model_fit = streamMetabolizer::metab(specs=modspecs, data=fitdata)
# saveRDS(model_fit, '~/Desktop/NHC_fit.rds')
# model_fit = readRDS('~/Desktop/test_modelfit.rds')###COMMENT THIS####
}else if(model=="BASE"){
tmp = tempdir() # the temp dir for the data and model
if(!dir.exists(tmp)) dir.create(tmp) # create if does not exist
# create BASE directory
directory = file.path(tmp,"BASE")
if(!dir.exists(directory)){
dir.create(directory)
# add input folder
dir.create(file.path(directory,"input"))
# add output folder
dir.create(file.path(directory,"output"))
# - add instantaneous rates folder
dir.create(file.path(directory,"output","instantaneous rates"))
# - add validation plots folder
dir.create(file.path(directory,"output","validation plots"))
}
# download BASE_metab_model_v2.2.txt
download.file("https://raw.githubusercontent.com/streampulse/BASE/master/BASE/BASE_metab_model_v2.2.txt",
file.path(directory,"BASE_metab_model_v2.2.txt"), quiet=TRUE)
file.remove(list.files(file.path(directory,"input"), full.names=TRUE)) # clear out input folder
fitdata = split(fitdata, fitdata$Date)
# write individual date base files
lapply(fitdata, function(xx){
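#BASE expects complete days: 96 rows corresponds to 24 h of 15-minute observations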
if(nrow(xx)==96 && all(complete.cases(xx))){ # only full days
date = unique(xx$Date)[1]
write.csv(xx, file=paste0(directory,"/input/BASE_",date,".csv"), row.names=FALSE)
}
})
cat("Estimating metabolism with BASE.\n")
# found in BASE_functions.R
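# run the BASE model; n.iter and n.burnin set the total MCMC iterations and burn-in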
fit_BASE(directory=directory, interval=900, n.iter=30000, n.burnin=15000)
# structure(list(output_directory = directory), class="BASE") # return the BASE directory with class BASE
model_fit = list(output_directory=directory)
attr(model_fit, 'class') = 'BASE'
}
#extract predictions from fit object (this block was formerly a
# separate predict_metabolism function)
if(inherits(model_fit, "BASE")){
directory = model_fit$output_directory
preds = read.csv(paste0(directory,"/output/BASE_results.csv")) %>%
tidyr::separate(File, c("fileX", "date", "extX"), "_|\\.") %>%
dplyr::select(-fileX, -extX) %>% dplyr::mutate(date=as.Date(date))
#develop stuff here if we ever use BASE again
}else{
predictions = streamMetabolizer::predict_metab(model_fit)
output = list(predictions=predictions, fit=model_fit)
}
#extract data related to current model run and performance
deets = extract_model_details(model_fit, predictions, d$specs)
output$details = deets
#if model covers <= one calendar year, it's eligible for inclusion
#on the data portal; otherwise just return results to user
mod_startyr = substr(d$specs$startdate, 1, 4)
mod_endyr = substr(d$specs$enddate, 1, 4)
if(mod_startyr != mod_endyr){
cat(paste('Done. Returning your model fit and predictions.\n'))
output$details$current_best = NULL
return(output)
}
#assemble retrieval API call for the current best model's details
if(! is_powell){
cat(paste0('Checking StreamPULSE database for model results to\n\t',
'compare with the model you just fit.\n'))
u = paste0("https://data.streampulse.org/api/model_details_download?region=",
# u = paste0("localhost:5000/api/model_details_download?region=",
d$specs$region, "&site=", d$specs$site, "&year=", mod_startyr)
#retrieve details for the current best model
if(d$specs$token == 'none'){
# r = httr::GET(u)
tryCatch(R.utils::withTimeout(r <- httr::GET(u),
timeout=12, onTimeout='error'),
error=function(e){
warning(paste('Could not reach StreamPULSE.',
'Check your internet connection.'), call.=FALSE)
})
} else {
# r = httr::GET(u, httr::add_headers(Token=d$specs$token))
tryCatch(R.utils::withTimeout(r <- httr::GET(u,
httr::add_headers(Token=d$specs$token)), timeout=12,
onTimeout='error'),
error=function(e){
warning(paste('Could not reach StreamPULSE.',
'Check your internet connection.'), call.=FALSE)
})
}
if(exists('r')){
json = httr::content(r, as="text", encoding="UTF-8")
modspec = try(jsonlite::fromJSON(json), silent=TRUE)
}
#check for errors
if(!exists('modspec') || inherits(modspec, 'try-error')){
cat(paste0('Failed to retrieve data from StreamPULSE.\n\t',
'Returning your model fit and predictions.\n'))
output$details$current_best = NULL
return(output)
}
# if(length(modspec) == 1 && ! is.null(modspec$error)){
# return(output) #this line should never run; just here for later
# }
#if no model data on server, push this model up and return
if(length(modspec$specs) == 0){
if(! skip_prompts){
cat(paste0("No model fits detected for this site and calendar year",
",\n\tso yours is the best fit by default!\n\t",
"May we store your results on the StreamPULSE data portal?\n"))
user_response = get_user_input('y/n > ')
if(user_response){
cat('Thank you. Just a moment.\n')
push_model_to_server(output=output, deets=deets)
}
} else {
cat("Pushing your results to the StreamPULSE database\n")
push_model_to_server(output=output, deets=deets)
}
cat(paste0("Returning model fit and predictions.\n"))
output$details$current_best = NULL
return(output)
}
#compare this model to the current best
this_model_pen = get_model_penalty(deets)
best_model_pen = get_model_penalty(modspec$specs)
mods_equal = this_model_pen == best_model_pen
pen_dif = best_model_pen - this_model_pen
coverage_dif = deets$coverage - modspec$specs$coverage
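#coverage_dif > 0 means this model has greater data coverage than the stored one;
#pen_dif is passed to compare_models below when the penalties differ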
#if mod penalties equal, compare coverage. if better, push. return.
if(mods_equal){
if(coverage_dif > 0){ #this model has better coverage
if(! skip_prompts){
cat(paste0("Your model outperformed the best one on file!\n\t",
"May we store your results on the StreamPULSE data portal?\n"))
user_response = get_user_input('y/n > ')
if(user_response){
cat('Thank you. Just a moment.\n')
push_model_to_server(output=output, deets=deets)
}
} else {
cat(paste0("Your model outperformed the best one on file,\n\t",
"but skip_prompts == TRUE, so your results will not\n\t",
"be pushed to the StreamPULSE server.\n"))
}
}
cat(paste0("Returning model fit and predictions.\n"))
output$details$current_best = NULL
return(output)
}
#TODO: coverage should also account for NA values, not just start and end dates
#if penalties differ, evaluate penalty and coverage differences
accept = compare_models(pen_dif, coverage_dif)
# accept=TRUE ####COMMENT THIS####
if(accept){
if(! skip_prompts){
cat(paste0("Your model outperformed the best one on file!\n\t",
"May we store your results on the StreamPULSE data portal?\n"))
user_response = get_user_input('y/n > ')
if(user_response){
cat('Thank you. Just a moment.\n')
push_model_to_server(output=output, deets=deets)
}
} else {
cat(paste0("Your model outperformed the best one on file,\n\t",
"but skip_prompts == TRUE, so your results will not\n\t",
"be pushed to the StreamPULSE server.\n"))
}
cat(paste0("Returning model fit and predictions.\n"))
output$details$current_best = NULL
return(output)
}
#if here, the model did not outperform the current best on the server
cat(paste0("Returning model fit and predictions.\n"))
} else {
cat(paste('Done. Returning your model fit and predictions.\n'))
cat(paste0('\tNOTE: User-generated models based on Powell Center data\n\t',
'are not eligible for inclusion on the StreamPULSE server\n'))
}
output$details$current_best = NULL
return(output)
}