#' @title General Model Fitting Functon
#'
#' @param model A character vector of length one. The name of a model, it has to
#' match one of the model names in the "estimation.functions" list objects.
#' which is generated from the script R/Module_Sub_EstimationFunctions.R.
#' @param data Same as the data element from the list object generated by
#' prepData()
#' @param settings A list. Model-specific list of settings, where applicable.
#' @param tracing A Boolean. Default is FALSE.
#'
#' @description This function applies the estimation step for the various
#' models. It sets up the data inputs (e.g. loop through age classes, ) and
#' calls the estimation subroutine then stores the output in a list object.
#' Model-specific sub-routines live in the file
#' "Module_Sub_EstimationFunctions.R".
#'
#' @details FOR NOW: JUST MAKING THIS WORK WITH THE BASIC SIBLING REGRESSION AND
#' KALMAN FILTER SIBLING REGRESSION
#'
#' @return A list.
#' @export
#'
#' @examples
fitModel <- function(model= c("Naive", "ReturnRate", "Mechanistic", "SibRegSimple","SibRegKalman","SibRegLogPower","TimeSeriesArima","TimeSeriesExpSmooth"),
data = NULL, settings = NULL,tracing=FALSE){
# Check inputs
model <- match.arg(model)
# if(!(model %in% c("Naive","SibRegSimple","SibRegKalman","SibRegLogPower","TimeSeriesArima","TimeSeriesExpSmooth"))){
# warning("Specified model not part of current inventory")
# stop()
# }
# any other check needed here?
# create empty list to store outputs
out.list <- list()
# NAIVE FIT VARIATIONS
# Note this section is largely identical to the sibreg variations.
# The only differences are:
# - here the naive fit is applied to all age classes, not just the youngest
# - the year range for the running avg HAS TO BE specified as an input setting.
if(model %in% c("Naive")){
# Note: This uses "v2" of the output labels from prepData(). The code below does
# not automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting naive variations -------------------")}
#replacing stop() with return(NULL) will stop the function, not put it into debug mode
#print("flag1")
if(is.null(settings) | !("avg.yrs" %in% names(settings))){print("flag2");warning("avg.yrs for naive model not specified"); stop()}
if(tracing){print("starting data reorg for naive fits")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
data.in <- data[["Total"]][,2] # FIX to dynamic col label
names(data.in) <- data[["Total"]][,1] # FIX to dynamic col label
out.list[["Total"]] <- estimation.functions[[model]]$estimator(model.data = data.in,avg.yrs=settings$avg.yrs)
} # end if no age classes
if(!any(is.na(ages))){ # if have age classes, loop through them
for(age.do in ages){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label
names(data.in) <- data[[paste(age.prefix,age.do,sep="")]][,1] # FIX to dynamic col label
out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[[model]]$estimator(model.data = data.in,avg.yrs=settings$avg.yrs)
}} # end looping through age classes if have them
} # end if naive variation
if(model %in% c("ReturnRate")){
if(tracing){print("starting return rate model")}
#check if there are settings
if(is.null(settings)) {warning("required settings avg, pred.label, and last.n for rate model not specified. Using default values")
avg.use <- "wtmean"
pred.label.use <- NULL
last.n.use <- NULL
}
if(!is.null(settings)) {
avg.use <- settings$avg
pred.label.use <- settings$pred.label
last.n.use <- settings$last.n
}
# too many variations, so just forcing all or nothing settings
#if(!is.null(settings)){
# if(!("avg" %in% names(settings))){warning("avg for rate model not specified. Using default = wtmean")}
# if(!("pred.label" %in% names(settings))){warning("pred.label for rate model not specified.Using default = first Pred_ column from left")}
# if(!("last.n" %in% names(settings)) ){ warning("last.n for rate model not specified.Using default = all years")}
# # Need to decide how to handle last.n vs missing years, (as per https://github.com/MichaelFolkes/forecastR_package/issues/15)
# # then build in check and warning accordingly
# } # end checking settings
if(tracing){print("starting data reorg for rate fits")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
data.in <- data[["Total"]][,c("Run_Year","Total",pred.label.use)]
# check the data
dt.chk <- rate.datacheck(data.use = data.in, pred.label = pred.label.use, tracing=FALSE)
if(!dt.chk$var.check){warning("selected predictor variable not in data set"); stop()}
if(!dt.chk$yrs.check & !is.null(last.n.use)){warning("last.n specified, but missing years in data"); stop()}
out.list[["Total"]] <- estimation.functions[[model]]$estimator(data.use = data.in,avg=avg.use, pred.label = pred.label.use, last.n = last.n.use)
} # end if no age classes
if(!any(is.na(ages))){ # if have age classes, loop through them
for(age.do in ages){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,c("Run_Year",paste(gsub(" ","_",age.prefix),age.do,sep=""),pred.label.use)]
if(tracing){print(data.in)}
# check the data
dt.chk <- rate.datacheck(data.use = data.in, pred.label = pred.label.use, tracing=FALSE)
if(!dt.chk$var.check){warning("selected predictor variable not in data set"); stop()}
if(!dt.chk$yrs.check & !is.null(last.n.use)){warning("last.n specified, but missing years in data"); stop()}
out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[[model]]$estimator(data.use = data.in,avg=avg.use, pred.label = pred.label.use, last.n = last.n.use)
}} # end looping through age classes if have them
}#END Rate
#SIBLING REGRESSION VARIATIONS
if(model %in% c("SibRegSimple","SibRegKalman","SibRegLogPower")){
# if doing any of the sibling regressions, fit the specified model to all age classes
# except the youngest. For the youngest age class, use N5 -> eventually convert this into a user choice
# Note: to be consistent with old code, the sibreg variations use
# "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting sibling regression variations -------------------")}
if(tracing){print("starting data reorg for sibling regressions")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
if(any(is.na(ages))){ warning("Input data does not have age classes. Cannot do sibling regressions!");stop()}
# do the youngest age class (only 1 pass through, but keeps the variable assignment contained)
for(age.do in ages[1]){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label
names(data.in) <- data[[paste(age.prefix,age.do,sep="")]][,1] # FIX to dynamic col label
out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[["Naive"]]$estimator(model.data = data.in,avg.yrs=5)
}
# do the "not youngest" age classes
for(age.do in ages[-1]){
if(tracing){print(paste("starting age",age.do))}
# match up the run years
age.y <- data[[paste(age.prefix,age.do,sep="")]] # response age class
age.x <- data[[paste(age.prefix,age.do-1,sep="")]] # predictor age class
# need this to handle inputs where have different starting brood years for age classes
# (e.g. after trimming data by run year using slide in GUI)
yrs.use.y <- age.y[,"Run_Year"] # run years available for the older age class
yrs.use.x <- yrs.use.y-1 # run years available for th eyounger age class
# only keep the ones that are available
yrs.use.x <- yrs.use.x[yrs.use.x %in% age.x[,"Run_Year"]] # check which of the "required" years are available for the younger age
yrs.use.y <- yrs.use.y[yrs.use.y %in% (yrs.use.x+1)]
# debug testing
#print("yrs.x")
#print(yrs.use.x)
#print("yrs.y")
#print(yrs.use.y)
data.in <- data.frame(y = age.y[age.y[,"Run_Year"] %in% (yrs.use.y),paste("Age_",age.do,sep="")],
x = age.x[age.x[,"Run_Year"] %in% yrs.use.x,paste("Age_",age.do-1,sep="")])
names(data.in) <- paste("Age",c(age.do,age.do-1),sep="")
if(tracing){ print(yrs.match); print(data.in)}
# plan is that all the sibreg models are identical before and after this step
# only the estimator code differs (and the output will have some model-specific list elements)
out.list[[paste(age.prefix,age.do,sep="")]] <- c(estimation.functions[[model]]$estimator(data.in,settings=settings) ,list(run.yrs = yrs.use.y))
}
} # end if sibling regression variation
if(tolower(model) == "complexsibreg"){
#browser()
age.classes <- names(data)
}#END if(tolower(model) == "complexsibreg"){
#TIME SERIES VARIATIONS
# Note this section is largely identical to the other variations.
# The only differences are:
# - TBI
if(model %in% c("TimeSeriesArima","TimeSeriesExpSmooth")){
# Note: This uses "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting time series variations -------------------")}
if(tracing){print("starting data reorg for time series fits")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
data.in <- data[["Total"]][,2] # FIX to dynamic col label
names(data.in) <- data[["Total"]][,1] # FIX to dynamic col label
out.list[["Total"]] <- c(estimation.functions[[model]]$estimator(model.data = data.in,settings = settings),
list(run.yrs = data[["Total"]][,1]))
} # end if no age classes
if(!any(is.na(ages))){ # if have age classes, loop through them
for(age.do in ages){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label (col 3 has the values)
# note: auto.arima assumes all the values have the same interval and no missing intervals.
# => should handle this in the data check function for ARIMA. For now just made a note there
out.list[[paste(age.prefix,age.do,sep="")]] <- c(estimation.functions[[model]]$estimator(model.data = data.in,settings = settings),
list(run.yrs = data[[paste(age.prefix,age.do,sep="")]][,1]))
}} # end looping through age classes if have them
} # end if time series variation
#calculate performance measure summary
pm.out <- resids.pm(out.list,type="fitted")
out.list <- c(out.list,list(fitted.pm=pm.out))
return(out.list)
}# end fit.FC()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.