#' @title General Model Fitting Function
#'
#' @param model A character vector of length one. The name of a model, it has to
#' match one of the model names in the "estimation.functions" list objects.
#' which is generated from the script R/Module_Sub_EstimationFunctions.R.
#' @param data Same as the data element from the list object generated by
#' prepData()
#' @param data.sibreg Same as the sibling regression input element *sibreg.in* from the
#' list object generated by prepData()
#' @param settings A list. Model-specific list of settings, where applicable.
#' @param tracing A Boolean. Default is FALSE.
#'
#' @description This function applies the estimation step for the various
#' models. It sets up the data inputs (e.g. loop through age classes, ) and
#' calls the estimation subroutine then stores the output in a list object.
#' Model-specific sub-routines live in the file
#' "Module_Sub_EstimationFunctions.R".
#'
#' @details FOR NOW: JUST MAKING THIS WORK WITH THE BASIC SIBLING REGRESSION AND
#' KALMAN FILTER SIBLING REGRESSION
#'
#' @return A list.
#' @export
#'
#' @examples
fitModel <- function(model= c("Naive", "ReturnRate", "Mechanistic",
"SibRegSimple","SibRegKalman","SibRegLogPower","SibRegPooledSimple","SibRegPooledLogPower", "SibRegComplex",
"TimeSeriesArima","TimeSeriesExpSmooth", "NoAgeCovar"),
data = NULL, data.sibreg = NULL, settings = NULL,tracing=FALSE){
# Check inputs
model <- match.arg(model)
# if(!(model %in% c("Naive","SibRegSimple","SibRegKalman","SibRegLogPower","TimeSeriesArima","TimeSeriesExpSmooth"))){
# warning("Specified model not part of current inventory")
# stop()
# }
# any other check needed here?
# create empty list to store outputs
out.list <- list()
# NAIVE FIT VARIATIONS
# Note this section is largely identical to the sibreg variations.
# The only differences are:
# - here the naive fit is applied to all age classes, not just the youngest
# - the year range for the running avg HAS TO BE specified as an input setting.
if(model %in% c("Naive")){
# Note: This uses "v2" of the output labels from prepData(). The code below does
# not automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting naive variations -------------------")}
#replacing stop() with return(NULL) will stop the function, not put it into debug mode
#print("flag1")
if(is.null(settings) | !("avg.yrs" %in% names(settings))){print("flag2");warning("avg.yrs for naive model not specified"); stop()}
if(tracing){print("starting data reorg for naive fits")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
data.in <- data[["Total"]][,2] # FIX to dynamic col label
names(data.in) <- data[["Total"]][,1] # FIX to dynamic col label
out.list[["Total"]] <- estimation.functions[[model]]$estimator(model.data = data.in,avg.yrs=settings$avg.yrs)
} # end if no age classes
if(!any(is.na(ages))){ # if have age classes, loop through them
for(age.do in ages){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label
names(data.in) <- data[[paste(age.prefix,age.do,sep="")]][,1] # FIX to dynamic col label
out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[[model]]$estimator(model.data = data.in,avg.yrs=settings$avg.yrs)
}} # end looping through age classes if have them
} # end if naive variation
if(model %in% c("ReturnRate")){
if(tracing){print("starting return rate model")}
#check if there are settings
if(is.null(settings)) {warning("required settings avg, pred.label, and last.n for rate model not specified. Using default values")
avg.use <- "wtmean"
pred.label.use <- NULL
last.n.use <- NULL
}
if(!is.null(settings)) {
avg.use <- settings$avg
pred.label.use <- settings$pred.label
last.n.use <- settings$last.n
}
# too many variations, so just forcing all or nothing settings
#if(!is.null(settings)){
# if(!("avg" %in% names(settings))){warning("avg for rate model not specified. Using default = wtmean")}
# if(!("pred.label" %in% names(settings))){warning("pred.label for rate model not specified.Using default = first Pred_ column from left")}
# if(!("last.n" %in% names(settings)) ){ warning("last.n for rate model not specified.Using default = all years")}
# # Need to decide how to handle last.n vs missing years, (as per https://github.com/MichaelFolkes/forecastR_package/issues/15)
# # then build in check and warning accordingly
# } # end checking settings
if(tracing){print("starting data reorg for rate fits")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
data.in <- data[["Total"]][,c("Run_Year","Total",pred.label.use)]
# check the data
dt.chk <- rate.datacheck(data.use = data.in, pred.label = pred.label.use, tracing=FALSE)
if(!dt.chk$var.check){warning("selected predictor variable not in data set"); stop()}
if(!dt.chk$yrs.check & !is.null(last.n.use)){warning("last.n specified, but missing years in data"); stop()}
out.list[["Total"]] <- estimation.functions[[model]]$estimator(data.use = data.in,avg=avg.use, pred.label = pred.label.use, last.n = last.n.use)
} # end if no age classes
if(!any(is.na(ages))){ # if have age classes, loop through them
for(age.do in ages){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,c("Run_Year",paste(gsub(" ","_",age.prefix),age.do,sep=""),pred.label.use)]
if(tracing){print(data.in)}
# check the data
dt.chk <- rate.datacheck(data.use = data.in, pred.label = pred.label.use, tracing=FALSE)
if(!dt.chk$var.check){warning("selected predictor variable not in data set"); stop()}
if(!dt.chk$yrs.check & !is.null(last.n.use)){warning("last.n specified, but missing years in data"); stop()}
out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[[model]]$estimator(data.use = data.in,avg=avg.use, pred.label = pred.label.use, last.n = last.n.use)
}} # end looping through age classes if have them
}#END Rate
#SIBLING REGRESSION VARIATIONS
if(model %in% c("SibRegSimple","SibRegKalman","SibRegLogPower","SibRegPooledSimple","SibRegPooledLogPower",
"SibRegComplex")){
# if doing any of the sibling regressions, fit the specified model to all age classes
# except the youngest. For the youngest age class, use N5 -> eventually convert this into a user choice
# Note: to be consistent with old code, the sibreg variations use
# "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting sibling regression variations -------------------")}
if(tracing){print("starting data reorg for sibling regressions")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
if(any(is.na(ages))){ warning("Input data does not have age classes. Cannot do sibling regressions!");stop()}
# do the youngest age class (only 1 pass through, but keeps the variable assignment contained)
for(age.do in ages[1]){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label
names(data.in) <- data[[paste(age.prefix,age.do,sep="")]][,1] # FIX to dynamic col label
out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[["Naive"]]$estimator(model.data = data.in,avg.yrs=5)
}
# do the "not youngest" age classes
for(age.do in ages[-1]){
if(tracing){print(paste("starting age",age.do,"--------------------"))}
# old version, before prepSibRegData()
#if(!grepl("Pool",model )){
#if(FALSE){
# match up the run years
#age.y <- data[[paste(age.prefix,age.do,sep="")]] # response age class
#age.x <- data[[paste(age.prefix,age.do-1,sep="")]] # predictor age class
# need this to handle inputs where have different starting brood years for age classes
# (e.g. after trimming data by run year using slide in GUI)
#yrs.use.y <- age.y[,"Run_Year"] # run years available for the older age class
#yrs.use.x <- yrs.use.y-1 # run years available for th eyounger age class
# only keep the ones that are available
#yrs.use.x <- yrs.use.x[yrs.use.x %in% age.x[,"Run_Year"]] # check which of the "required" years are available for the younger age
#yrs.use.y <- yrs.use.y[yrs.use.y %in% (yrs.use.x+1)]
# debug testing
#print("yrs.x")
#print(yrs.use.x)
#print("yrs.y")
#print(yrs.use.y)
#data.in <- data.frame(y = age.y[age.y[,"Run_Year"] %in% (yrs.use.y),paste("Age_",age.do,sep="")],
# x = age.x[age.x[,"Run_Year"] %in% yrs.use.x,paste("Age_",age.do-1,sep="")])
#names(data.in) <- paste("Age",c(age.do,age.do-1),sep="")
#} # end excluded old version
#} # end if NOT pooled old version
# print(data.sibreg)
if(! grepl("Complex",model)){
# the complex model with covariates needs a different source for the input
# NOT POOLED
# this generates a string like "Age_4"
if(!grepl("Pool",model )){ pred.col <- paste0("Age_",age.do-1) }
# POOLED
# this generates a string like "Pooled_4_3" with numbers depending on age.do and max.pool
if(grepl("Pool",model )){
num.pool <- min(age.do- min(ages), settings$max.pool) #adjust how many age classes are pooled if have fewer than max.pool sibling ages
if(num.pool >1) {pool.label<-"Pooled_"} else {pool.label<- "Age_"} # if have only 1, switch "Pooled" to "Age"
pred.col <- paste0(pool.label,paste(age.do - c(1:num.pool),collapse = "_"))
}
# extract the identified predictor col from the new sibreg data frame
# need to exclude NA, because brood year coverage differ by age class
if(tracing){print("predictor label");print(pred.col)}
data.sub <- data.frame(br.yr = data.sibreg$Brood_Year,
y = data.sibreg[, paste0("Age_",age.do)],
x = data.sibreg[, pred.col ]) %>%
drop_na()
#print("source: data.sibreg")
#print(head(data.sibreg))
#print("sibreg input (not complex)")
#print(head(data.sub))
data.in <- data.sub %>% select(y,x)
yrs.use.y <- data.sub[,"br.yr"] + age.do # for consistency with old version, needed downstream
# Changed from brood year to run year, need to verify
} # end data prep if sibreg, but not complex
# set up the data for the complex sibreg
if(grepl("Complex",model)){
# add the previous age to the df for the forecasted age df which also has the covariates
# then standardize the covariates to mean = 0, sd =1
age.fc.label <- paste(age.prefix,age.do,sep="")
age.predictor.label <- paste(age.prefix,age.do-1,sep="")
pred.col <- gsub(" ","_",age.predictor.label)
complex.in.df <- data[[age.fc.label]] %>% left_join(data[[age.predictor.label]] %>%
select(Brood_Year,all_of(pred.col)), by= "Brood_Year")
complex.in.df[,grepl("Cov_",names(complex.in.df))] <- transformDF(X = complex.in.df[,grepl("Cov_",names(complex.in.df)),drop=FALSE],
type = "standardize", logfirst = FALSE)
#print(test.df)
#stop()
data.in <- complex.in.df
yrs.use.y <- data.in[,"Run_Year"] # for consistency with old version, needed downstream
# Changed from brood year to run year, need to verify
settings$base.eq = paste(gsub(" ","_",age.fc.label), "~ -1 +", pred.col)
#print(settings)
} # end data prep if complex sibreg
if(tracing){print("sibreg data.in");print(data.in)}
# plan is that all the sibreg models are identical before and after this step
# only the estimator code differs (and the output will have some model-specific list elements)
out.list[[paste(age.prefix,age.do,sep="")]] <- c(estimation.functions[[model]]$estimator(data.in,settings=settings) ,
list(run.yrs = yrs.use.y,sib.pred.used = pred.col))
} # end looping through age classes
} # end if simple or pooled sibling regression variation
if(tolower(model) == "complexsibreg"){
#browser()
age.classes <- names(data)
}#END if(tolower(model) == "complexsibreg"){
#TIME SERIES VARIATIONS
# Note this section is largely identical to the other variations.
# The only differences are:
# - TBI
if(model %in% c("TimeSeriesArima","TimeSeriesExpSmooth")){
# Note: This uses "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting time series variations -------------------")}
if(tracing){print("starting data reorg for time series fits")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
data.in <- data[["Total"]][,2] # FIX to dynamic col label
names(data.in) <- data[["Total"]][,1] # FIX to dynamic col label
out.list[["Total"]] <- c(estimation.functions[[model]]$estimator(model.data = data.in,settings = settings),
list(run.yrs = data[["Total"]][,1]))
} # end if no age classes
if(!any(is.na(ages))){ # if have age classes, loop through them
for(age.do in ages){
if(tracing){print(paste("starting age",age.do))}
data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label (col 3 has the values)
# note: auto.arima assumes all the values have the same interval and no missing intervals.
# => should handle this in the data check function for ARIMA. For now just made a note there
out.list[[paste(age.prefix,age.do,sep="")]] <- c(estimation.functions[[model]]$estimator(model.data = data.in,settings = settings),
list(run.yrs = data[[paste(age.prefix,age.do,sep="")]][,1]))
}} # end looping through age classes if have them
} # end if time series variation
# NO Age Data with Covariate model
# Note this section is largely identical to the other variations.
# The only differences are:
# - TBI
if(model %in% c("NoAgeCovar")){
# Note: This uses "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)
if(tracing){print("starting noage with covar -------------------")}
if(tracing){print("starting data reorg for noage with covar")}
age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)
# if(any(is.na(ages))){
data.in <- data[["Total"]]
out.list[["Total"]] <- c(estimation.functions[[model]]$estimator(data.in,settings = settings),
list(run.yrs = data[["Total"]][,1]))
print(names(out.list[["Total"]] ))
print(out.list[["Total"]]$fitted.values )
print(out.list[["Total"]]$obs.values )
print(out.list[["Total"]]$fitted.values - out.list[["Total"]]$obs.values)
# } # end if no age classes
# SET IT UP TO WORK WITH AGE CLASS DATA AS WELL?
} # end if no age with covar
#calculate performance measure summary
pm.out <- resids.pm(out.list,type="fitted")
out.list <- c(out.list,list(fitted.pm=pm.out),list(settings = settings))
return(out.list)
}# end fit.FC()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.