forecastR_package: Forecast stuff

Documented in fitModel

#' @title General Model Fitting Functon
#'
#' @param model A character vector of length one. The name of a model, it has to
#'   match one of the model names in the "estimation.functions" list objects.
#'   which is generated from the script R/Module_Sub_EstimationFunctions.R.
#' @param data Same as the data element from the list object generated by
#'   prepData()
#' @param settings A list. Model-specific list of settings, where applicable.
#' @param tracing A Boolean. Default is FALSE.
#'
#' @description This function applies the estimation step for the various
#'   models. It sets up the data inputs (e.g. loop through age classes, ) and
#'   calls the estimation subroutine then stores the output in a list object.
#'   Model-specific sub-routines live in the file
#'   "Module_Sub_EstimationFunctions.R".
#'
#' @details FOR NOW: JUST MAKING THIS WORK WITH THE BASIC SIBLING REGRESSION AND
#'   KALMAN FILTER SIBLING REGRESSION
#'
#' @return A list.
#' @export
#'
#' @examples
fitModel <- function(model= c("Naive", "ReturnRate", "Mechanistic", "SibRegSimple","SibRegKalman","SibRegLogPower","TimeSeriesArima","TimeSeriesExpSmooth"),
										 data = NULL, settings = NULL,tracing=FALSE){
# Check inputs
model <- match.arg(model)

# if(!(model %in% c("Naive","SibRegSimple","SibRegKalman","SibRegLogPower","TimeSeriesArima","TimeSeriesExpSmooth"))){
# 					warning("Specified model not part of current inventory")
# 					stop()
# 					}

# any other check needed here?


#  create empty list to store outputs
out.list <- list()


# NAIVE FIT VARIATIONS
# Note this section is largely identical to the sibreg variations.
# The only differences are:
# - here the naive fit is applied to all age classes, not just the youngest
# - the year range for the running avg HAS TO BE specified as an input setting.

if(model %in%  c("Naive")){
# Note: This uses "v2" of the output labels from prepData(). The code below does
# not automatically detect the column labels (i.e. "Run_Year" is hardwired)


if(tracing){print("starting naive variations -------------------")}

#replacing stop() with return(NULL) will stop the function, not put it into debug mode
#print("flag1")
if(is.null(settings) | !("avg.yrs" %in% names(settings))){print("flag2");warning("avg.yrs for naive model not specified"); stop()}


if(tracing){print("starting data reorg for naive fits")}

age.classes <- names(data)

ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)

# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
	data.in <- data[["Total"]][,2] # FIX to dynamic col label
	names(data.in) <- data[["Total"]][,1] # FIX to dynamic col label
	out.list[["Total"]] <- estimation.functions[[model]]$estimator(model.data = data.in,avg.yrs=settings$avg.yrs)
} # end if no age classes


if(!any(is.na(ages))){  # if have age classes, loop through them
	for(age.do in ages){
		if(tracing){print(paste("starting age",age.do))}
		data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label
		names(data.in) <- data[[paste(age.prefix,age.do,sep="")]][,1] # FIX to dynamic col label
		out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[[model]]$estimator(model.data = data.in,avg.yrs=settings$avg.yrs)
}} # end looping through age classes if have them



} # end if naive variation


if(model %in%  c("ReturnRate")){


	if(tracing){print("starting return rate model")}

	#check if there are settings
	if(is.null(settings)) {warning("required settings avg, pred.label, and last.n for rate model not specified. Using default values")
		avg.use <- "wtmean"
		pred.label.use <- NULL
		last.n.use <- NULL
		}

	if(!is.null(settings)) {
		avg.use <- settings$avg
		pred.label.use <- settings$pred.label
		last.n.use <- settings$last.n
	}


  # too many variations, so just forcing all or nothing settings
	#if(!is.null(settings)){
	#	  if(!("avg" %in% names(settings))){warning("avg for rate model not specified. Using default = wtmean")}
	#		if(!("pred.label" %in% names(settings))){warning("pred.label for rate model not specified.Using default = first Pred_ column from left")}
	#	  if(!("last.n" %in% names(settings)) ){	warning("last.n for rate model not specified.Using default = all years")}
	#			# Need to decide how to handle last.n vs missing years, (as per https://github.com/MichaelFolkes/forecastR_package/issues/15)
	#    	# then build in check and warning accordingly
	#			} # end checking settings


	if(tracing){print("starting data reorg for rate fits")}

	age.classes <- names(data)

	ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
	age.prefix <- gsub(ages[1],"",age.classes[1])
	#print(age.prefix)

	# for now this handles the "noage" version, need to test to ensure robustness
	# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
	if(any(is.na(ages))){
		data.in <- data[["Total"]][,c("Run_Year","Total",pred.label.use)]

		# check the data
		dt.chk <- rate.datacheck(data.use = data.in, pred.label = pred.label.use, tracing=FALSE)
		if(!dt.chk$var.check){warning("selected predictor variable not in data set"); stop()}
		if(!dt.chk$yrs.check & !is.null(last.n.use)){warning("last.n specified, but missing years in data"); stop()}

		out.list[["Total"]] <- estimation.functions[[model]]$estimator(data.use = data.in,avg=avg.use, pred.label = pred.label.use, last.n  = last.n.use)
	} # end if no age classes


	if(!any(is.na(ages))){  # if have age classes, loop through them
		for(age.do in ages){
			if(tracing){print(paste("starting age",age.do))}

			data.in <- data[[paste(age.prefix,age.do,sep="")]][,c("Run_Year",paste(gsub(" ","_",age.prefix),age.do,sep=""),pred.label.use)]
			if(tracing){print(data.in)}
   		# check the data
   		dt.chk <- rate.datacheck(data.use = data.in, pred.label = pred.label.use, tracing=FALSE)
   		if(!dt.chk$var.check){warning("selected predictor variable not in data set"); stop()}
   		if(!dt.chk$yrs.check & !is.null(last.n.use)){warning("last.n specified, but missing years in data"); stop()}

   		out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[[model]]$estimator(data.use = data.in,avg=avg.use, pred.label = pred.label.use, last.n  = last.n.use)


		}} # end looping through age classes if have them



}#END Rate


#SIBLING REGRESSION VARIATIONS

if(model %in%  c("SibRegSimple","SibRegKalman","SibRegLogPower")){
# if doing any of the sibling regressions, fit the specified model to all age classes
# except the youngest. For the youngest age class, use N5 -> eventually convert this into a user choice
# Note: to be consistent with old code, the sibreg variations use
# "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)


if(tracing){print("starting sibling regression variations -------------------")}


if(tracing){print("starting data reorg for sibling regressions")}

age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)

if(any(is.na(ages))){ warning("Input data does not have age classes. Cannot do sibling regressions!");stop()}


# do the youngest age class (only 1 pass through, but keeps the variable assignment contained)
for(age.do in ages[1]){
if(tracing){print(paste("starting age",age.do))}


data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label
names(data.in) <- data[[paste(age.prefix,age.do,sep="")]][,1] # FIX to dynamic col label

out.list[[paste(age.prefix,age.do,sep="")]] <- estimation.functions[["Naive"]]$estimator(model.data = data.in,avg.yrs=5)
}

# do the "not youngest" age classes

for(age.do in ages[-1]){
if(tracing){print(paste("starting age",age.do))}


# match up the run years
age.y <- data[[paste(age.prefix,age.do,sep="")]] # response age class
age.x <- data[[paste(age.prefix,age.do-1,sep="")]] # predictor age class


# need this to handle inputs where have different starting brood years for age classes
# (e.g. after trimming data by run year using slide in GUI)

yrs.use.y <- age.y[,"Run_Year"] # run years available for the older age class
yrs.use.x <- yrs.use.y-1    # run years available for th eyounger age class

# only keep the ones that are available
yrs.use.x <- yrs.use.x[yrs.use.x %in% age.x[,"Run_Year"]] # check which of the "required" years are available for the younger age
yrs.use.y <- yrs.use.y[yrs.use.y %in% (yrs.use.x+1)]

# debug testing
#print("yrs.x")
#print(yrs.use.x)
#print("yrs.y")
#print(yrs.use.y)

data.in <- data.frame(y  = age.y[age.y[,"Run_Year"] %in% (yrs.use.y),paste("Age_",age.do,sep="")],
				x = age.x[age.x[,"Run_Year"] %in% yrs.use.x,paste("Age_",age.do-1,sep="")])

names(data.in) <- paste("Age",c(age.do,age.do-1),sep="")


if(tracing){ print(yrs.match); print(data.in)}


# plan is that all the sibreg models are identical before and after this step
# only the estimator code differs (and the output will have some model-specific list elements)
out.list[[paste(age.prefix,age.do,sep="")]] <- c(estimation.functions[[model]]$estimator(data.in,settings=settings) ,list(run.yrs = yrs.use.y))


}


} # end if sibling regression variation


if(tolower(model) == "complexsibreg"){
  #browser()
age.classes <- names(data)


}#END if(tolower(model) == "complexsibreg"){



#TIME SERIES VARIATIONS
# Note this section is largely identical to the other variations.
# The only differences are:
# - TBI


if(model %in%  c("TimeSeriesArima","TimeSeriesExpSmooth")){
# Note: This uses "v2" of the output labels from prepData(). The code below does not
# automatically detect the column labels (i.e. "Run_Year" is hardwired)


if(tracing){print("starting time series variations -------------------")}


if(tracing){print("starting data reorg for time series fits")}

age.classes <- names(data)
ages <- as.numeric(gsub("\\D", "", age.classes)) # as per https://stat.ethz.ch/pipermail/r-help/2011-February/267946.html
age.prefix <- gsub(ages[1],"",age.classes[1])
#print(age.prefix)


# for now this handles the "noage" version, need to test to ensure robustness
# also: should be able to combine the 2 versions into 1 generic, but for now just make it work
if(any(is.na(ages))){
	data.in <- data[["Total"]][,2] # FIX to dynamic col label
	names(data.in) <- data[["Total"]][,1] # FIX to dynamic col label
	out.list[["Total"]] <- c(estimation.functions[[model]]$estimator(model.data = data.in,settings = settings),
													list(run.yrs = data[["Total"]][,1]))
} # end if no age classes


if(!any(is.na(ages))){  # if have age classes, loop through them
	for(age.do in ages){
		if(tracing){print(paste("starting age",age.do))}

		data.in <- data[[paste(age.prefix,age.do,sep="")]][,3] # FIX to dynamic col label (col 3 has the values)
		# note: auto.arima assumes all the values have the same interval and no missing intervals.
		# => should handle this in the  data check function for ARIMA. For now just made a note there

		out.list[[paste(age.prefix,age.do,sep="")]] <- c(estimation.functions[[model]]$estimator(model.data = data.in,settings = settings),
													list(run.yrs = data[[paste(age.prefix,age.do,sep="")]][,1]))
}} # end looping through age classes if have them



} # end if time series variation

#calculate performance measure summary

pm.out <- resids.pm(out.list,type="fitted")
out.list <- c(out.list,list(fitted.pm=pm.out))

return(out.list)

}# end fit.FC()