ForecastR-Package: Forecast stuff

Documented in doRetro

data.extract <- function(data,yrs,option="obs"){
# should merge this with the data.sub() routine
#browser()
if(option=="obs"){
# extracts observed values for the resid calcs
# in this case, yrs is a vector with all the years to be included ("Run_Year")

	obs.mat <- matrix(NA, nrow = length(yrs) , ncol = length(names(data)), dimnames = list(paste("Obs",yrs,sep=""),names(data)))
	# do this in a loop for now, but should transfer to apply-style
	for(age.use in names(data)){
		#print("--------")
		#print(age.use)

		data.sub <-  data[[age.use]]
		#print(data.sub)

		# Old version, got messed up with some new return rate input files
		# always have values in last col
		# (should work for withage and noage data
		#obs.mat[,age.use]	<- data.sub[data.sub[,"Run_Year"] %in% yrs,dim(data.sub)[2] ]

		obs.mat[,age.use]	<- data.sub[data.sub[,"Run_Year"] %in% yrs,grepl("Age_",names(data.sub)) | grepl("Total",names(data.sub)) ]

		#print(obs.mat)


		}


	if(length(names(data))>1){ obs.mat <- cbind(obs.mat,Total=rowSums(obs.mat))}
		# add total column if have morew than 1 age class
		# 1 age class typically means "Total" from noage data

	out.obj <- obs.mat

} # end if option = "obs"


if(option=="retro"){
# extracts data subset to feed into retrospective
# in this case, yrs is the last year to be included ("Run_Year")

		data.sub.list <- list()

		for(age.use in names(data)){
		data.sub <-  data[[age.use]]
		data.sub.list[[age.use]] <- data.sub[data.sub[,"Run_Year"] < yrs,]
		}

		out.obj <- data.sub.list

} # end if option = "retro"


#browser()
	return(out.obj)
}



#' @title Run a retrospective evaluation.
#'
#' @param model A character vector of length one. It is the model name and must match one of the model names in the \code{estimation.functions} list object.
#' @param data A data frame. Equivalent to the "data" element in the list output from \code{\link{prepData}}.
#' @param data.sibreg Same as the sibling regression input element *sibreg.in* from the list object generated by prepData()
#' @param predictors A list. Optional input needed for return rate model. Equal to the element named \code{predictors} from output of
#'   \code{\link{prepData}}. Default is NULL.
#' @param covariates A list. Optional input needed for covariate model. Equal to the element named \code{covariates} from output of
#'   \code{\link{prepData}}. Default is NULL.
#' @param retro.settings A list
#' @param fit.settings a model-specific list of settings, where applicable
#' @param fc.settings A list
#' @param tracing A Boolean (default is FALSE)
#' @param out.type A character vector of length one. Can have value "short" (default) or "full". See value for more information.
#' @param interval.n An integer. The sample size for generating the retro-based interval. Default is 500.
#' @param interval.quants A Boolean (default is TRUE). If TRUE the output includes quantiles of the retro-based interval, rather than the full sample.
#' @param pt.fc
#'
#' @return Key output (with out.type="short") from this function includes two summary tables with performance measures from the dynamic retrospective:  retro.pm.all: for each age class, use as many retrospective years as possible (i.e more for younger ages). retro.pm.bal: balanced output, using the same retrospective years for all age classes and the total. If out.type="full", then the output also includes the full model fit info for each retro year
#'
#' @export
#'
#' @examples
doRetro <- function(model= NULL, data = NULL, data.sibreg = NULL, predictors = NULL, covariates = NULL, retro.settings = NULL, fit.settings = NULL,
		fc.settings = NULL, tracing=FALSE,out.type = c("short", "full") , interval.n = 500,
		interval.quants = TRUE,pt.fc.in = NULL){
# this function runs through a retrospective test
# (i.e. dynamic retrospective, see https://github.com/avelez-espino/forecastR_phase4/wiki/App-2-Perf.-Eval.-Details)

# this function just creates subsets of the input data and loops
# through them to apply fitModel() and calcFC(). Therefore the
# inputs for this function include all the inputs needed for those
# calls.

# Note: Bootstrap intervals are currently not used in the retrospective test.
#       (i.e. in call to calcFC(), do.boot is hardwired as FALSE)


if(is.null(retro.settings)){  # if no settings are provided, use these defaults
	min.yrs <- 20
	#warning("using default retrospective settings in calcFC()")
	}

if(!is.null(retro.settings)){  # if settings are provided, use them
	min.yrs <- retro.settings$min.yrs
	}

data.yrs <-  sapply(data,yrs.extract) # all the run years in the data set by age

range.use <- data.yrs[,1]  # using range from youngest age class,
# see wiki page at https://github.com/avelez-espino/forecastR_phase4/wiki/App-2-Perf.-Eval.-Details

retro.yrs <-  seq(range.use[1] + min.yrs,range.use[2])    # all the fc.yr used in the retrospective,

ages.vec <- as.numeric(gsub("Age ","",names(data)))

if(length(names(data))>1){  # if have more than 1 age class
 retro.mat.fc <- matrix(NA, nrow = length(retro.yrs) , ncol = length(names(data))+1,dimnames = list(paste("FC",retro.yrs,sep=""),c(names(data),"Total")))}

if(length(names(data))==1){  # Typically 1 age class = "Total" (from noage input)
retro.mat.fc <- matrix(NA, nrow = length(retro.yrs) , ncol = 1, dimnames = list(paste("FC",retro.yrs,sep=""),names(data)) )}



if(out.type=="full"){retro.fits.list <- list()}


if(exists("fitted.pm.array")){rm(fitted.pm.array)}

for(fc.yr.retro in retro.yrs){
print("----------------------------------")
print(fc.yr.retro)
  data.use <- data.extract(data,yrs=fc.yr.retro,option="retro")
  
  if(!is.null(data.sibreg)){
  data.use.sibreg <- data.sibreg %>% dplyr::filter(Brood_Year < fc.yr.retro - max(ages.vec))
  				# TEMPORARY KLUDGE: SEE https://github.com/MichaelFolkes/forecastR_package/issues/32
          # only used in fitModel. calcFC subsets as needed anyway
	}
  if(is.null(data.sibreg)){
  data.use.sibreg <- NULL
	}


	
		  

  model.fitted <- fitModel(model= model, data = data.use, data.sibreg = data.use.sibreg,
  												 settings = fit.settings,tracing=FALSE)

  print("finished fitModel")
  #print(model.fitted)


 if(out.type=="full"){retro.fits.list[[paste0("FC",fc.yr.retro)]] <- model.fitted}

 if(!exists("fitted.pm.array")){
  fitted.pm.array <- array(NA, dim = c(dim(model.fitted$fitted.pm),length(retro.yrs)), dimnames = list(dimnames(model.fitted$fitted.pm)[[1]], dimnames(model.fitted$fitted.pm)[[2]], paste("FC",retro.yrs,sep=""))) }

 fitted.pm.array[,,paste("FC",fc.yr.retro,sep="")] <- model.fitted$fitted.pm

  print("starting calcFC")

  fc.calc <- calcFC(fit.obj= model.fitted,data = data.use,
  									data.sibreg = data.sibreg,
  									predictors =  predictors, covariates = covariates,
  									fc.yr= fc.yr.retro, settings = fc.settings,tracing=FALSE)

  print("finished calcFC")
  print(fc.calc)

  retro.mat.fc[paste("FC",fc.yr.retro,sep=""),] <- fc.calc$pt.fc
 }
#browser()
	#print("-----")
	#print(data)
	retro.mat.obs <- data.extract(data = data ,yrs = retro.yrs,option="obs" )

	#print("--retro.mat.fc--")
	#print(retro.mat.fc)

	#print("--retro.mat.obs--")
	#print(retro.mat.obs)

	retro.mat.resids <- retro.mat.fc - retro.mat.obs
	log.resids <- log1p(retro.mat.fc) - log1p(retro.mat.obs)  # to handle records with 0 abd for some ages

	# sample retro-based intervals

	#take 10% more samples, then trim off the largest  and smallest 5 %

	# VERSION WITH NORMAL ERRORS
	#sd.est <-  lapply(as.data.frame(retro.mat.resids),sd,na.rm=TRUE)
	#errors.sample <- as.data.frame(lapply(sd.est,function(x){sample.raw <- rnorm(interval.n *1.1,0,x);sample.out <- sample.raw[sample.raw<=quantile(sample.raw,probs=0.9)] }))
	#int.sample <- errors.sample + unlist(lapply(pt.fc.in$pt.fc,FUN=function(x){rep(x,interval.n)})) # creates a vector with n replicates of the pt fc
#browser()
	# VERSION WITH LOGNORMAL ERRORS (NEED TO DIG INTO BIAS CORRECTION!)
	#print("-------lognormal error --------")
	#print(log.resids)
	sd.est <-  lapply(as.data.frame(log.resids),sd,na.rm=TRUE)
	#print(sd.est)
	errors.sample <- as.data.frame(lapply(sd.est,function(x){sample.raw <- rnorm(interval.n *1.1,0,x);sample.out <- sample.raw[sample.raw<=quantile(sample.raw,probs=0.9)] }))
	#bias.correction <- lapply(sd.est,FUN=function(x){bias.corr <- (x^2 * ((interval.n-2)/interval.n)) / 2})
	#errors.corr	 <- errors.sample + unlist(lapply(bias.correction,FUN=function(x){rep(x,interval.n)}))



	int.sample <- expm1(errors.sample + log1p(unlist(lapply(pt.fc.in$pt.fc,FUN=function(x){rep(x,interval.n)})))) # creates a vector with n replicates of the pt fc
	int.sample <- round(int.sample)

	# set all negative to 0

	int.sample[int.sample < 0 ] <-  0

	if(interval.quants){ int.out <- as.data.frame(lapply(int.sample,function(x){quantile(x,probs=c(0.1,0.25,0.5,0.75,0.9))}))}
	if(!interval.quants){  int.out <- int.sample }


	out.list <- list(retro.pt.fc = retro.mat.fc ,
						retro.obs = retro.mat.obs,
						retro.resids = retro.mat.resids,
						fitted.pm = fitted.pm.array,
						retro.interval = int.out
						)


	# extract that fitted.pm from the last year of the retrpspective (need to remove 3rd array)
	# more direct way of doing this?
	fitted.pm.last <- as.matrix(fitted.pm.array[,,dim(fitted.pm.array)[3]],ncol=dim(fitted.pm.array)[2])
	if(dim(fitted.pm.last)[2]==1){dimnames(fitted.pm.last)[[2]] <- "Total"}

	out.list$fitted.pm.last <- fitted.pm.last

	# apply the retro.pm function to the resids matrix
	out.list$retro.pm.all.constantyrs <- resids.pm(out.list,type="retro1")
	out.list$retro.pm.all.varyrs <- resids.pm(out.list,type="retro2")
	out.list$retro.pm.bal <- resids.pm(out.list,type="retro3")


	if(out.type=="full"){out.list$retro.fits <- retro.fits.list}

	return(out.list)

}#end doRetro()