ctctools: Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.

#' @title (SPFI) Sum SEAK CWT data
#'
#' @description CWT data in SEAK fishery 4 (AK JLO T) is revised to equal sum of
#'   fishery 4+6 (AK FALL T). This is a summation that is unique to SPFI
#'   estimation for SEAK only. If estimating for SEAK, then the hrj database
#'   variables: NomCat# (VB variable:cwtcatch), AEQCat# (VB variable:aeqcwtcat),
#'   and AEQTot# (VB variable:aeqcwttotmort) for fishery 4 are revised to equal
#'   sum of fishery 4+6. The basis of this summation is not documented. Up to
#'   2017 the SEAK catch data file has been limited to 5 strata. These are the
#'   fishery definitions: \tabular{ccccccc}{ fishery \tab gear \tab fishery \tab
#'   fishery \tab fishery \tab fishery \tab fishery \cr index   \tab      \tab
#'   type    \tab country \tab name    \tab region \tab psc\cr 4 \tab T \tab P
#'   \tab US \tab AK JLO T \tab AK \tab AABM\cr 6 \tab T \tab P \tab US \tab AK
#'   FALL T \tab AK \tab AABM }
#'
#' @param x A data frame, representation of the CWT data. Equivalent to a subset
#'   of the hrj data frame where data.type=="NomCat"
#' @param data.catch A list with structure equivalent to the output from the
#'   function \code{\link{readCatchData}}.
#'
#' @details
#'
#' @return The function returns a data frame representation of the CWT data, in
#'   same format as the first input argument.
#' @export
#'
#' @examples
#' \dontrun{
#' #user needs to define catch file:
#' data.catch <- readCatchData(...)
#' #this assumes hrj.df has been read in and transformed to long format:
#' cwtcatch <- hrj.df[hrj.df$data.type=="NomCat"
#'                    & hrj.df$fishery.index %in% 1:6
#'                    & hrj.df$Stock.Number %in% C(1,9,10,15,17,20),]
#' cwtcatch <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
#' }
#'
adjustAlaska <- function(x, data.catch){
  #this puts AK FALL T (index 6) into AK JLO T (index 4)

 #i've now hard coded the index number as if the input data have six strata and I wish to collapse to 5, the TopStrata and LastStrata values are incorrect. (they only work if input data in catch file have 5 strata)

 # fishery.toupdate <- sort(unique(x$fishery.index))[data.catch$intTopStrata-1]
  fishery.toupdate <- 4

 # fishery.source <- sort(unique(x$fishery.index))[data.catch$intLastStrata]
  fishery.source <- 6
  fishery.source.data <- x[x$fishery.index==fishery.source,c("Stock.Number", "age", "brood.year", "value")]
  colnames(fishery.source.data)[colnames(fishery.source.data)=='value'] <- "value2"

  x <- merge(x, fishery.source.data, by=c("Stock.Number", "age", "brood.year"))

  x$value[x$fishery.index==fishery.toupdate] <- x$value[x$fishery.index==fishery.toupdate] + x$value2[x$fishery.index==fishery.toupdate]

  ####
  #sum catch data:
  data.catch.sub <- data.catch$data.catch[data.catch$data.catch$TempStrata %in% c(fishery.toupdate, fishery.source),]
  data.catch.sum <- aggregate(CatchContribution~TempYear, data=data.catch.sub, sum)
  data.catch.sum$TempStrata <- fishery.toupdate
  #prepare to rbind, first add columns:
  colnames.missing <- colnames(data.catch.sub)[!colnames(data.catch.sub) %in% colnames(data.catch.sum)]
  df.tmp <- data.frame(matrix(NA, ncol=length(colnames.missing)))
  colnames(df.tmp) <- colnames.missing
  data.catch.sum <- cbind(data.catch.sum, df.tmp)

  #remove strata 4 & 6 in old data:
  data.catch$data.catch <- data.catch$data.catch[!data.catch$data.catch$TempStrata %in% c(fishery.toupdate, fishery.source),]
  data.catch$data.catch <- rbind(data.catch$data.catch, data.catch.sum)
  data.catch$data.catch <- data.catch$data.catch[order(data.catch$data.catch$TempStrata, data.catch$data.catch$TempYear),]

  return(return(list(x=x, data.catch=data.catch)))
}#END adjustAlaska



#' @title (SPFI) Data imputation by the Accrued APC (AAPC) method.
#'
#' @param data.df A data frame. Typically output from \code{\link{calc_N.ty}}.
#' @param data.catch A list with structure equivalent to the output from the
#'   function \code{\link{readCatchData}}.
#' @param stratum.var A string. The name of the stratum variable. Default is
#'   "fishery.index".
#' @param year.var  A string. The name of the year variable. Default is
#'   "return.year".
#' @param value.var  A string. The name of the data variable. Default is "N.ty".
#' @param catchmin An integer of length one. The minimum catch criteria for
#'   inclusion of a stratum in the estimation. Strata below this value are set
#'   to NA and estimated by imputation. Default is 0 (i.e. no strata are reset).
#'
#' @description This is similar to APC imputation as described on page 99 and
#'   Appendix 5 of REPORT TCCHINOOK (09)-2
#'   (\url{http://www.psc.org/download/35/chinook-technical-committee/2120/tcchinook09-2.pdf}).
#'    The abundance from a year-stratum contributes to the mean proportion
#'   estimate if the year-stratum has catch >1000.
#'
#' @return A list of four elements named: \code{model,
#'   imputation.results, annual.estimate, data.df}. The first, \code{model}, is
#'   a list of the linear regression results for each stratum (including the
#'   data used in fit), the second, \code{imputation.results}, is a data frame
#'   of two columns. The first column usually has the same name as the
#'   \code{year.var} argument and the second is \code{N.t}. The other data
#'   frames represent the values calculated during intermediary steps. The time
#'   series of proportions for year with complete strata can be found in
#'   data.df.
#' @export
#'
#' @examples
#' \dontrun{
#' N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)
#' results.list <- calc_AAPC(N.ty, data.catch)
#' }
calc_AAPC <- function(data.df, data.catch, stratum.var="fishery.index", year.var="return.year", value.var="N.ty", catchmin=0){

	imputation.name <-  as.character(match.call()[[1]])
	imputation.name <-  substr(imputation.name, 6, nchar(imputation.name))


	colnames(data.df)[colnames(data.df)==stratum.var] <- "stratum"
	colnames(data.df)[colnames(data.df)==year.var] <- "return.year"
	colnames(data.df)[colnames(data.df)==value.var] <- "value"

	# #check that there is >1 stratum:
	# if(length(unique(data.df$stratum[!is.na(data.df$value)]))==1) {
	#   stop("\nData includes just one stratum so AAPC method cannot be used. Change imputation argument to NULL and restart calculation of SPFI.")
	# }

	# years with catch <catchmin are excluded from abundance estimation of APC

	#years.lowcatch <- sort(unique(data.catch$data.catch$TempYear[data.catch$data.catch$CatchContribution<1000]))

	data.df$value[data.df$T.ty < catchmin] <- NA

	year.NA <- sort(unique(data.df$return.year[is.na(data.df$value)]))

	data.df.sum <- aggregate(value~return.year, data = data.df, sum, na.action = na.pass)
	colnames(data.df.sum)[colnames(data.df.sum)=='value'] <- "sum.complete"
	data.df.sum[!is.na(data.df.sum$sum.complete), imputation.name] <- 1
	data.df <- merge(data.df, data.df.sum, by='return.year')

	data.df$proportion <- data.df$value / data.df$sum.complete

	ap.byyear <- aggregate(proportion~stratum, data = data.df[!(data.df$return.year %in% year.NA),], FUN = function(x){

		cumsum(x) / seq_along(x)
	})

	ap.byyear.trans <- t(ap.byyear$proportion)

	ap.byyear.trans <- as.data.frame(ap.byyear.trans)
	colnames(ap.byyear.trans) <- ap.byyear$stratum
	ap.byyear.trans$year <- unique(data.df$return.year[!(data.df$return.year %in% year.NA)])
	ap.byyear.trans.long <- reshape(ap.byyear.trans, dir="long", varying = list(1:(ncol(ap.byyear.trans)-1)), times =ap.byyear$stratum , v.names="expanding.avg")

	#the linear regression:
	ap.lm <- by(data=ap.byyear.trans.long, INDICES = as.factor(ap.byyear.trans.long$time), FUN = function(x){
		lmfit <- lm(expanding.avg~year, data=x)
		return(list(lmfit=lmfit, fitdata=x))
	})

	data.df$proportion.avg <- NA

	data.df <- apply(data.df, 1, FUN=function(x, ap.lm){

		fit.obj <- ap.lm[[as.character(x['stratum'])]]$lmfit
		if(is.na(x['proportion'])) {
			x['proportion.avg'] <- predict(object = fit.obj, newdata = data.frame(year=x['return.year']) )}
		return(x)
	}, ap.lm)

	data.df <- t(data.df)
	data.df <- as.data.frame(data.df)



	### tech report method, summing proportions before division.
	#sum the abundance:
	incompleteYear.sum <- aggregate(value~return.year, data.df[data.df$return.year %in% year.NA,], sum )
	#sum proportions of strata with abundance data:
	data.df.sum4 <- aggregate(proportion.avg~return.year, data.df[data.df$return.year %in% year.NA & !is.na(data.df$value),], sum)
	data.df.sum5 <- merge(incompleteYear.sum, data.df.sum4, by='return.year')
	data.df.sum5$estimate.sum <- data.df.sum5$value/data.df.sum5$proportion.avg
	colnames(data.df.sum5)[colnames(data.df.sum5)=="proportion.avg"] <- imputation.name
	annual.estimate <- data.df.sum5

	data.df <- merge(data.df, annual.estimate[,c("return.year", "estimate.sum")], by="return.year", all.x = TRUE)
	data.df$value.imputed[is.na(data.df$value)] <- data.df$proportion.avg[is.na(data.df$value)]* data.df$estimate.sum[is.na(data.df$value)]

	results <- rbind(data.df.sum[!is.na(data.df.sum$sum.complete),], data.frame(return.year=annual.estimate$return.year, sum.complete=annual.estimate$estimate.sum, apc=annual.estimate[imputation.name]))

	results <- results[order(results$return.year),]
	colnames(results) <- c(year.var, "N.y", "scalar")

	return(list(imputation.method="aapc", catchmin=catchmin, model=ap.lm, imputation.results=results, annual.estimate=annual.estimate, data.df=data.df))
}#END calc_AAPC





#' @title (SPFI) Data imputation by the Average Proportion Correction method.
#'
#' @param data.df A data frame. Typically output from \code{\link{calc_N.ty}}.
#' @param data.catch A list with structure equivalent to the output from the
#'   function \code{\link{readCatchData}}.
#' @param stratum.var A string. The name of the stratum variable. Default is
#'   "fishery.index".
#' @param year.var  A string. The name of the year variable. Default is
#'   "return.year".
#' @param value.var  A string. The name of the data variable. Default is "N.ty".
#' @param catchmin An integer of length one. The minimum catch criteria for
#'   inclusion of a stratum in the estimation. Strata below this value are set
#'   to NA and estimated by imputation. Default is 0 (i.e. no strata are reset).
#'
#' @description This reproduces the results of the APC imputation as described
#'   on page 99 and Appendix 5 of REPORT TCCHINOOK (09)-2
#'   (\url{http://www.psc.org/download/35/chinook-technical-committee/2120/tcchinook09-2.pdf}).
#'    The abundance from a year-stratum contributes to the mean proportion
#'   estimate if the year-stratum has catch >1000.
#'
#' @return A list of four data frames. The data frames are:
#'   \code{imputation.results, annual.estimate, prop.mean, data.df}. The first,
#'   \code{imputation.results}, comprises two columns. The first column usually
#'   has the same name as the \code{year.var} argument and the second is
#'   \code{N.t}. The other data frames represent the values calculated during
#'   intermediary steps. The time series of proportions for year with complete
#'   strata can be found in data.df.
#' @export
#'
#' @examples
#' \dontrun{
#' N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)
#' results.list <- calc_APC(N.ty, data.catch)
#' }
calc_APC <- function(data.df, data.catch=NULL, stratum.var="fishery.index", year.var="return.year", value.var="N.ty", catchmin=0){

  colnames(data.df)[colnames(data.df)==stratum.var] <- "stratum"
  colnames(data.df)[colnames(data.df)==year.var] <- "return.year"
  colnames(data.df)[colnames(data.df)==value.var] <- "value"

  #check that there is >1 stratum:
  if(length(unique(data.df$stratum[!is.na(data.df$value)]))==1) {
    stop("\nData includes just one stratum so APC method cannot be used. Change imputation argument to NULL and restart calculation of SPFI.")
  }

  # years with catch <1000 are excluded from abundance estimation of APC
  # This criteria is only applied when the data.catch data frame is included:
  if(is.null(data.catch)){
    if(!exists("data.catch.warning")){
      cat("\n!!!\nNo catch data frame included so low catch years are not excluded.\n")
      data.catch.warning <<- FALSE
     }
  }else{
    years.lowcatch <- sort(unique(data.catch$data.catch$TempYear[data.catch$data.catch$CatchContribution<catchmin]))
    data.df$value[data.df$T.ty < catchmin] <- NA

  }

  year.NA <- unique(data.df$return.year[is.na(data.df$value)])

  data.df.sum <- aggregate(value~return.year, data = data.df, sum, na.action = na.pass)
  colnames(data.df.sum)[colnames(data.df.sum)=='value'] <- "sum.complete"
  data.df.sum$apc[!is.na(data.df.sum$sum.complete)] <- 1
  data.df <- merge(data.df, data.df.sum, by='return.year')

  data.df$proportion <- data.df$value / data.df$sum.complete
  ap <- aggregate(proportion~stratum, data = data.df, mean)
  colnames(ap)[colnames(ap)=='proportion'] <- "proportion.avg"
  data.df <- merge(data.df, ap, by='stratum')

  ### tech report method, summing proportions before division:
  incompleteYear.sum <- aggregate(value~return.year, data.df[data.df$return.year %in% year.NA,], sum )
  data.df.sum4 <- aggregate(proportion.avg~return.year, data.df[data.df$return.year %in% year.NA & !is.na(data.df$value),], sum)
  data.df.sum5 <- merge(incompleteYear.sum, data.df.sum4, by='return.year')
  data.df.sum5$estimate.sum <- data.df.sum5$value/data.df.sum5$proportion.avg
  colnames(data.df.sum5)[colnames(data.df.sum5)=="proportion.avg"] <- "apc"

  ### alternate approach is to calculate a total abundance estimate based on the values in each stratum then take the mean of all the estimates.
  data.df$estimate.mean <- NA
  data.df$estimate.mean[data.df$return.year %in% year.NA] <- data.df$value[data.df$return.year %in% year.NA] / data.df$proportion.avg[data.df$return.year %in% year.NA]
  annual.estimate <- aggregate(estimate.mean~return.year, data.df, mean)

  annual.estimate <- merge(annual.estimate, data.df.sum5[,c('return.year', 'estimate.sum', "apc")], by='return.year')


  data.df <- merge(data.df, annual.estimate[,c("return.year", "estimate.sum")], by="return.year", all.x = TRUE)
  data.df$value.imputed[is.na(data.df$value)] <- data.df$proportion.avg[is.na(data.df$value)]* data.df$estimate.sum[is.na(data.df$value)]

  results <- rbind(data.df.sum[!is.na(data.df.sum$sum.complete),], data.frame(return.year=annual.estimate$return.year, sum.complete=annual.estimate$estimate.sum, apc=annual.estimate$apc))
  results <- results[order(results$return.year),]
  colnames(results) <- c(year.var, "N.y", "APCscalar")

 # return(list(imputation.results=results, annual.estimate=annual.estimate, prop.mean=ap, data.df=data.df))
  return(list(imputation.method="apc", catchmin=catchmin, imputation.results=results, annual.estimate=annual.estimate, data.df=data.df))

}#END calc_APC



#' @title (SPFI) Calculate difference between distribution parameters after each
#'   iteration.
#'
#' @param d.tsa.prior A data frame. This is a copy of \code{d.tsa}, made before
#'   an iteration loop during which \code{d.tsa} will be revised.
#' @param d.tsa A data frame. Output of \code{\link{calc_d.tsa}}.
#'
#' @description The user is not typically going to use this function. It is
#'   called by \code{\link{calc_SPFI}}.
#'
#' @return A vector of length one.
#' @export
calc_Difference <- function(d.tsa.prior, d.tsa){

  colnames(d.tsa.prior)[which(colnames(d.tsa.prior)=="d.tsa")] <- "d.tsa.prior"
  d.tsa <- merge(d.tsa,d.tsa.prior, by=c("fishery.index", "Stock.Number", "age"))
  d.tsa$d.tsa.diff <- abs(d.tsa$d.tsa.prior - d.tsa$d.tsa)

  difference.max <- aggregate(d.tsa.diff~fishery.index, data = d.tsa, max, na.rm=TRUE)
  colnames(difference.max)[2] <- "maxdifference"

  maxmaxdifference <- sum(difference.max$maxdifference, na.rm=TRUE)
  return(maxmaxdifference)

}#END calc_Difference



#' @title (SPFI) Calculate the distribution parameters grouped by fishery,
#'   stock, and age.
#'
#' @description This is equivalent to equation 1 in the draft SPFI document.
#'   This function does a single estimate of the \code{d} values. It does not
#'   perform iterations to optimize.
#'
#' @param r.tsa.sum Output from \code{\link{calc_tsa.sum}}.
#' @param n.ysa Synonymous with \code{CWTPop} in VB or:
#'   \code{hrj.df[hrj.df$data.type=="Pop" & hrj.df$fishery.index == 1,]}
#' @param hcwt.ty Output from \code{\link{calc_hcwt.ty}}.
#' @param standardize.bol A Boolean, default=FALSE.
#'
#' @details The initial call of this function will have no data for
#'   \code{hcwt.ty}. If this is true, then the values are set to 0.01 as is done
#'   in the Visual Basic.
#'
#' @return A data frame of the distribution parameter estimates grouped by
#'   fishery stratum, stock, and age.
#' @export
#'
#' @examples
#' \dontrun{
#' #look to \code{\link{buildSPFIscript}} for creating hrj.df
#' hrj.df <- hrj.df[hrj.df$spfiflag==1,]
#' cwtpop <- hrj.df[hrj.df$data.type=="Pop" & hrj.df$fishery.index == 1 & hrj.df$Stock.Number %in% stock.subset,]
#' cwtpop <- subset(cwtpop,select = -fishery.index) #n.ysa
#' cwtcatch <- hrj.df[hrj.df$data.type=="NomCat" & hrj.df$fishery.index %in% fishery.subset & hrj.df$Stock.Number %in% stock.subset,]
#' if(region=="seak") cwtcatch <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
#' r.tsa.sum <- calc_tsa.sum(x = cwtcatch, newvar.name = "r.tsa.sum")
#' d.tsa <- calc_d.tsa(r.tsa.sum = r.tsa.sum, n.ysa = cwtpop, standardize.bol = TRUE)
#' }
calc_d.tsa <- function(r.tsa.sum, n.ysa, hcwt.ty=NULL, standardize.bol=FALSE){
  #n.ysa = cwt catch by stock, fishery(strata), age, across years (line 808):

  #initialize hr = 0.01 taken from line 812 of frmMain.vb calc_SPFI()
  if( is.null(hcwt.ty)){
    print("restarting hcwt.ty")
    hcwt.ty <- expand.grid(fishery.index=unique(r.tsa.sum$fishery.index), return.year=unique(n.ysa$return.year), hcwt.ty=0.01 )
  }
  colnames(n.ysa)[colnames(n.ysa)=="value"] <- "n.ysa"

  denom.tmp <- merge( hcwt.ty, n.ysa[,c("Stock.Number", 'return.year', 'age', 'n.ysa')], by='return.year', all = TRUE)

  denom.tmp$h.by.n <- denom.tmp$hcwt.ty * denom.tmp$n.ysa
  denom.sum <- aggregate(h.by.n~Stock.Number+fishery.index+age, data = denom.tmp, sum, na.rm=TRUE)
  d.tsa <- merge(r.tsa.sum, denom.sum, by=c('Stock.Number', 'fishery.index', 'age'))

  d.tsa$d.tsa <- d.tsa$r.tsa.sum/d.tsa$h.by.n

  if(standardize.bol){
    d.prime.max <- aggregate(d.tsa~fishery.index, data = d.tsa, max, na.rm=TRUE)
    colnames(d.prime.max)[2] <- "d.prime.max"
    d.tsa <- merge(d.tsa, d.prime.max, by='fishery.index', all=TRUE)
    d.tsa$d.tsa <- d.tsa$d.tsa/d.tsa$d.prime.max
  }

  attr(d.tsa, "standardize.bol") <- standardize.bol
  return(d.tsa)
}#END calc_d.tsa





#' @title (SPFI) Data imputation by the GLMC method.
#'
#' @param data.df A data frame. Typically output from \code{\link{calc_N.ty}}.
#' @param data.catch A list with structure equivalent to the output from the
#'   function \code{\link{readCatchData}}.
#' @param stratum.var A string. The name of the stratum variable. Default is
#'   "fishery.index".
#' @param year.var  A string. The name of the year variable. Default is
#'   "return.year".
#' @param value.var  A string. The name of the data variable. Default is "N.ty".
#' @param catchmin An integer of length one. The minimum catch criteria for
#'   inclusion of a stratum in the estimation. Strata below this value are set
#'   to NA and estimated by imputation. Default is 0 (i.e. no strata are reset).
#'
#' @description This reproduces the results of the GLMC imputation as described by John Carlile.
#'
#' @return A list of four data frames. The data frames are:
#'   \code{imputation.results, annual.estimate, prop.mean, data.df}. The first,
#'   \code{imputation.results}, comprises two columns. The first column usually
#'   has the same name as the \code{year.var} argument and the second is
#'   \code{N.t}. The other data frames represent the values calculated during
#'   intermediary steps. The time series of proportions for year with complete
#'   strata can be found in data.df.
#' @export
#'
#' @examples
#' \dontrun{
#' N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)
#' results.list <- calc_GLMC(N.ty, data.catch)
#' }
calc_GLMC <- function(data.df, data.catch, stratum.var="fishery.index", year.var="return.year", value.var="N.ty", catchmin=0, ignoreCWTHR=0:2){


	colnames(data.df)[colnames(data.df)==stratum.var] <- "stratum"
	colnames(data.df)[colnames(data.df)==year.var] <- "return.year"
	colnames(data.df)[colnames(data.df)==value.var] <- "value"

	#check that there is >1 stratum:
	if(length(unique(data.df$stratum[!is.na(data.df$value)]))==1) {
		stop("\nData includes just one stratum so GLM method cannot be used. Change imputation argument to NULL and restart calculation of SPFI.")
	}

	# years with catch <catchmin are excluded from abundance estimation of glm

	years.lowcatch <- sort(unique(data.catch$data.catch$TempYear[data.catch$data.catch$CatchContribution<catchmin]))

	data.df$imputed <- FALSE


	#if hcwt.ty is na (due to no recoveries) then abundance (N.ty) will already be NA.
	#this if() handles the range of imputing options
	if(ignoreCWTHR==0){
		#impute any abundance where catch is low and abundance !=NA due to no recoveries
		#meaning, don't impute cases of low catch where abundance was already NA
		data.df$imputed[!is.na(data.df$value) & data.df$T.ty < catchmin] <- TRUE

	}else if(ignoreCWTHR==1){
		#impute any abundance where catch is low, even when abundance=NA due to no recoveries
		data.df$imputed[data.df$T.ty < catchmin] <- TRUE
	}else if(ignoreCWTHR==2){
		#impute any abundance where catch is low or any case of abundance=NA due to no recoveries- no matter the catch level (this option is odd)
		data.df$imputed[is.na(data.df$value) | data.df$T.ty < catchmin] <- TRUE
	}

	year.impute <- unique(data.df$return.year[data.df$imputed==TRUE])

	data.df$strata.fac <- as.factor(data.df$stratum)
	data.df$return.year.fac <- as.factor(data.df$return.year)
	data.df$value.log <- log(data.df$value)
	glm.fit <- glm(formula = value.log~return.year.fac+strata.fac, data = data.df[data.df$imputed==FALSE,])
	#get the strata that need a prediction:
		newdata <- data.df[data.df$imputed==TRUE,]
	#remove strata that will be predicted:
	data.df <- data.df[data.df$imputed==FALSE,]

	#need to make sure there is a year coefficient for any newdata year:
	strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
	year.coef.ind <- grep(pattern = "year",names(coefficients(glm.fit)))
	year.coef.name <- names(coefficients(glm.fit))[year.coef.ind]
	year.coef <- strReverse(substr(strReverse(year.coef.name), 1,4))
 	year.coef.missing <- unique(newdata$return.year[!newdata$return.year %in% year.coef])
 	newdata <- newdata[!newdata$return.year %in% year.coef.missing,]

	predict.val <- predict(object = glm.fit, newdata=newdata)
	newdata$value <- exp(predict.val)
	newdata$imputed <- TRUE
	#data.df$imputed <- FALSE
	data.df <- rbind(data.df, newdata)
	data.df <- data.df[order(data.df$return.year, data.df$stratum),]

	results <- aggregate(value~return.year, data.df, sum)

	#switch column names back to how they came in:
	colnames(data.df)[colnames(data.df)=="stratum"] <- stratum.var
	colnames(data.df)[colnames(data.df)=="return.year"] <- year.var
	colnames(data.df)[colnames(data.df)=="value"] <- value.var

	#this fills in for any missing year:
	year.series <- data.frame(return.year=seq(min(results$return.year, na.rm = TRUE), max(results$return.year, na.rm = TRUE)))
	results <- merge(year.series, results, by='return.year', all.x = TRUE)
	results <- results[order(results$return.year),]
	colnames(results) <- c(year.var, "N.y")

	return(list(imputation.method="glmc", catchmin=catchmin, ignoreCWTHR=ignoreCWTHR, glm.fit=glm.fit, imputation.results=results, data.df=data.df))
}#END calc_GLMC



#' @title (SPFI) Calculate the CWT harvest rate parameters grouped by fishery,
#'   and year.
#'
#' @description This is equivalent to equation 2 in the draft SPFI document.
#'   This function does a single estimate of the harvest rate values. It does
#'   not perform iterations to optimize.
#'
#' @param r.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param d.tsa Output from \code{\link{calc_d.tsa}}.
#' @param n.ysa Synonymous with \code{CWTPop} in VB or:
#'   \code{hrj.df[hrj.df$data.type=="Pop" & hrj.df$fishery.index == 1,]}
#' @inheritParams calc_d.tsa
#'
#' @return A data frame of the harvest rate parameter estimates grouped by
#'   fishery stratum and year.
#' @export
#'
#' @examples
#' \dontrun{
#' #look to \code{\link{buildSPFIscript}} for creating hrj.df
#' hrj.df <- hrj.df[hrj.df$spfiflag==1,]
#' cwtpop <- hrj.df[hrj.df$data.type=="Pop" & hrj.df$fishery.index == 1 & hrj.df$Stock.Number %in% stock.subset,]
#' cwtpop <- subset(cwtpop,select = -fishery.index) #n.ysa
#' cwtcatch <- hrj.df[hrj.df$data.type=="NomCat" & hrj.df$fishery.index %in% fishery.subset & hrj.df$Stock.Number %in% stock.subset,]
#' if(region=="seak") cwtcatch <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
#' r.ty.sum <- calc_ty.sum(x = cwtcatch, newvar.name = "r.ty.sum")
#' d.tsa <- calc_d.tsa(r.tsa.sum = r.tsa.sum, n.ysa = cwtpop, standardize.bol = TRUE)
#' hcwt.ty <- calc_hcwt.ty(r.ty.sum=r.ty.sum,  d.tsa = d.tsa, n.ysa = cwtpop)
#' }
calc_hcwt.ty <- function(r.ty.sum, d.tsa, n.ysa){

  colnames(n.ysa)[colnames(n.ysa)=="value"] <- "n.ysa"

  denom.tmp <- merge(d.tsa, n.ysa, by=c('Stock.Number', 'age'), all = TRUE)
  denom.tmp$d.by.n <- denom.tmp$d.tsa * denom.tmp$n.ysa
  denom.sum <- aggregate(d.by.n~fishery.index+return.year, data = denom.tmp, sum, na.rm=TRUE)

  hcwt.ty <- merge(r.ty.sum, denom.sum, by=c('fishery.index', 'return.year'), all = TRUE)
  hcwt.ty$hcwt.ty <- hcwt.ty$r.ty.sum / hcwt.ty$d.by.n

  return(hcwt.ty[,c("fishery.index", "return.year", "r.ty.sum", "d.by.n", "hcwt.ty")])

}#END calc_hcwt.ty



#' @title (SPFI) Calculate the AEQ stratum harvest rate parameters grouped by fishery, and year.
#'
#' @description This is equivalent to equation 6 in the draft SPFI document. In
#'   the Visual Basic this is referred to as the AEQScaler.
#'
#' @param c.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param r.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param hcwt.ty Output from \code{\link{calc_hcwt.ty}}.
#'
#' @return A data frame of the AEQ stratum harvest rate parameter estimates
#'   grouped by fishery stratum and year.
#' @export
#'
#' @examples
#' \dontrun{
#' aeqcwt <- hrj.df[hrj.df$data.type==data.type & hrj.df$fishery.index %in% fishery.subset & hrj.df$Stock.Number %in% stock.subset,]
#' if(region=="seak") aeqcwt <- adjustAlaska(x = aeqcwt, data.catch = data.catch)
#' c.ty.sum <- calc_ty.sum(x = aeqcwt, newvar.name = "c.ty.sum")
#'
#' cwtcatch <- hrj.df[hrj.df$data.type=="NomCat" & hrj.df$fishery.index %in% fishery.subset & hrj.df$Stock.Number %in% stock.subset,]
#' if(region=="seak") cwtcatch <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
#' r.ty.sum <- calc_ty.sum(x = cwtcatch, newvar.name = "r.ty.sum")
#'
#' hcwt.ty <- calc_hcwt.ty(...)
#'
#' H.ty <- calc_H.ty(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, hcwt.ty = hcwt.ty)
#' }
calc_H.ty <- function(c.ty.sum, r.ty.sum, hcwt.ty){

  H.ty <- merge(c.ty.sum, r.ty.sum, by=c("fishery.index", 'return.year'))
  H.ty <- merge(H.ty, hcwt.ty[,c('fishery.index', "return.year", "hcwt.ty")], by=c("fishery.index", 'return.year'))

  H.ty$r.ty.sum[H.ty$r.ty.sum==0] <- NA
  H.ty$H.ty <- H.ty$c.ty / H.ty$r.ty.sum * H.ty$hcwt.ty
  return(H.ty)

}#END calc_H.ty



#' @title (SPFI) Calculate the AEQ stratum harvest rate parameters grouped by
#'   fishery, and year.
#'
#' @description This is equivalent to equation 6a in the draft SPFI document. In
#'   the Visual Basic this is referred to as the AEQScaler.
#'
#' @param c.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param r.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param hcwt.ty Output from \code{\link{calc_hcwt.ty}}.
#' @param T.ty Output from \code{\link{calc_T.ty}}.
#' @inheritParams calc_H.ty
#' @inheritParams calc_N.ty
#'
#' @return A data frame of the AEQ stratum harvest rate parameter estimates
#'   grouped by fishery stratum and year.
#' @export
#'
#' @examples
#' \dontrun{
#' T.ty <- calc_T.ty(catch.df = data.catch$data.catch)
#' H.ty <- calc_H.ty2(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, hcwt.ty = hcwt.ty, T.ty = T.ty)
#' }
calc_H.ty2 <- function(c.ty.sum, r.ty.sum, hcwt.ty, T.ty){
  T.ty <- T.ty[,c('fishery.index', "return.year", "T.ty")]
  hcwt.ty <- hcwt.ty[,c('fishery.index', "return.year", "hcwt.ty")]

  H.ty <- merge(c.ty.sum, r.ty.sum, by=c("fishery.index", 'return.year'))
  H.ty <- merge(H.ty, T.ty, by=c("fishery.index", 'return.year'))
  H.ty <- merge(H.ty, hcwt.ty, by=c("fishery.index", 'return.year'))

  H.ty$r.ty.sum[H.ty$r.ty.sum==0] <- NA
  H.ty$hcwt.ty[H.ty$hcwt.ty==0] <- NA

  H.ty$numerator <- H.ty$c.ty / H.ty$r.ty.sum * H.ty$T.ty
  H.ty$denominator <- H.ty$T.ty / H.ty$hcwt.ty
  H.ty$H.ty <- H.ty$numerator/H.ty$denominator
  return(H.ty)

}#END calc_H.ty2



#' @title (SPFI) Calculate the AEQ stratum harvest rate parameters grouped by
#'   fishery, and year.
#'
#' @description This is equivalent to equation 6b in the draft SPFI document. In
#'   the Visual Basic this is referred to as the AEQScaler.
#'
#' @param c.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param r.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param T.ty Output from \code{\link{calc_T.ty}}.
#' @param N.ty Output from \code{\link{calc_N.ty}}.
#' @inheritParams calc_H.ty
#' @inheritParams calc_N.ty
#'
#' @return  A data frame of the AEQ stratum harvest rate parameter estimates
#'   grouped by fishery stratum and year.
#' @export
#'
#' @examples
#' \dontrun{
#' T.ty <- calc_T.ty(catch.df = data.catch$data.catch)
#' N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)
#' H.ty <- calc_H.ty3(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, T.ty = T.ty, N.ty = N.ty)
#' }
calc_H.ty3 <- function(c.ty.sum, r.ty.sum, T.ty, N.ty){
  T.ty <- T.ty[,c('fishery.index', "return.year", "T.ty")]
  N.ty <- N.ty[,c('fishery.index', "return.year", "N.ty")]

  H.ty <- merge(c.ty.sum, r.ty.sum, by=c("fishery.index", 'return.year'))
  H.ty <- merge(H.ty, T.ty, by=c("fishery.index", 'return.year'))
  H.ty <- merge(H.ty, N.ty, by=c("fishery.index", 'return.year'))

  H.ty$r.ty.sum[H.ty$r.ty.sum==0] <- NA
  H.ty$N.ty[H.ty$N.ty==0] <- NA

  H.ty$numerator <- H.ty$c.ty / H.ty$r.ty.sum * H.ty$T.ty
  H.ty$denominator <- H.ty$N.ty
  H.ty$H.ty <- H.ty$numerator/H.ty$denominator
  return(H.ty)
}#END calc_H.ty3



#' @title (SPFI) Calculate the AEQ fishery harvest rate parameters grouped by
#'   year.
#'
#' @description This is equivalent to equation 7 in the draft SPFI document.
#'
#' @param c.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param r.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param hcwt.ty Output from \code{\link{calc_hcwt.ty}}.
#' @param T.ty Output from \code{\link{calc_T.ty}}.
#' @inheritParams calc_H.ty
#' @inheritParams calc_N.ty
#'
#' @return A data frame of the AEQ fishery harvest rate parameter estimates
#'   grouped by year.
#' @export
#'
#' @examples
#' \dontrun{
#' H.y <- calc_H.y(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, hcwt.ty = hcwt.ty, T.ty = T.ty)
#' }
calc_H.y <- function(c.ty.sum, r.ty.sum, hcwt.ty, T.ty){
  T.ty <- T.ty[,c('fishery.index', "return.year", "T.ty")]
  hcwt.ty <- hcwt.ty[,c('fishery.index', "return.year", "hcwt.ty")]

  numerator.tmp <- merge(c.ty.sum, r.ty.sum, by=c("fishery.index", 'return.year'))
  numerator.tmp <- merge(numerator.tmp, T.ty, by=c("fishery.index", 'return.year'))
  numerator.tmp$r.ty.sum[numerator.tmp$r.ty.sum==0] <- NA
  numerator.tmp$numerator <- numerator.tmp$c.ty.sum/numerator.tmp$r.ty.sum * numerator.tmp$T.ty
  numerator <- aggregate(numerator~return.year, data=numerator.tmp, sum, na.rm=TRUE)

  denominator.tmp <- merge(T.ty, hcwt.ty, by=c("fishery.index", 'return.year'))
  denominator.tmp$hcwt.ty[denominator.tmp$hcwt.ty==0] <- NA
  denominator.tmp$denominator <- denominator.tmp$T.ty / denominator.tmp$hcwt.ty
  denominator <- aggregate(denominator~return.year, data=denominator.tmp, sum, na.rm=TRUE)

  H.y <- merge(numerator, denominator, by='return.year')
  H.y$H.y <- H.y$numerator / H.y$denominator
  return(H.y)

}#END calc_H.y



#' @title (SPFI) Calculate the harvest rate parameters grouped by year.
#'
#' @description This is equivalent to equation 7b in the draft SPFI document.
#' This function produces the same output as \code{\link{calc_H.y}}.
#'
#' @param c.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param r.ty.sum Output from \code{\link{calc_ty.sum}}.
#' @param T.ty Output from \code{\link{calc_T.ty}}.
#' @param N.y Output from \code{\link{calc_N.y}}.
#' @inheritParams calc_H.ty
#' @inheritParams calc_N.ty
#'
#' @return A data frame of the AEQ fishery harvest rate parameter estimates
#'   grouped by year.
#' @export
#'
#' @examples
#' \dontrun{
#' #do imputation for total abundance:
#' imputation <- "calc_APC"
#' imputation.list <- do.call(what = imputation, args = list(N.ty, data.catch = data.catch))
#' N.y <- imputation.list$imputation.results
#' H.y <- calc_H.y2(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, T.ty = T.ty, N.y = N.y)
#' }
calc_H.y2 <- function(c.ty.sum, r.ty.sum, T.ty, N.y){

  T.ty <- T.ty[,c('fishery.index', "return.year", "T.ty")]

  numerator.tmp <- merge(c.ty.sum, r.ty.sum, by=c("fishery.index", 'return.year'))
  numerator.tmp <- merge(numerator.tmp, T.ty, by=c("fishery.index", 'return.year'))

  numerator.tmp$r.ty.sum[numerator.tmp$r.ty.sum==0] <- NA

  numerator.tmp$numerator <- numerator.tmp$c.ty.sum/numerator.tmp$r.ty.sum * numerator.tmp$T.ty

  numerator <- aggregate(numerator~return.year, data=numerator.tmp, sum, na.rm=TRUE)
  H.y <- merge(numerator, N.y, by='return.year')

  H.y$H.y <- H.y$numerator / H.y$N.y
  return(H.y)

}#END calc_H.y2



#' @title (SPFI) Calculate the abundance by fishery stratum and year.
#'
#' @description This is equivalent to equation 4 in the draft SPFI document.
#'
#' @param T.ty Output from \code{\link{calc_T.ty}}.
#' @param hcwt.ty Output from \code{\link{calc_hcwt.ty}}.
#'
#' @return A data frame of the abundance estimates, grouped by fishery stratum
#'   and year.
#' @export
#'
#' @examples
#' \dontrun{
#' N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)
#' }
calc_N.ty <- function(T.ty, hcwt.ty){

  #T.ty and hcwt.ty don't have same nrows, I'd expect them to be equal
  T.ty <- T.ty[,c('fishery.index', "return.year", "T.ty")]
  hcwt.ty <- hcwt.ty[,c('fishery.index', "return.year", "hcwt.ty")]

  N.ty <- merge(T.ty, hcwt.ty, by=c('fishery.index', "return.year"), all.x = TRUE)
  N.ty$hcwt.ty[N.ty$hcwt.ty==0] <- NA

  N.ty$N.ty <- N.ty$T.ty / N.ty$hcwt.ty
  return(N.ty)

}#END calc_N.ty



#' @title (SPFI) Calculate the total yearly abundance across fishery strata.
#'
#' @description This is equivalent to equation 5 in the draft SPFI document.
#'
#' @param N.ty Output from \code{\link{calc_N.ty}}.
#'
#' @return A data frame of the abundance estimates, grouped by year.
#' @export
#'
#' @examples
#' \dontrun{
#' T.ty <- calc_T.ty(catch.df = data.catch$data.catch)
#' N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)
#' N.y <- calc_N.y(N.ty = N.ty)
#' }
calc_N.y <- function(N.ty){
  N.y <- aggregate(N.ty~return.year, data=N.ty, sum)
  colnames(N.y)[which(colnames(N.y)=="N.ty")] <- "N.y"

  return(N.y)
}#END calc_N.y



#' @title (SPFI) Calculate the stata specific harvest rate indices by year.
#'
#' @description
#' This is equivalent to equation 8 in the draft SPFI document.
#'
#' @param H.ty Output from \code{\link{calc_H.ty}} or \code{\link{calc_H.ty2}} or \code{\link{calc_H.ty3}}.
#'
#' @return A data frame of the stata specific harvest rate indices, grouped by year.
#' @export
#'
#' @examples
#' \dontrun{
#' S.ty <- calc_S.ty(H.ty = H.ty)
#' }
calc_S.ty <- function(H.ty){

  H.t.base <- aggregate(H.ty~fishery.index, data=H.ty[H.ty$return.year %in% 1979:1982,], mean, na.rm = TRUE)
  colnames(H.t.base)[2] <- "H.t.base"
  H.ty <- merge(H.ty, H.t.base, by='fishery.index', all = TRUE)
  complete <- complete.cases(H.ty[,c('H.ty', 'H.t.base')])
  H.ty$S.ty[complete] <- H.ty$H.ty[complete] / H.ty$H.t.base[complete]
  return(H.ty)

}#END calc_S.ty



#' @title (SPFI) Calculate the SPFI grouped by year.
#'
#' @description
#' This is equivalent to equation 9 in the draft SPFI document.
#'
#' @param H.y Output from \code{\link{calc_H.y}} or \code{\link{calc_H.y2}}.
#'
#' @return A data frame with the SPFI values by year.
#' @export
#'
#' @examples
#' \dontrun{
#' S.y <- calc_S.y(H.y = H.y)
#' }
calc_S.y <- function(H.y){

  H.y$H.y.base <- mean(H.y$H.y[H.y$return.year %in% 1979:1982], na.rm = TRUE)
  H.y$S.y <- H.y$H.y / H.y$H.y.base

  year.series <- data.frame(return.year=min(H.y$return.year, na.rm = TRUE):max(H.y$return.year, na.rm = TRUE))
  H.y <- merge(year.series, H.y, by='return.year', all=TRUE)
  return(H.y)

}#END calc_S.y



#' @title (SPFI) Wrapper function to calculate the stratified proportional
#'   fishery index (SPFI).
#'
#' @param data.type A character vector of length one. The value can be either
#'   "AEQCat" or "AEQTot".
#' @param region A character vector of length one. The value can be "wcvi",
#'   "nbc", or "seak".
#' @param hrj.df A data frame. This is a long format table from the HRJ list.
#' @param hrj.filename A character vector length one. The name of the mdb file
#'   of HRJ data.
#' @param data.catch Output from \code{\link{readCatchData}}.
#' @param data.stock Output from \code{\link{readStockData}}.
#' @param fishery.subset A vector of the fishery numbers. If left as NULL then
#'   the default fisheries are the same as used in the VB. These are: SEAk
#'   (1:6), NBC (1:8), WCVI (1:12)
#' @param stock.subset A vector of the stock numbers. Can be left as NULL and
#'   the function will grab from the stocfile.stf.
#' @param adjustAlaska.bol A logical value. If TRUE then sum SEAK stratum 6 into
#'   4. This is applied to both the cwt catch data from the HRJ file and the
#'   catch data derirved from the *.cat file. Default is TRUE
#' @param imputation A list. Default is NULL (no imputation). If imputing then
#'   elements of the list can be found in the example below.
#' @param ... Arguments to pass to the imputation functions.
#'
#' @description After reading in the catch, stock, and HRJ data, the user can
#'   run this function alone (with appropriate arguments) to obtain the SPFI
#'   estimates.
#'
#' @details Regarding the imputation function as defined in the argument above,
#'   there are currently just two functions: \code{\link{calc_AAPC}} and
#'   \code{\link{calc_APC}}. The user can add imputation functions. However, the
#'   arguments and the output must match that as defined in
#'   \code{\link{calc_AAPC}} or \code{\link{calc_APC}}.
#'
#' @return A list comprising 14 elements. Nine of the elements are a data frames
#'   comprising the results of the intermediate (and final) calculations. Some
#'   elements are character strings (\code{catch.filename} and
#'   \code{stock.filename}) and \code{imputation.list} is a list of the output
#'   from the data imputation (if it was chosen). The element names are similar
#'   to the function name for the calculation. For example the distribution
#'   parameters are calculated by \code{\link{calc_d.tsa}} and found in the list
#'   element named \code{d.tsa}. The twelve elements are named: \code{data.type,
#'   hrj.filename, catch.filename, stock.filename, d.tsa, hcwt.ty, T.ty, N.ty,
#'   N.y, imputation.list, H.ty, H.y, S.ty, S.y}. The SPFI estimates can be
#'   found in the element named \code{S.y}.
#' @export
#'
#' @examples
#' \dontrun{
#' data.type <- "AEQTot" # "AEQCat" # or "AEQTot"
#' region <- "seak" # "wcvi" # "nbc" #  "seak"
#' #only one aabm in a data folder:
#' data.catch <- readCatchData("wcvi7914.cat", strLocation = region)
#' data.stock <- readStockData( "STOCFILE.STF") load("hrj_from_mdb.RData")
#' #data.hrj is available from the load() above:
#' hrj.df <- data.hrj$hrj.list.long$HRJ_BY
#' #all data sets include data prior to 1979.
#' #These values are excluded in the VB.
#' year.range <- 1979:(data.stock$stockmeta$intLastBrood$value+1)
#' hrj.df <- hrj.df[hrj.df$return.year %in% year.range,]
#'
#' imputation <- list(FUN="calc_GLMC", args=list(ignoreCWTHR=1, catchmin=4000))
#'
#' calc_SPFI(data.type = data.type, region = region, hrj.df = hrj.df,
#' data.catch = data.catch, data.stock = data.stock, imputation=imputation)
#' }
calc_SPFI <- function(data.type =c("AEQCat", "AEQTot"), region = c("wcvi", "nbc", "seak"), hrj.df=NA, hrj.filename=NA, data.catch, data.stock, fishery.subset=NULL, stock.subset=NULL, adjustAlaska.bol=TRUE, imputation=NULL,...){

  time.start <- Sys.time()

  #first test that all stocks needed in the stock file also exist in the hrj data:
  SN.flagged <- unique(data.stock$SPFIFlag.long$Stock.Number[data.stock$SPFIFlag.long$value==1])
  SN.absent.from.hrj <- SN.flagged[!SN.flagged %in% hrj.df$Stock.Number]

  if(length(SN.absent.from.hrj)>0){
  	cat("\n")
  	str1 <-c("The following stock numbers are flagged in the stock file for use in estimating SPFI but absent from the HRJ data.\n","The stock file:\n", basename(data.stock$filename),"\n", "The missing stocks:\n", paste(SN.absent.from.hrj, collapse = ", "))

  	cat(stringi::stri_wrap(c(str1,"\n\n", "Either the missing stocks should be added to the HRJ data (and re-import that data) or in the stock file set those flagged stock-ages to zero. The function is terminating without results being calculated.\n")), sep="\n")
  	return(NULL)

  	  	}



  #this terminates the function without results:
 # if(length(SN.absent.from.hrj)>=1) {return("Function terminated without results.")}
  ####End of checking for availability of stocs




  if(is.null(fishery.subset)) {
    #these fishery subsets match what is defined in the VB
    fishery.df <- data.frame(aabm=c(rep('seak',6), rep('nbc',8), rep('wcvi',12)), fishery.index=c(1:6, 1:8, 1:12))
    fishery.subset <- fishery.df$fishery.index[fishery.df$aabm==region]
  }
  if(is.null(stock.subset)) stock.subset <- unique(data.stock$SPFIFlag.long$Stock.Number[data.stock$SPFIFlag.long$value==1])

  SPFIFlag.long <- data.stock$SPFIFlag.long[,c("Stock.Number", "age", "value")]
  colnames(SPFIFlag.long)[colnames(SPFIFlag.long)=="value"] <- "spfiflag"
  hrj.df <- merge(hrj.df,SPFIFlag.long, by.x=c("Stock.Number", "age"), by.y=c("Stock.Number", "age") )
  hrj.df <- hrj.df[hrj.df$spfiflag==1,]


  cwtpop <- hrj.df[hrj.df$data.type=="Pop"
                   & hrj.df$fishery.index == 1
                   & hrj.df$Stock.Number %in% stock.subset,]

  cwtpop <- subset(cwtpop,select = -fishery.index) #n.ysa

  cwtcatch <- hrj.df[hrj.df$data.type=="NomCat"
                     & hrj.df$fishery.index %in% fishery.subset
                     & hrj.df$Stock.Number %in% stock.subset,]

  if(adjustAlaska.bol & region=="seak") {

    adjustAlaska.results <-  adjustAlaska(x = cwtcatch, data.catch = data.catch)
    cwtcatch <- adjustAlaska.results$x
    data.catch <- adjustAlaska.results$data.catch
    }

  aeqcwt <- hrj.df[hrj.df$data.type==data.type
                        & hrj.df$fishery.index %in% fishery.subset
                        & hrj.df$Stock.Number %in% stock.subset,]

  if(adjustAlaska.bol & region=="seak") {
    adjustAlaska.results <- adjustAlaska(x = aeqcwt, data.catch = data.catch)
    aeqcwt <- adjustAlaska.results$x
   }

  r.tsa.sum <- calc_tsa.sum(x = cwtcatch, newvar.name = "r.tsa.sum") # same as SumCWTCat in VB
  r.ty.sum <- calc_ty.sum(x = cwtcatch, newvar.name = "r.ty.sum") #same as SumCWTCat2 in vb
  c.ty.sum <- calc_ty.sum(x = aeqcwt, newvar.name = "c.ty.sum") #same as SumAEQCWTCat in vb


  maxmaxdifference.limit <-  0.0000001
  maxmaxdifference <- 1
  counter <- 0
  maxmaxdifference.df <- data.frame(time.val=as.POSIXct(character()), maxmaxdifference=numeric())

  d.tsa <- calc_d.tsa(r.tsa.sum = r.tsa.sum, n.ysa = cwtpop, standardize.bol = TRUE)

  while(maxmaxdifference > maxmaxdifference.limit){
    hcwt.ty <- calc_hcwt.ty(r.ty.sum=r.ty.sum,  d.tsa = d.tsa, n.ysa = cwtpop)

    d.tsa.prior <- d.tsa

    d.tsa <- calc_d.tsa(r.tsa.sum = r.tsa.sum, n.ysa = cwtpop, hcwt.ty = hcwt.ty, standardize.bol = TRUE)

    maxmaxdifference <- calc_Difference(d.tsa.prior, d.tsa)

    counter <- counter+1
    cat(paste(counter, maxmaxdifference,"\n"))
    maxmaxdifference.df <- rbind(maxmaxdifference.df, data.frame(time.val=Sys.time(), maxmaxdifference=maxmaxdifference))
  }
  cat("Minimum reached\n")
  T.ty <- calc_T.ty(catch.df = data.catch$data.catch)

  N.ty <- calc_N.ty(T.ty = T.ty, hcwt.ty = hcwt.ty)

  N.y <- calc_N.y(N.ty = N.ty)

  H.ty <- calc_H.ty(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, hcwt.ty = hcwt.ty)
  #need to limit to years available in catch data:
  H.ty <- H.ty[H.ty$return.year %in% sort(unique(T.ty$return.year)),]

  #H.ty <- calc_H.ty2(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, hcwt.ty = hcwt.ty, T.ty = T.ty)
  #H.ty <- calc_H.ty3(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, T.ty = T.ty, N.ty = N.ty)

  # gap imputation, if requested:
  if(is.null(imputation)){
    imputation.list <- NULL
    H.y <- calc_H.y(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, hcwt.ty = hcwt.ty, T.ty = T.ty)

  }else{
    #do imputation for total abundance:
    imputation.list <- do.call(what = imputation$FUN, args = c(list(N.ty, data.catch = data.catch),imputation$args))
    N.ty <- imputation.list$data.df
    N.y <- imputation.list$imputation.results

    H.ty <- calc_H.ty3(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, T.ty = T.ty, N.ty = N.ty)
    H.y <- calc_H.y2(c.ty.sum = c.ty.sum, r.ty.sum = r.ty.sum, T.ty = T.ty, N.y = N.y)
  }
  #end of imputation



  S.ty <- calc_S.ty(H.ty = H.ty)

  S.y <- calc_S.y(H.y = H.y)
  cat("Completed\n")
  cat(paste(round(Sys.time()- time.start,1), "seconds"))

  return(list(data.type=data.type, hrj.filename=hrj.filename, catch.filename = data.catch$filename, stock.filename= data.stock$filename, d.tsa=d.tsa,c.ty.sum=c.ty.sum, r.ty.sum=r.ty.sum, hcwt.ty=hcwt.ty, T.ty=T.ty, N.ty=N.ty, N.y=N.y, imputation.list=imputation.list, H.ty=H.ty, H.y=H.y, S.ty=S.ty, S.y=S.y))

}#END calc_SPFI



#' @title (SPFI) Sum CWT catch by fishery, stock, and age.
#'
#' @param x A data frame. Typically the \code{cwtcatch} object.
#' @param newvar.name A string of length one. This is the new name for the
#'   column of summed data. The default is "value.sum". It's recommended not to
#'   change the default unless using this function for purposes independent of
#'   the SPFI estimation.
#'
#' @return A data frame of the CWT catch sum grouped by fishery, stock, and age.
#' @export
#'
#' @examples
#' \dontrun{
#'   cwtcatch <- hrj.df[hrj.df$data.type=="NomCat" & hrj.df$fishery.index %in% fishery.subset & hrj.df$Stock.Number %in% stock.subset,]
#' if(region=="seak") cwtcatch <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
#' r.tsa.sum <- calc_tsa.sum(x = cwtcatch, newvar.name = "r.tsa.sum")
#' }
calc_tsa.sum <- function(x, newvar.name ='value.sum'){
  tsa.sum <- aggregate(value~Stock.Number+fishery.index+age, data= x, sum , na.rm=TRUE)
  colnames(tsa.sum)[colnames(tsa.sum)=="value"] <- newvar.name
  return(tsa.sum)
}#END calc.r.tsa.sum



#' @title (SPFI) Sum CWT catch by fishery, and year.
#'
#' @param x A data frame. Typically the \code{cwtcatch} object.
#' @param newvar.name A string of length one. This is the new name for the
#'   column of summed data. The default is "value.sum". It's recommended not to
#'   change the default unless using this function for purposes independent of
#'   the SPFI estimation.
#'
#' @return A data frame of the CWT catch sum by fishery, and year.
#' @export
#'
#' @examples
#' \dontrun{
#'   cwtcatch <- hrj.df[hrj.df$data.type=="NomCat" & hrj.df$fishery.index %in% fishery.subset & hrj.df$Stock.Number %in% stock.subset,]
#' if(region=="seak") cwtcatch <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
#' r.ty.sum <- calc_ty.sum(x = cwtcatch, newvar.name = "r.ty.sum")
#' }
calc_ty.sum <- function(x, newvar.name ='value.sum'){
  ty.sum <- aggregate(value~fishery.index+return.year, data= x, sum , na.rm=TRUE)
  colnames(ty.sum)[colnames(ty.sum)=="value"] <- newvar.name
  return(ty.sum)
}#END calc.r.ty.sum



#' @title (SPFI) Calculate the Pacific Salmon Treaty catch, grouped by fishery
#'   stratum and year.
#' @description This is equivalent to equation 3 in the draft SPFI document. The
#'   output is the same as found in the Visual Basic variable:
#'   \code{CatchContribution}. The SEAK data includes values for "Alaska
#'   hatchery contribution". The "Alaska hatchery contribution" is subtrated
#'   from the catch (by stratum and year) to produce the PSC catch.
#'
#' @param catch.df A data frame, extracted from the output of
#'   \code{\link{readCatchData}}, such as: \code{data.catch$data.catch}.
#'
#' @return A data frame.
#' @export
#'
#' @examples
#' \dontrun{
#' data.catch <- readCatchData("wcvi7914.cat", strLocation = 'wcvi')
#' T.ty <- calc_T.ty(catch.df = data.catch$data.catch)
#' }
calc_T.ty <- function(catch.df){

  T.ty <- catch.df
  colnames(T.ty)[colnames(T.ty)=="CatchContribution"] <- "T.ty"
  colnames(T.ty)[colnames(T.ty)=="TempYear"] <- "return.year"
  colnames(T.ty)[colnames(T.ty)=="TempStrata"] <- "fishery.index"
  return(T.ty)

}#END calc_T.ty


#' Minimization to estimate distribution parameter
#'
#' @param data.type
#' @param region
#' @param hrj.df
#' @param hrj.filename
#' @param data.catch
#' @param data.stock
#' @param fishery.subset
#' @param stock.subset
#' @param adjustAlaska.bol
#'
#' @return
#' @export
#'
#' @examples
#' \dontrun{
#' data.type <- "AEQTot" # "AEQCat" # or "AEQTot"
#' region <- "seak" # "wcvi" # "nbc" #  "seak"
#' #only one aabm in a data folder:
#' data.catch <- readCatchData("wcvi7914.cat", strLocation = region)
#' data.stock <- readStockData( "STOCFILE.STF") load("hrj_from_mdb.RData")
#' #data.hrj is available from the load() above:
#' hrj.df <- data.hrj$hrj.list.long$HRJ_BY
#' #all data sets include data prior to 1979.
#' #These values are excluded in the VB.
#' year.range <- 1979:(data.stock$stockmeta$intLastBrood$value+1)
#' hrj.df <- hrj.df[hrj.df$return.year %in% year.range,]
#' minimizeDistribution(data.type = data.type, region = region, hrj.df = hrj.df,
#' data.catch = data.catch, data.stock = data.stock)
#' }
minimizeDistribution <- function (data.type = c("AEQCat", "AEQTot"), region = c("wcvi",
    "nbc", "seak"), hrj.df = NA, hrj.filename = NA, data.catch,
    data.stock, fishery.subset = NULL, stock.subset = NULL, adjustAlaska.bol = TRUE){

    time.start <- Sys.time()
    SN.flagged <- unique(data.stock$SPFIFlag.long$Stock.Number[data.stock$SPFIFlag.long$value ==
        1])
    SN.absent.from.hrj <- SN.flagged[!SN.flagged %in% hrj.df$Stock.Number]
    if (length(SN.absent.from.hrj) > 0) {
        cat("\n")
        str1 <- c("The following stock numbers are flagged in the stock file for use in estimating SPFI but absent from the HRJ data.\n",
            "The stock file:\n", basename(data.stock$filename),
            "\n", "The missing stocks:\n", paste(SN.absent.from.hrj,
                collapse = ", "))
        cat(stringi::stri_wrap(c(str1, "\n\n", "Either the missing stocks should be added to the HRJ data (and re-import that data) or in the stock file set those flagged stock-ages to zero. The function is terminating without results being calculated.\n")),
            sep = "\n")
        return(NULL)
    }
    if (is.null(fishery.subset)) {
        fishery.df <- data.frame(aabm = c(rep("seak", 6), rep("nbc",
            8), rep("wcvi", 12)), fishery.index = c(1:6, 1:8,
            1:12))
        fishery.subset <- fishery.df$fishery.index[fishery.df$aabm ==
            region]
    }
    if (is.null(stock.subset))
        stock.subset <- unique(data.stock$SPFIFlag.long$Stock.Number[data.stock$SPFIFlag.long$value ==
            1])
    SPFIFlag.long <- data.stock$SPFIFlag.long[, c("Stock.Number",
        "age", "value")]
    colnames(SPFIFlag.long)[colnames(SPFIFlag.long) == "value"] <- "spfiflag"
    hrj.df <- merge(hrj.df, SPFIFlag.long, by.x = c("Stock.Number",
        "age"), by.y = c("Stock.Number", "age"))
    hrj.df <- hrj.df[hrj.df$spfiflag == 1, ]
    cwtpop <- hrj.df[hrj.df$data.type == "Pop" & hrj.df$fishery.index ==
        1 & hrj.df$Stock.Number %in% stock.subset, ]
    cwtpop <- subset(cwtpop, select = -fishery.index)
    cwtcatch <- hrj.df[hrj.df$data.type == "NomCat" & hrj.df$fishery.index %in%
        fishery.subset & hrj.df$Stock.Number %in% stock.subset,
        ]
    if (adjustAlaska.bol & region == "seak") {
        adjustAlaska.results <- adjustAlaska(x = cwtcatch, data.catch = data.catch)
        cwtcatch <- adjustAlaska.results$x
        data.catch <- adjustAlaska.results$data.catch
    }
    aeqcwt <- hrj.df[hrj.df$data.type == data.type & hrj.df$fishery.index %in%
        fishery.subset & hrj.df$Stock.Number %in% stock.subset,
        ]
    if (adjustAlaska.bol & region == "seak") {
        adjustAlaska.results <- adjustAlaska(x = aeqcwt, data.catch = data.catch)
        aeqcwt <- adjustAlaska.results$x
    }
    r.tsa.sum <- calc_tsa.sum(x = cwtcatch, newvar.name = "r.tsa.sum")
    r.ty.sum <- calc_ty.sum(x = cwtcatch, newvar.name = "r.ty.sum")
    c.ty.sum <- calc_ty.sum(x = aeqcwt, newvar.name = "c.ty.sum")
    maxmaxdifference.limit <- 1e-07
    maxmaxdifference <- 1
    counter <- 0
    maxmaxdifference.df <- data.frame(time.val = as.POSIXct(character()),
        maxmaxdifference = numeric())
    d.tsa <- calc_d.tsa(r.tsa.sum = r.tsa.sum, n.ysa = cwtpop,
        standardize.bol = TRUE)
    while (maxmaxdifference > maxmaxdifference.limit) {
        hcwt.ty <- calc_hcwt.ty(r.ty.sum = r.ty.sum, d.tsa = d.tsa,
            n.ysa = cwtpop)
        d.tsa.prior <- d.tsa
        d.tsa <- calc_d.tsa(r.tsa.sum = r.tsa.sum, n.ysa = cwtpop,
            hcwt.ty = hcwt.ty, standardize.bol = TRUE)
        maxmaxdifference <- calc_Difference(d.tsa.prior, d.tsa)
        counter <- counter + 1
        cat(paste(counter, maxmaxdifference, "\n"))
        maxmaxdifference.df <- rbind(maxmaxdifference.df, data.frame(time.val = Sys.time(),
            maxmaxdifference = maxmaxdifference))
    }
    cat("Minimum reached\n")

    return(list(d.tsa=d.tsa, hcwt.ty=hcwt.ty))
}#END minimizeDistribution






#' @title (SPFI) Read the CTC catch data files (*.cat).
#'
#' @param filename A string of length one, defining the name of the catch file.
#'   If more than one file name is included, only the first is used.
#' @param strLocation A string of length one, defining the AABM location
#'   ("seak", "nbc", "wcvi").
#'
#' @details Needs details.
#' @return A list of six elements. The sixth element is a data frame of the
#'   catch data.
#' @export
#'
#' @examples
#' \dontrun{
#' readCatchData("wcvi7914.cat", strLocation = "wcvi")
#' }
readCatchData <- function(filename, strLocation= c("seak", "nbc", "wcvi") ){
  strLocation <- match.arg(strLocation)

  if(!is.character(filename)) stop("'filename' must be a character string")
  filename <- filename[1]

  dat.tmp <- read.csv(filename, stringsAsFactors = FALSE, header = FALSE)
  colnames(dat.tmp) <- c("TempYear", "TempStrata", "strTempStrata", "TempCatch", "TempContribution")
  dat.tmp$strStrata <- dat.tmp$strTempStrata
  dat.tmp$CatchContribution <- dat.tmp$TempCatch - dat.tmp$TempContribution

  intFirstStrata <- min(dat.tmp$TempStrata)
  intTopStrata <-  max(dat.tmp$TempStrata)

  if(!is.na(strLocation) & tolower(strLocation) == "seak"){
    intLastStrata <-  intTopStrata + 1
  } else {
    intLastStrata <-  intTopStrata
  }

  intLastYear <- max(dat.tmp$TempYear)
  intFirstYear <- min(dat.tmp$TempYear)

  #in frmMain.vb line 769
  # If strLocation = "Alaska" Then strStrata(intLastStrata) = "FALL"
  # for SEAK with 5 strata in catch file this does nothing as intLastStrata=6:
  if(!is.na(strLocation) & tolower(strLocation) == "seak") dat.tmp$strStrata[dat.tmp$TempStrata ==intLastStrata] <- "FALL"

  return(list(filename=filename, intFirstStrata=intFirstStrata, intTopStrata=intTopStrata, intLastStrata=intLastStrata, intFirstYear=intFirstYear, intLastYear=intLastYear, data.catch=dat.tmp) )
}#END readCatchData



#' @title (SPFI) Read CTC stock file (stocfile.stf).
#'
#' @param filename A string of length one. The default value is "stocfile.stf".
#' @inheritParams readCatchData
#'
#' @return A list of four elements. The first element, \code{stockmeta}, holds
#'   the meta data found in the first five lines of the stock file. The second
#'   element, \code{SPFIFlag}, is a data frame defining what stock-age
#'   combinations will be included in the analysis. The third element,
#'   \code{SPFIFlag.long}, is the same as \code{SPFIFlag}, but reshaped to a
#'   long format. The fourth element, \code{stocks.df}, is the table of stocks
#'   (found in the stock file).
#' @export
#'
#' @examples
#' \dontrun{
#' data.stock <- readStockData('STOCFILE.STF')
#' }
readStockData <- function(filename= "stocfile.stf"){

  varNames <- c("intNumStocks", "intNumFisheries","intFirstBrood", "intLastBrood","intNumSPFIStocks")
  stock.vec <- readLines(filename)
  stock.vec <- trimws(stock.vec)
  stockmeta <- strsplit(stock.vec[1:5],"," )
  stockmeta <- lapply(stockmeta, FUN = function(x){
    x.tmp <- trimws(x)
    list(description=x.tmp[2], value=as.integer(x.tmp[1]))
  })
  names(stockmeta) <- varNames

  #evaluate how many stocks in flag table and stock translation table:
  dat.tmp <- readLines(filename, skipNul=TRUE)
  SPFIFlag.startrow <- min(grep("Stock Number", x = dat.tmp))
  stocktable.startrow <- max(grep("Stock Number", x = dat.tmp))
  if(stockmeta$intNumSPFIStocks$value != stocktable.startrow-1-SPFIFlag.startrow) {
    stockmeta$intNumSPFIStocks$value <- stocktable.startrow-1-SPFIFlag.startrow
    cat("\n#######\n Value for number of Stocks in the SPFI revised to row count of SPFI flag table.\n######\n")
  }

  row.final <- length(dat.tmp[dat.tmp!=""])
  if(stockmeta$intNumStocks$value != row.final- stocktable.startrow) {
    stockmeta$intNumStocks$value <- row.final- stocktable.startrow
    cat("\n#######\n Value for number of stocks in the stock translation table\n revised to row count of the stock table.\n######\n")
  }



  #begin import with proper stock counts:

  SPFIFlag <- read.csv(filename,skip=length(varNames), nrows = stockmeta$intNumSPFIStocks$value )
  colnames(SPFIFlag)[2] <- "StartAge.0"
  colnames.vec <- colnames(SPFIFlag)
  SPFIFlag.long <- reshape(data=SPFIFlag, dir='long', varying=list(2:ncol(SPFIFlag)), timevar = 'column.index', v.names= 'value' )
  SPFIFlag.long$grouping.var <- colnames.vec[2:length(colnames.vec)][SPFIFlag.long$column.index]
  SPFIFlag.long$age.index <- as.integer(substr(SPFIFlag.long$grouping.var, nchar(SPFIFlag.long$grouping.var), nchar(SPFIFlag.long$grouping.var) ))

  SPFIFlag.long <- SPFIFlag.long[,-which(colnames(SPFIFlag.long) %in% c("id", "column.index", "grouping.var"))]

  stocks.df <- read.csv(filename, skip=length(varNames) + stockmeta$intNumSPFIStocks$value +1, stringsAsFactors = FALSE, strip.white = TRUE )

  SPFIFlag.long <- merge(SPFIFlag.long, stocks.df, by="Stock.Number", all.x=TRUE)
  SPFIFlag.long$age <- SPFIFlag.long$Start.Age + SPFIFlag.long$age.index

  return(list(filename=filename, stockmeta=stockmeta, SPFIFlag=SPFIFlag, SPFIFlag.long=SPFIFlag.long, stocks.df=stocks.df))
}#END readStockData




#' @title (SPFI) Build, save, and open an R script to help execute SPFI.
#'
#' @description This creates and opens a script named "spfi_script.R". This is a
#'   template for the user to work with when doing SPFI estimates. This is
#'   intended to help the user understand the work flow and due to file path
#'   differences, is unlikely to work as is. Some object values will need
#'   updating (for example the datapath). If the user does not have a copy of
#'   the hrj data base saved  into a .Rdata file, then the functions
#'   \code{\link{readHRJAccessDatabase}}, \code{\link{reshapeHRJtolong}}, and the
#'   \code{list} creation will have to executed. It might be wise to then save
#'   the \code{list} to a .Rdata file.
#'
#' @return Opens an R script that includes the template of functions to
#'   calculate SPFI.
#' @export
#'
#' @examples
#' writeScriptSPFI()
writeScriptSPFI <- function(){

  date.creation <- Sys.time()
  script.str <- c("
####### SETUP #######
rm(list=ls())
###### COMMENTS ########
script.name <- 'spfi_script'
if(!exists('script.metadata')) script.metadata <- list()

script.metadata[[script.name]] <- list(
fileName = paste0(script.name,'.R'),
author= 'Michael Folkes',",
paste0("date.creation=", "'",as.character(date.creation),"',"),
"date.edit= NA,
comments = NA
)# END list

####### DATA #######
data.type <- 'AEQCat' # 'AEQCat' # or 'AEQTot'

region <- 'wcvi' # 'wcvi' # 'nbc' #  'seak'

#only one aabm in a data folder:
#datapath <- paste('./', region, sep='/')

catch.filename <- list.files(pattern = '*.cat')
data.catch <- readCatchData(catch.filename, strLocation = region)
data.stock <- readStockData('STOCFILE.STF')

# reading 32 bit mdb files requires using 32bit R
hrj.list.wide <- readHRJAccessDatabase('HRJ_database 2016b.mdb')
hrj.list.long <- reshapeHRJtolong(hrj.list.wide, data.stock)
hrj.list <- list(sourcefile=hrj.list.wide$sourcefile, hrj.list.wide=hrj.list.wide$data, hrj.list.long=hrj.list.long)
#filename <- 'hrj_from_mdb.RData'
#save(hrj.list, file = filename)
#load(filename)

#this is the method to use with hrj text files:
#filename <- list.files(data.path, pattern = 'HRJ')
#filepath <- paste(data.path, filename, sep='/')
#hrj.list <- readHRJtext(filepath)
#add stock number column to the data frames:
#hrj.list$hrj.cwt.list <- lapply(hrj.list$hrj.cwt.list, updateStockByName, data.stock$stocks.df)
#write to a prebuilt access data base (R cannot create data base files):
#writeHRJAccessDatabase(hrj = hrj.list$hrj.cwt.list, filename = 'test.accdb')
#write csv files in same format as found in data base:
#writeHRJcsv(hrj = hrj.list$hrj.cwt.list)

#long format is what the spfi code uses:
#hrj.list.long <- reshapeHRJtolong(hrj.list$hrj.cwt.list, data.stock)

#reshape to wide format prior to writing to access:
#workdingdata.wide <- reshapeHRJtowide(hrj.list.long)
#writeHRJAccessDatabase(hrj = workdingdata.wide, filename = 'test.accdb')


####### MAIN #######
#hrj.list is available from the load() above:
hrj.df <- hrj.list$hrj.list.long$HRJ_BY

#all data sets include data prior to 1979. These values are excluded in the VB,
#which has 'intFirstYear As Integer = 1979' (line 83)
# And if those early years are included results are slightly affected.
year.range <- 1979:(data.stock$stockmeta$intLastBrood$value+1)
hrj.df <- hrj.df[hrj.df$return.year %in% year.range,]


#the function calcSPFI calls all the required intermediate function steps and
#the output is a list that has all intermediate data and the spfi values (S.y)
spfi.output <- calc_SPFI(data.type = data.type, region = region, hrj.df = hrj.df, hrj.filename=hrj.list$sourcefile, data.catch = data.catch, data.stock = data.stock, adjustAlaska.bol = FALSE, imputation='calc_GLMC', catchmin=4000)

saveRDS(spfi.output, file = paste('spfi.output', data.stock$filename, '.RData', sep='_'))

write.csv(x = spfi.output$S.y, file = paste('spfi', region, '.csv', sep = '_'), row.names = FALSE)

writeSPFItable6.6(spfi.output, data.catch)

####### END #######

")
  write(script.str, file="spfi_script.R")
  file.edit("spfi_script.R" )
}#END writeScriptSPFI



#' @title (SPFI) Write csv file of summarized SPFI results.
#'
#' @param spfi.output A list. The output of \code{\link{calc_SPFI}}.
#' @param data.catch A list. The output of  \code{\link{readCatchData}}.
#' @param comments A list. Comments that will be appended to the top of the
#'   output file. Each list element is the equivalent of a new line in the
#'   output.
#'
#' @description This writes a csv file with columns matching those found in
#'   table 6-6 on page 103 of REPORT TCCHINOOK (09)-2
#'   (\url{http://www.psc.org/download/35/chinook-technical-committee/2120/tcchinook09-2.pdf}).
#'
#'
#' @return A csv file named 'SPFItable6-6.csv'
#' @export
#'
#' @examples
#' \dontrun{
#' writeSPFItable6.6(spfi.output, data.catch)
#' }
writeSPFItable6.6 <- function(spfi.output, data.catch, comments=NULL, filename = NA){
  strata.subset <- unique(spfi.output$N.ty$fishery.index)
  #hcwt.ty.wide <-  reshape(spfi.output$S.ty[spfi.output$S.ty$fishery.index %in% strata.subset, c('return.year', 'fishery.index', "hcwt.ty")], direction = 'wide', idvar = "return.year", timevar = 'fishery.index')
  hcwt.ty.wide <-  reshape(spfi.output$hcwt.ty[spfi.output$hcwt.ty$fishery.index %in% strata.subset, c('return.year', 'fishery.index', "hcwt.ty")], direction = 'wide', idvar = "return.year", timevar = 'fishery.index')

  #c.ty.sum.wide <-  reshape(spfi.output$S.ty[spfi.output$S.ty$fishery.index %in% strata.subset, c('return.year', 'fishery.index', "c.ty.sum")], direction = 'wide', idvar = "return.year", timevar = 'fishery.index')

  #r.ty.sum.wide <-  reshape(spfi.output$S.ty[spfi.output$S.ty$fishery.index %in% strata.subset, c('return.year', 'fishery.index', "r.ty.sum")], direction = 'wide', idvar = "return.year", timevar = 'fishery.index')

  tmpcatch <- data.frame(return.year=data.catch$data.catch$TempYear, fishery.index=data.catch$data.catch$TempStrata, AEQcatch=data.catch$data.catch$CatchContribution)

  aeqcatch <- reshape(tmpcatch, dir='wide', idvar='return.year', timevar = "fishery.index")

  results <- merge(hcwt.ty.wide, aeqcatch, 'return.year')

  N.ty.wide <-  reshape(spfi.output$N.ty[spfi.output$N.ty$fishery.index %in% strata.subset, c('return.year', 'fishery.index', "N.ty")], direction = 'wide', idvar = "return.year", timevar = 'fishery.index')

  results <- merge(results, N.ty.wide, by='return.year')

  if("scalar" %in% colnames(spfi.output$N.y)){
    results <- merge(results, spfi.output$N.y[, c('return.year', "scalar")], by='return.year')
  }

  results <- merge(results, spfi.output$H.y[, c('return.year', "H.y")], by='return.year')

  results <- merge(results, spfi.output$S.y[, c('return.year', "S.y")], by='return.year')

  imputation.str <- ifelse(is.null(spfi.output$imputation.list), "no.imputation", paste0(spfi.output$imputation.list$imputation.method, spfi.output$imputation.list$catchmin))

  #table66.filename <- paste("SPFItable6-6", spfi.output$stock.filename, spfi.output$data.type,  imputation, ".csv", sep="_")

  comments <- c(unlist(comments),
                "source files:",
                spfi.output$hrj.filename, spfi.output$catch.filename, spfi.output$stock.filename,
  							paste("Data type:", spfi.output$data.type ),
  							paste("imputation:", imputation.str ),
                "Results based on imputation may not accurately represent historical values.\n\n")

  write(comments,file = filename, append = FALSE)
  options(warn=-1)
  write.table(x = results, file = filename, row.names = FALSE, append = TRUE, sep=",")
  options(warn=0)
  cat("\nResults written to file:", filename, "but if based on imputation they may not accurately represent historical values.")

}#END writeSPFItable6.6



#' @title Write .stf text file.
#'
#' @param x A data frame with columns: "StockID" (3 letter acronym), and
#'   (likely) four columns named age2...age4. The values of the age columns will
#'   be 0 or 1, depending on whether that stock-age should be included in the
#'   SPFI estimation. An example of argument is found in the example below.
#' @param stocks.df A data frame with column names: "Stock Number","Stock Name",
#'   "StockID","Start Age". If NA (the defualt) then the function builds a data
#'   frame from the 'new' list of 31 stocks.
#' @param filename A character vector of length one. The default is NA. If NA
#'   then the stf ouput is sent to stdout().
#'
#' @return
#' @export
#'
#' @examples
#' \dontrun{
#' spfi.flag <- data.frame(matrix(ncol=5, byrow = TRUE, data = c(
#' 	"ATN", 0, 0, 0, 0,
#' 	"BQR", 0, 1, 1, 0,
#' 	"CHI", 0, 1, 1, 0
#' 	)), stringsAsFactors = FALSE)
#'
#' 	colnames(spfi.flag) <- c("StockID", "age.index2", "age.index3", "age.index4", "age.index5")
#' 	spfi.flag <- type.convert(spfi.flag, as.is = TRUE)
#' 	writeSTFfile(x = spfi.flag, filename = "test.stf")
#'
#' }
writeSTFfile <- function(x, stocks.df=NA, filename=NA){
	if(is.na(filename)) filename <- stdout()

  if(is.na(stocks.df)){
    #this is the 31 stock list
    stocks.df <- data.frame(matrix(byrow = TRUE, ncol=4,
  c(1, "Atnarko Summer", "ATN",2,
    2, "Big Qualicum", "BQR",2,
    3, "Chilliwack", "CHI",2,
    4, "Cowlitz Fall Tule", "CWF",2,
    5, "Elk River", "ELK",2,
    6, "George Adams Fall Fingerling", "GAD",2,
    7, "Harrison", "HAR",2,
    8, "Kitsumkalum Summer", "KLM",3,
    9, "Columbia Lower River Hatchery", "LRH",2,
    10, "Lewis River Wild", "LRW",2,
    11, "Nicola River Spring", "NIC",3,
    12, "Nisqually Fall Fingerling", "NIS",2,
    13, "Northern SE AK", "NSA",3,
    14, "Puntledge Summer", "PPS",2,
    15, "Queets Fall Fingerling", "QUE",2,
    16, "Quinsam Fall", "QUI",2,
    17, "Robertson Creek", "RBT",2,
    18, "Samish Fall Fingerling", "SAM",2,
    19, "Lower Shuswap River Summers", "SHU",2,
    20, "Skagit Spring Fingerling", "SKF",2,
    21, "Spring Creek Tule", "SPR",2,
    22, "South Puget Sound Fall Fingerling", "SPS",2,
    23, "South Puget Sound Fall Yearling", "SPY",2,
    24, "Salmon River", "SRH",2,
    25, "Southern SE AK", "SSA",3,
    26, "Skagit Summer Fingerling", "SSF",2,
    27, "Columbia Summers", "SUM",2,
    28, "Transboundary Rivers", "TBR",3,
    29, "Upriver Brights", "URB",2,
    30, "White River Spring Yearling", "WRY",2,
    31, "Willamette Spring", "WSH",3)), stringsAsFactors = FALSE)
  }#END if(is.na(stocks.df))
  stocks.df <- type.convert(stocks.df)
  colnames(stocks.df) <- c("Stock Number","Stock Name", "StockID","Start Age")

  stockmeta <- list()
  stockmeta$intNumStocks$value <- nrow(stocks.df)
  stockmeta$intNumStocks$description <- "Number of Stocks"
  stockmeta$intNumFisheries$value <- 79
  stockmeta$intNumFisheries$description <- "Number of Fisheries"
  stockmeta$BYfirst$value <- 1971
  stockmeta$BYfirst$description <- "First Brood Year"
  stockmeta$BYlast$value <- 2016
  stockmeta$BYlast$description <- "Last Brood Year"

  #stockmeta$NUMspfistocks$value <- length(unique(unlist(apply(x[3:ncol(x)],2,function(x2) which(x2==1)))))
  stockmeta$NUMspfistocks$value <- nrow(stocks.df)
  stockmeta$NUMspfistocks$description <- "Number of Stocks in the SPFI"

  flag.df <- merge(x, stocks.df)
  flag.df <- flag.df[,c("Stock Number", colnames(flag.df)[grep(pattern = "age", colnames(flag.df))])]
  flag.colnames <- c("Stock Number", 'StartAge' , "StartAge+1", "StartAge+2", "StartAge+3")
  colnames(flag.df) <- flag.colnames[1:ncol(flag.df)]
  flag.df <- flag.df[order(flag.df$`Stock Number`),]

  #write the file
  options(warn = -1)
 file.create(filename)
 meta.df <- lapply(stockmeta, function(stockmeta.line) data.frame(stockmeta.line))
 meta.df <- do.call("rbind", meta.df)
 write.table(meta.df, file = filename, quote = FALSE, row.names = FALSE, col.names =FALSE, sep = ", ")
 write.table(flag.df, file=filename, append = TRUE, sep=", ", row.names = FALSE, quote = FALSE)
write.table(stocks.df, file=filename, append = TRUE, sep=", ", row.names = FALSE, quote = FALSE)
options(warn = 0)

 cat(c("\n", filename, "\n", "has been written.", " \n "))

}#END writeSTFfile
MichaelFolkes/ctctools documentation built on May 7, 2019, 4:56 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MichaelFolkes/ctctools
Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.

R/spfi_functions.R
In MichaelFolkes/ctctools: Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.

R Package Documentation

Browse R Packages

We want your feedback!

MichaelFolkes/ctctools Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.

R/spfi_functions.R In MichaelFolkes/ctctools: Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.

R Package Documentation

Browse R Packages

We want your feedback!

MichaelFolkes/ctctools
Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.

R/spfi_functions.R
In MichaelFolkes/ctctools: Analytical tools For The Pacific Salmon Commission's Chinook Technical Committee.