# R/prepStrata.R
#
# Defines functions prepStrata
#
# Documented in prepStrata

#' Prepare data to run multiple strata
#' 
#' Filters the trap data to the requested adipose-fin status, removes
#' observations that cannot be used (missing strata, PBT, or GSI values), and
#' then calls \code{prepOneStrata} once for each stratum.
#' 
#' The model is actually run by a separate function in order to allow
#' a user to edit the priors and initial values before running.
#' 
#' @param trapData dataframe with data for the sampled fish (trap data for dam escapement). It may contain
#'   several strata; it is split by \code{strataCol}. Rows with NA in \code{strataCol}, \code{adFinCol},
#'   \code{PBTcol} or \code{GSIcol} are dropped.
#' @param tags dataframe with tag rates for all groups. This should NOT include the Unassigned group. If a
#'   group named "Unassigned" is found in the first column, it is removed with a warning.
#' @param GSIcol name of column containing GSI assignments. If you have no GSI information, create a column with
#'   the same value for all samples.
#' @param PBTcol name of column containing PBT assignments
#' @param strataCol name of column indicating the strata the observation belongs to. Values are converted
#'   to character before splitting.
#' @param variableCols vector of column names of columns containing the variables to estimate composition and 
#'   to inform the group selection for PBT untagged fish
#' @param variableColsOth vector of column names of columns containing the variables to estimate composition but 
#'   NOT inform the group selection for PBT untagged fish
#' @param adFinCol name of column containing adipose fin status - TRUE (or AI) being intact, FALSE (or AD) being
#'   clipped, NA missing. Any other value causes an error.
#' @param AI TRUE to analyze ad-intact fish, FALSE to analyze ad-clipped fish. Must be a single non-missing value.
#' @param verbose TRUE to print some messages, FALSE to not
#' @param GSIgroups These are the values for all the GSI groups that you expect to be present in the population. If NA, then
#'   the values are taken to be all the unique values in the GSI column in the dataset within each strata (ie unobserved groups are
#'   assumed to be 0).
#' @param variableValues a list, in the order of \code{variableCols} with entries having the values expected for each variable. This
#'   is helpful to make sure the same variable values are estimated in each strata even if one value is not observed in all strata. If
#'   a value other than a list is given (for example, NA), it uses the values present in the dataset within each strata (ie unobserved 
#'   groups are assumed to be 0).
#' @param variableValuesOth Same as variableValues, but for variableColsOth
#' @param symPrior All the priors are Dirichlet distributions. The default is to use this value for all the alphas of all
#'   the priors. You can, and are encouraged to, manually adjust the priors as you see fit prior to running the MCMC chain. 
#' 
#' @return A named list with one entry per stratum remaining after filtering. Names are the strata values
#'   (as character, in sorted order) and each entry is the value returned by \code{prepOneStrata} for
#'   that stratum. The list is empty if no observations remain.
#' 
#' @export
prepStrata <- function(trapData, tags, GSIcol, PBTcol, strataCol, variableCols = c(), variableColsOth = c(), adFinCol, AI = TRUE, 
									 GSIgroups = NA,
									 variableValues = NA, variableValuesOth = NA, symPrior = .5, verbose = TRUE){
	
	# validate AI before it is used in any `if`: a check placed after `if(AI)`
	# can never run, because `if(NA)` fails first with an unhelpful message.
	# Numeric values (e.g. 0/1) are still accepted since `if` coerces them.
	if(length(AI) != 1 || !(is.logical(AI) || is.numeric(AI)) || is.na(AI)){
		stop("Non-boolean input for AI in prepStrata")
	}
	
	#turn adFinCol into boolean if necessary
	if(!is.logical(trapData[,adFinCol])){
		nonValid <- sum(!is.na(trapData[,adFinCol]) & !(trapData[,adFinCol] %in% c("AD", "AI")))
		if(nonValid > 0){
			errMessage <- paste(nonValid, "observations that are not valid options for", adFinCol,
				"\nthe adFinCol must either be a logical variable, with TRUE for ad-intact,", 
				"or be a character variable with values of AD and AI for ad-clipped and ad-intact, respectively.", 
				"\n Missing data should have values of NA.")
			stop(errMessage)
		}
		# the comparison already yields a logical vector (NA stays NA)
		trapData[,adFinCol] <- trapData[,adFinCol] == "AI"
	}
	
	### first filter data to either Ad-intact or Ad-clipped
	### observations with missing fin status or missing strata are dropped
	
	finMatches <- if(AI) trapData[,adFinCol] else !trapData[,adFinCol]
	trapData <- trapData[!is.na(finMatches) & finMatches & !is.na(trapData[,strataCol]),]

	numObs <- nrow(trapData)
	if(verbose) cat("\nFound", numObs, if(AI) "ad-intact observations.\n" else "ad-clipped observations.\n")
	
	# only fish with both a PBT and a GSI result can be used
	trapData <- trapData[!is.na(trapData[,PBTcol]) & !is.na(trapData[,GSIcol]),]
	if(verbose) cat("\nDiscarding", numObs - nrow(trapData), "observations that were not", "attempted to be PBT and GSI assigned\n")
	
	if("Unassigned" %in% tags[,1]){
		warning("Unassigned is a group in tags. Unassigned is the key word to denote PBT-unassigned samples. Removing this group from tags.")
		tags <- tags[tags[,1] != "Unassigned",]
	}
	
	### this section sets all strata to use all groups in entire dataset
	###  most biologists will not want this, they will want only the groups observed
	###  to be estimated - can repurpose this code later if decide its useful
	### The same effect can be generated by the user by setting the variableValues, GSIgroups, etc
	#### but not for tags. For that, the downstream function would have to be edited
	
	# #use all GSI groups present in the data if not specified
	# if(is.na(GSIgroups[1])) GSIgroups <- sort(unique(trapData[,GSIcol])) # these are the different GSI groups in the dataset
	# 
	# #use all variable values present in the data if not specified
	# if (!is.list(variableValues)){
	# 	values <- list() # this is list of the categories in each variable
	# 	for(v in variableCols){
	# 		values[[v]] <- sort(unique(trapData[,v])) # these are the categories in var
	# 	}
	# } else {
	# 	values <- variableValues
	# 	names(values) <- variableCols
	# }
	# 
	# #use all "other" variable values present in the data if not specified
	# if (!is.list(variableValuesOth)){
	# 	valuesOth <- list() # this is list of the categories in each variable
	# 	for(v in variableColsOth){
	# 		valuesOth[[v]] <- sort(unique(trapData[,v])) # these are the categories in var
	# 	}
	# } else {
	# 	valuesOth <- variableValuesOth
	# 	names(valuesOth) <- variableColsOth
	# }
	
	# change strata variable to character - if numeric and not consecutive from 1, causes problems with assigning to the list
	trapData[,strataCol] <- as.character(trapData[,strataCol])

	# now prep each strata
	allStrata <- list() # list containing inputs for each strata, named by strata value
	for(s in sort(unique(trapData[,strataCol]))){
		strataData <- trapData[trapData[,strataCol] == s,] #select one strata
		# verbose is forced to FALSE here; this function prints its own summary messages above
		allStrata[[s]] <- prepOneStrata(trapData = strataData, tags = tags, GSIcol = GSIcol, PBTcol = PBTcol, 
							variableCols = variableCols, variableColsOth = variableColsOth, adFinCol = adFinCol, AI = AI, 
							 verbose = FALSE, GSIgroups = GSIgroups,
							 variableValues = variableValues, variableValuesOth = variableValuesOth, strataName = s, symPrior = symPrior)
	}
	
	return(allStrata)
}
# delomast/fishCompTools documentation built on Jan. 11, 2021, 1:51 a.m.