#' Combine CalCOM and PacFIN data
#'
#' `r lifecycle::badge("experimental")` Adds required PacFIN columns to CalCOM
#' data, initializing them to meaningful values as appropriate and translating
#' from the CalCOM values when they exist in a different format.
#'
#' @param Pdata Output from [PullBDS.PacFIN()].
#' @param CalCOM A data frame supplied, more than likely by E.J. Dick or
#'   California Department of Fish and Wildlife, containing biological samples
#'   for flatfish species prior to 1990. The columns `SPECIES`, `SAMPLE_NO`,
#'   `SAMPLE_DATE` (formatted as month/day/four-digit year, e.g.,
#'   `"2/23/2012"`), `AGE`, `FISH_NO`, `TLENGTH`, `SEX`, `DEPTH`,
#'   `SumOfWEIGHT`, `SumOfTOTAL_CT`, `TOTAL_WGT`, and `PORT_COMPLEX` are
#'   required; `AGE_METHOD` is optional. A leading integer column named `X`
#'   is removed and all other columns are dropped with a message.
#'
#' @return Returns a combined dataset in PacFIN format, i.e., the result of
#'   [dplyr::full_join()] between `Pdata` and the translated CalCOM data using
#'   every column name that the two data frames share.
#'
#' @export
#'
#' @details
#' This function was made defunct in March 2021 in [commit
#' d5c0355](https://github.com/pfmc-assessments/PacFIN.Utilities/commit/d5c0355795d10b945a9e28bfb05cb6811697c852)
#' because CalCOM data were expected to be available in PacFIN and restored in
#' April 2023 after discovering that CalCOM data for flatfish prior to 1990 are
#' not currently available in PacFIN, as discussed in [issue #101](
#' https://github.com/pfmc-assessments/PacFIN.Utilities/issues/101) because the
#' early data are "in separate tables because they are bin samples (# of fish)
#' rather than cluster samples (weight based). PacFIN doesn't access those
#' tables."
#'
#' Coding of data from CalCOM to PacFIN is done for dates, genders, and areas.
#' `PSMFC_CATCH_AREA_CODE`s are derived from CalCOM `PORT_COMPLEX` values;
#' port complexes without a known area are left as `NA`.
#'
#' Other data that are not reported in CalCOM datasets (e.g., `SAMPLE_TYPE` and
#' `SAMPLE_METHOD`) are initialized to typical default values kept when cleaning
#' data so that the CalCOM data aren't rejected by [cleanPacFIN()].
#'
#' @author Melissa Haltuch, Andi Stephens, and Ian G. Taylor
#' @seealso
#' * [cleanPacFIN()], which should be run after this function
combineCalCOM <- function(Pdata, CalCOM) {
  stopifnot(is.data.frame(Pdata), is.data.frame(CalCOM))

  # Remove a leading integer column named "X" if one is present
  if (
    ncol(CalCOM) > 0 &&
      colnames(CalCOM)[1] == "X" &&
      inherits(CalCOM[[1]], "integer")
  ) {
    CalCOM <- CalCOM[, -1, drop = FALSE]
  }

  required_columns <- c(
    "SPECIES", "SAMPLE_NO", "SAMPLE_DATE", "AGE", "FISH_NO",
    "TLENGTH", "SEX", "DEPTH", "SumOfWEIGHT", "SumOfTOTAL_CT",
    "TOTAL_WGT", "PORT_COMPLEX"
  )
  # Age method is optional and only kept when it was supplied
  has_age_method <- "AGE_METHOD" %in% colnames(CalCOM)
  calcom_columns_example <- c(
    required_columns,
    if (has_age_method) "AGE_METHOD"
  )

  # Fail early with a clear message rather than with an opaque subsetting or
  # renaming error further down
  missing_columns <- setdiff(required_columns, colnames(CalCOM))
  if (length(missing_columns) > 0) {
    stop(
      "CalCOM is missing the following required columns: ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  if (!all(colnames(CalCOM) %in% calcom_columns_example)) {
    message(
      "There are columns in CalCOM that cannot currently be ",
      "processed by combineCalCOM(), the following are viable options\n",
      "and the remaining columns are removed:\n",
      glue::glue_collapse(
        glue::single_quote(calcom_columns_example),
        sep = ", "
      )
    )
    CalCOM <- CalCOM[, calcom_columns_example, drop = FALSE]
  }

  # `[[` is used throughout so that data frames and tibbles behave the same;
  # `[, "col"]` returns a one-column tibble and `$` only partially matches
  # names for base data frames
  if (has_age_method) {
    CalCOM[["AGE_METHOD"]] <- as.character(CalCOM[["AGE_METHOD"]])
  } else {
    # TODO what age method are calCOM ages
    CalCOM[["AGE_METHOD"]] <- rep("", nrow(CalCOM))
    cli::cli_alert("Users must set AGE_METHOD for CalCOM data manually.")
  }

  # Break out Year, Month, Day from vector formatted "2/23/2012"; dates that
  # do not follow this format become NA
  trueDate <- as.Date(
    x = as.character(CalCOM[["SAMPLE_DATE"]]),
    format = "%m/%d/%Y"
  )
  CalCOM[["SAMPLE_YEAR"]] <- as.numeric(format(trueDate, format = "%Y"))
  CalCOM[["SAMPLE_MONTH"]] <- as.numeric(format(trueDate, format = "%m"))
  CalCOM[["SAMPLE_DAY"]] <- as.numeric(format(trueDate, format = "%d"))

  # Fix Areas: translate the port complex to a PSMFC area; the column is
  # always character, even when no port complex is recognized
  name_area <- "PSMFC_CATCH_AREA_CODE"
  port_complex <- CalCOM[["PORT_COMPLEX"]]
  CalCOM[[name_area]] <- dplyr::case_when(
    port_complex %in% c("ERK", "CRS") ~ "1C",
    port_complex %in% c("BRG", "OSF", "MNT", "1") ~ "1B",
    port_complex %in% c("OSB", "MRO", "2") ~ "1A",
    .default = NA_character_
  )

  # Create PacFIN format matrix and fill
  CalCOM <- CalCOM %>%
    dplyr::rename(
      PACFIN_SPECIES_CODE = "SPECIES",
      SAMPLE_NUMBER = "SAMPLE_NO",
      FISH_SEQUENCE_NUMBER = "FISH_NO",
      FISH_LENGTH = "TLENGTH",
      DEPTH_AVERAGE_FATHOMS = "DEPTH",
      AGENCY_PORT_CODE = "PORT_COMPLEX",
      # landed weight for this sample in pounds
      WEIGHT_OF_LANDING_LBS = "TOTAL_WGT",
      CLUSTER_WEIGHT_LBS = "SumOfWEIGHT",
      AGE_METHOD1 = "AGE_METHOD"
    ) %>%
    dplyr::mutate(
      SEX_CODE = nwfscSurvey::codify_sex(SEX),
      AGENCY_CODE = "CalCOM",
      age1 = as.numeric(AGE),
      FISH_LENGTH_TYPE_CODE = "T",
      FISH_LENGTH_UNITS = "MM",
      OBSERVED_FREQUENCY = 1,
      FINAL_FISH_AGE_IN_YEARS = age1,
      PACFIN_PORT_CODE = dplyr::case_when(
        # TODO: what are calCOM port complexes 1 and 2
        AGENCY_PORT_CODE == 1 ~ "",
        AGENCY_PORT_CODE == 2 ~ "",
        # case_when() needs every outcome to share a type, so the fallback
        # is coerced to character in case the port complex is numeric
        .default = as.character(AGENCY_PORT_CODE)
      ),
      SPECIES_WEIGHT_LBS = CLUSTER_WEIGHT_LBS,
      SAMPLE_METHOD_CODE = "R",
      SAMPLE_TYPE = "M",
      PACFIN_GEAR_CODE = "GFT",
      AGENCY_GEAR_CODE = PACFIN_GEAR_CODE
    ) %>%
    dplyr::select(-SAMPLE_DATE, -AGE, -SEX, -SumOfTOTAL_CT)

  # Determine what colnames are in Pdata; when check_columns_downloaded() is
  # not TRUE the CalCOM names are passed through changecol_pacfin(),
  # presumably so that they match the names used in Pdata (see those helpers)
  if (!check_columns_downloaded(Pdata)) {
    CalCOM <- changecol_pacfin(CalCOM)
  }

  dplyr::full_join(
    x = Pdata,
    y = CalCOM,
    by = intersect(colnames(Pdata), colnames(CalCOM))
  )
}
#' Check if any species code is one of the CalCOM bin-sample species
#'
#' @param x A vector of species codes.
#'
#' @return A single logical value, `TRUE` if at least one of `"DOVR"`,
#'   `"EGLS"`, `"PTRL"`, or `"REX"` is present in `x` and `FALSE` otherwise,
#'   including when `x` is empty.
#' @noRd
check_pacfin_species_code_calcom <- function(x) {
  species_codes_in_calcom_bin <- c("DOVR", "EGLS", "PTRL", "REX")
  # `%in%` is vectorized and never returns NA, so no element-wise map is needed
  any(species_codes_in_calcom_bin %in% x)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.