R/summarizer.R

Defines functions summarizer

Documented in summarizer

#' @title summarizer
#' @description This is a tool that quickly breaks down the results of any of the fleet
#' wrappers to provide information on the number of trips, unique vessels, unique licences, and total
#' catch.  These results can be further aggregated by Year, NAFO area, Species code, GEAR TYPE, as
#' well as by any fields that exist in the TRIPS or SETS object of the marfis or isdb results
#' (or both).
#' @param data  default is \code{NULL}. This is the entire output from any of the fleet wrappers.
#' @param units  default is \code{"KGS"}. This is the units you want weights returned in.  Valid
#' options are any of "KGS", "TONNES", "LBS".
#' @param byGr  default is \code{TRUE}. If TRUE, the summed weights and numbers of unique trips and
#' licence_ids will will be broken down by all identified gear types. If FALSE, aggregations will
#' ignore differences in gear types.
#' @param bySpp  default is \code{TRUE}. If TRUE, the summed weights and numbers of unique trips and
#' licence_ids will will be broken down by all identified landed species. If FALSE,aggregations will
#' ignore differences in landed species.
#' @param byYr  default is \code{FALSE}.   If TRUE, the summed weights and numbers of unique trips
#' and licence_ids will will be broken down by each year in which a landing was identified. If FALSE,
#' aggregations will ignore differences in year. "Year" is taken from the end date of the associated
#' trip record (i.e. <data>$marf$MARF_SETS$T_DATE2 for MARF, and <data>$isdb$ISDB_SETS$DATE_TIME for
#' ISDB).
#' @param byQuarter  default is \code{FALSE}.   If TRUE, the summed weights and numbers of unique trips
#' and licence_ids will will be broken down by each quarter in which a landing was identified. If FALSE,
#' aggregations will ignore differences in year. "Year" is taken from the end date of the associated
#' trip record (i.e. <data>$marf$MARF_SETS$T_DATE2 for MARF, and <data>$isdb$ISDB_SETS$DATE_TIME for
#' ISDB).
#' @param byNAFO  default is \code{FALSE}.   If TRUE, the summed weights and numbers of unique trips
#' and licence_ids will will be broken down by <reported> NAFO division in which a landing was
#' identified. If FALSE, weights will be summed, irrespective of <reported> NAFO division.
#' @param byCust  default is \code{NULL}. This can be the name of any field found in \code{<data>$marf$MARF_SETS}
#' by which you would like to see aggregated results
#' @param specISDB default is \code{NULL}.  If your data is for a fleet returns multiple species, you can
#' enter an ISDB species code here, and the ISDB data will be limited to those species.
#' @param specMARF default is \code{NULL}.  If your data is for a fleet returns multiple species, you can
#' enter an MARFIS species code here, and the MARF data  will be limited to those species.
#' @param doMARF  default is \code{TRUE}. This indicates that the MARFIS results should be summarized.
#' @param doISDB  default is \code{TRUE}. This indicates that the ISDB results should be summarized.
#' @param quietly default is \code{FALSE}. This specifies whether or not status messages should be
#' output to the console while the scripts run.
#' @examples \dontrun{
#' summary <- summarizer(data = Halibut2017, byGr = TRUE)
#'        }
#' @family simpleproducts
#' @return by default, a list containing an object with the results of both MARF and ISDB is
#' returned.  If either \code{doMARF} or \code{doISDB} is set to FALSE, than the returned object is
#' a dataframe
#' @author  Mike McMahon, \email{Mike.McMahon@@dfo-mpo.gc.ca}
#' @export
summarizer <- function(data=NULL, units="KGS", bySpp = TRUE, byYr= FALSE, byQuarter = FALSE, byGr = TRUE, byNAFO = FALSE, byCust = NULL, specMARF = NULL, specISDB = NULL, doMARF=T, doISDB=T, quietly = F){
  defYr <- lubridate::year(as.Date(gsub('"',"",data$params$user[data$params$user$PARAMETER == "dateStart", "VALUE"]), format="%Y-%m-%d"))

  ISDB <- NA
  MARF <- NA
  summISDB <- function(){
    if (!"isdb" %in% names(data)) return(NA)
    byCustISDB <-NULL
    t <- data$isdb$ISDB_TRIPS #[,c("TRIP_ID_ISDB", "VR", "LIC")]
    s <- data$isdb$ISDB_SETS
    s$YEAR <- lubridate::year(s$DATE_TIME)
    s$QUARTER <- lubridate::quarter(s$DATE_TIME)

    if (is.null(specISDB)) {
      specISDB <- eval(parse(text=data$params$user[data$params$user$PARAMETER == "isdbSpp", "VALUE"]))
      # if (!quietly) message(paste0("No specISDB was specified - defaulting to wrapper target species"))
    }

    c <- data$isdb$ISDB_CATCHES$ALL[data$isdb$ISDB_CATCHES$ALL$SPECCD_ID %in% specISDB,c("SPECCD_ID", "FISHSET_ID", "EST_NUM_CAUGHT", "EST_KEPT_WT", "EST_DISCARD_WT", "EST_COMBINED_WT")]

    if (any(is.na(t$VR))){
      badVR <- length(t[is.na(t$VR),])
      if (!quietly)  message(paste0(badVR, " of your ISDB trips were missing a valid Vessel. There is no way to
                     differentiate between missing vessels, so all missing values will be considered
                     as a single vessel, identified as '-999'"))
      t[["VR"]][is.na(t[["VR"]])] <- -999
    }
    if (any(is.na(t$LIC))){
      badLic <- length(t[is.na(t$LIC),])
      if (!quietly) message(paste0(badLic, " of your ISDB trips were missing a valid LICENCE. There is no way to
                     differentiate between missing licences, so all missing values will be considered
                     as a single licence, identified as '-999'"))
      t[["LIC"]][is.na(t[["LIC"]])] <- -999
    }
#
# tt<-merge(s[, !names(s) %in% c("TRIP_ID_MARF")],
#                      c[, !names(c) %in% c("TRIP_ID_MARF")], by="FISHSET_ID")
# tt_1<-merge(t, tt, by.x="TRIP_ID_ISDB", by.y="TRIP_ID")
    all <- merge(t, s[, !names(s) %in% c("TRIP_ID_MARF")], by.x="TRIP_ID_ISDB", by.y="TRIP_ID")
    all <- merge(all, c[, !names(c) %in% c("TRIP_ID_MARF")], by="FISHSET_ID")
    rm(list=c("t","s","c"))
    sumFields <- c("EST_NUM_CAUGHT","EST_KEPT_WT","EST_DISCARD_WT","EST_COMBINED_WT")
    countFields <- c("TRIP_ID_ISDB","VR","LIC")
    aggFields <-c("YEAR")

    if(!byYr) all$YEAR <- defYr
    if(!byQuarter) {
      all$QUARTER <- "ALL"
    }else{
      aggFields <- c(aggFields, "QUARTER")
    }

    if(byNAFO) {
      aggFields <- c(aggFields, "NAFO_ISDB_SETS")
    } else {
      all$NAFO_ISDB_SETS <- "ALL"
    }
    if(bySpp) {
      aggFields <- c(aggFields, "SPECCD_ID")
    } else {
      all$SPECCD_ID  <- "ALL"
    }
    if(byGr) {
      if ("GEARCD_ID" %in% names(all)){
        aggFields <- c(aggFields, "GEARCD_ID")
      }else{
        message("Can't aggregate ISDB data by Gear code - the data appears to precede its availability")
      }
    }else{
      all$GEARCD_ID <- "ALL"
    }
    if (length(byCust)>0) {
      byCustISDB <- byCust[byCust %in% names(all)]
      if (length(byCustISDB)<1){
        byCustISDB <- NULL
        if (!quietly) message("byCust field ignored for ISDB data")
      }else{
        aggFields <- c(aggFields, byCustISDB)
        if (any(is.na(all[[byCustISDB]]))){
          badCust <- length(is.na(all[[byCustISDB]]))
          if (!quietly) message(paste0(badCust, " of the ISDB records of your custom field were missing values. There is no way to
                     differentiate between these, so allwill be considered as a single entity, identified as '-999'"))
          all[[byCustISDB]][is.na(all[[byCustISDB]])] <- -999
        }
      }
    }else{
      byCustISDB <- NULL
    }
    all <- all[,c(aggFields, sumFields, countFields)]

    sums <- all[, c(sumFields, aggFields)]
    counts <- all[, c(countFields, aggFields)]

    sums <- data.table::setDT(sums)[, lapply(.SD, sum), by=c(aggFields), .SDcols=sumFields]
    counts <- data.table::setDT(counts)[ , .(NVESS = length(unique(VR)),
                                             NLICS = length(unique(LIC)),
                                             NTRIPS = length(unique(TRIP_ID_ISDB))), by = c(aggFields)]
    res <- merge(data.table::setDF(counts), data.table::setDF(sums))
    # res <- res[with(res,order(YEAR, QUARTER, NTRIPS)),]
    if (byQuarter){
      res <- res[with(res,order(YEAR, QUARTER, NTRIPS)),]
    }else{
      res <- res[with(res,order(YEAR, NTRIPS)),]
    }

    for (s in 1:length(sumFields)){
      if (sumFields[s]=="EST_NUM_CAUGHT") next
      if (units == "TONNES") res[[paste0(sumFields[s],"_TONNES")]]<-res[[sumFields[s]]]/1000
      if (units == "LBS") res[[paste0(sumFields[s],"_LBS")]]<-res[[sumFields[s]]]*2.20462
      if (units != "KGS") res[[sumFields[s]]] <- NULL
    }

    if (!is.null(byCustISDB)) colnames(res)[colnames(res)=="CUSTOM"] <- byCustISDB
    return(res)
  }
  summMARF <- function(){
    if (!"marf" %in% names(data)) return(NA)
    byCustMARF <- NULL
    #t <- data$marf$MARF_TRIPS
    t <- unique(data$marf$MARF_TRIPS[, !(names(data$marf$MARF_TRIPS) %in% c("MON_DOC_ID"))])
    if (any(is.na(t$VR_NUMBER_FISHING))){
      badVR <- length(t[is.na(t$VR_NUMBER_FISHING),])
      if (!quietly)  message(paste0(badVR, " of your MARFIS trips were missing a valid Vessel. There is no way to
                     differentiate between missing vessels, so all missing values will be considered
                     as a single vessel, identified as '-999'"))
      t[["VR"]][is.na(t[["VR"]])] <- -999
    }
    if (any(is.na(t$LICENCE_ID))){
      badLic <- length(t[is.na(t$LICENCE_ID),])
      if (!quietly) message(paste0(badLic, " of your MARFIS trips were missing a valid LICENCE. There is no way to
                     differentiate between missing licences, so all missing values will be considered
                     as a single licence, identified as '-999'"))
      t[["LICENCE_ID"]][is.na(t[["LICENCE_ID"]])] <- -999
    }
    t$YEAR <- lubridate::year(t$T_DATE2)
    t$QUARTER <- lubridate::quarter(t$T_DATE2)
    s <- unique(data$marf$MARF_SETS[, !(names(data$marf$MARF_SETS) %in% c("MON_DOC_ID" , "T_DATE1",
                                                                   "T_DATE2", "NUM_OF_EVENTS", "NUM_OF_GEAR_UNITS","DURATION_IN_HOURS", "EF_FISHED_DATETIME" , "LATITUDE", "LONGITUDE"))])
    if (any(is.na(s$LOG_EFRT_STD_INFO_ID))){
      badLESII <- length(t[is.na(t$LICENCE_ID),])
      if (!quietly) message(paste0(badLESII, " of your MARFIS sets were missing a valid LOG_EFRT_STD_INFO_ID. There is no way to
                     differentiate between missing sets within a trip, so all missing values will be considered
                     as a single LOG_EFRT_STD_INFO_ID, identified as '-999'"))
    s[["LOG_EFRT_STD_INFO_ID"]][is.na(s[["LOG_EFRT_STD_INFO_ID"]])] <- -999
    }
    #cases exist where a LOG_EFRT_STD_INFO_ID is repeated - just keep the one with the max NAFO (preferentially removes those starting with "<")
    s <-data.table::setDT(s)
    s <- s[, .SD[NAFO_MARF_SETS == max(NAFO_MARF_SETS)], by = list(TRIP_ID_MARF, LOG_EFRT_STD_INFO_ID)]
    s <- data.table::setDF(s)

    if (is.null(specMARF)) {
      specMARF <- eval(parse(text=data$params$user[data$params$user$PARAMETER == "marfSpp", "VALUE"]))
      # if (!quietly) message(paste0("No specMARF was specified - defaulting to wrapper target species"))
    }

    all <- merge(t, s)
    # rm(list=c("t","s"))
    if(!"MARF_CATCHES" %in% names(data$marf)){
      if (bySpp) message("Can't aggregate MARFIS data by species - the data appears to precede its availability")
      all$SPECIES_CODE <- "ALL"
    }else{
      c <- data$marf$MARF_CATCHES[data$marf$MARF_CATCHES$SPECIES_CODE %in% specMARF,]

      if (any(is.na(c$LOG_EFRT_STD_INFO_ID))){
        badLESII2 <- length(c[is.na(c$LICENCE_ID),])
        if (!quietly) message(paste0(badLESII2, " of your MARFIS catches were missing a valid LOG_EFRT_STD_INFO_ID. There is no way to
                     differentiate between missing sets within a trip, so all missing values will be considered
                     as a single LOG_EFRT_STD_INFO_ID, identified as '-999'"))
        c[["LOG_EFRT_STD_INFO_ID"]][is.na(c[["LOG_EFRT_STD_INFO_ID"]])] <- -999
      }
      #
      c<- data.table::setDT(c)
      c <- c[, .(RND_WEIGHT_KGS = sum(RND_WEIGHT_KGS)), by= list(TRIP_ID_MARF, LOG_EFRT_STD_INFO_ID, SPECIES_CODE)]
      c<- data.table::setDF(c)

      all <- merge(all, c)
      rm(list=c("c"))
    }
    sumFields <- c("RND_WEIGHT_KGS")
    countFields <- c("TRIP_ID_MARF","VR_NUMBER_FISHING","LICENCE_ID")
    aggFields <-c("YEAR")

    if(!byYr) all$YEAR <- defYr
    if(!byQuarter) {
      all$QUARTER <- "ALL"
    }else{
      aggFields <- c(aggFields, "QUARTER")
    }

    if(byNAFO) {
      aggFields <- c(aggFields, "NAFO_MARF_SETS")
    }else{
      all$NAFO_MARF_SETS <- "ALL"
    }

    if(bySpp) {
      aggFields <- c(aggFields, "SPECIES_CODE")
    }else{
      all$SPECIES_CODE <- "ALL"
    }

    if(byGr) {
      aggFields <- c(aggFields, "GEAR_CODE")
    }else{
      all$GEAR_CODE <- "ALL"
    }

    if (length(byCust)>0) {
      byCustMARF <- byCust[byCust %in% names(all)]
      if (length(byCustMARF)<1){
        byCustMARF <- NULL
        if (!quietly) message("byCust field ignored for MARF data")
      }else{
        aggFields <- c(aggFields, byCustMARF)
        if (any(is.na(all[[byCustMARF]]))){
          badCust <- length(is.na(all[[byCustMARF]]))
          if (!quietly) message(paste0(badCust, " of the MARFIS records of your custom field were missing values. There is no way to
                     differentiate between these, so allwill be considered as a single entity, identified as '-999'"))
          all[[byCustMARF]][is.na(all[[byCustMARF]])] <- -999
        }
      }
    }else{
      byCustMARF <- NULL
    }

    all <- all[,c(aggFields, sumFields, countFields)]
    sums <- all[, c(sumFields, aggFields)]
    counts <- all[, c(countFields, aggFields)]

    sums <- data.table::setDT(sums)[, lapply(.SD, sum), by=c(aggFields), .SDcols=sumFields]
    counts <- data.table::setDT(counts)[ , .(NVESS = length(unique(VR_NUMBER_FISHING)),
                                             NLICS = length(unique(LICENCE_ID)),
                                             NTRIPS = length(unique(TRIP_ID_MARF))), by = c(aggFields)]

    res <- merge(data.table::setDF(counts), data.table::setDF(sums))
    if (byQuarter){
      res <- res[with(res,order(YEAR, QUARTER, NTRIPS)),]
    }else{
      res <- res[with(res,order(YEAR, NTRIPS)),]
    }

    if (units == "TONNES"){
      res$RND_WEIGHT_TONNES <- res$RND_WEIGHT_KGS/1000
      res$RND_WEIGHT_KGS <- NULL
    } else if (units == "LBS"){
      res$RND_WEIGHT_LBS <- res$RND_WEIGHT_KGS*2.20462
      res$RND_WEIGHT_KGS <- NULL
    }
    return(res)
  }


  if (doISDB) ISDB <- summISDB()
  if (doMARF) MARF <- summMARF()

  res <- list()
  if (all(is.na(ISDB))) ISDB <- "No data to summarize"
  if (all(is.na(MARF))) MARF <- "No data to summarize"

  res$ISDB <- ISDB
  res$MARF <- MARF

  return(res)
}
Maritimes/Mar.bycatch documentation built on Feb. 6, 2024, 3:27 p.m.