# R/zoop_downloader.R

# Defines functions Zoopdownloader

# Documented in Zoopdownloader

#' Downloads and combines zooplankton datasets collected by the Interagency Ecological Program from the Sacramento-San Joaquin Delta
#'
#' This function downloads all IEP zooplankton datasets from the internet,
#' converts them to a consistent format, binds them together, and exports
#' the combined dataset as .Rds R data files and/or an R object.
#' Datasets currently include "EMP" (Environmental Monitoring Program),
#' "FRP" (Fish Restoration Program), "FMWT" (Fall Midwater Trawl), "STN" (Townet Survey), "20mm" (20mm survey),
#' "DOP" (Directed Outflow Project Lower Trophic Study), and "YBFMP" (Yolo Bypass Fish Monitoring Program).
#' @param Data_sets Datasets to include in combined data. Choices include "EMP_Meso", "FMWT_Meso", "STN_Meso", "20mm_Meso", "FRP_Meso", "YBFMP_Meso", "EMP_Micro", "YBFMP_Micro", "FRP_Macro", "EMP_Macro", "FMWT_Macro", "STN_Macro", "DOP_Macro", and "DOP_Meso". Defaults to including all datasets except the two YBFMP datasets.
#' @param Biomass Whether to add carbon biomass (carbon biomass per unit effort (\eqn{\mu}g/ \ifelse{html}{\out{m<sup>3</sup>}}{\eqn{m^{3}}})) to the dataset (where conversion equations and required data are available). Defaults to \code{Biomass = TRUE}.
#' @param Data_folder Path to folder in which source datasets are stored, and to which you would like datasets to be downloaded if you set \code{Redownload_data = TRUE}. If you do not want to store every source dataset, you can leave this at the default \code{tempdir()}. If you do not wish to redownload these datasets every time you run the function, you can set this to a directory on your computer and run the function in the future with \code{Redownload_data = FALSE}, which will load the source datasets from \code{Data_folder} instead of downloading them again.
#' @param Save_object Should the combined data be saved to disk? Defaults to \code{Save_object = TRUE}.
#' @param Return_object Should data be returned as an R object? If \code{TRUE}, the function will return the full combined dataset. Defaults to \code{Return_object = FALSE}.
#' @param Return_object_type If \code{Return_object = TRUE}, should data be returned as a combined dataframe (\code{Return_object_type = "Combined"}) or a list with component "Zooplankton" containing the zooplankton data and component "Environment" containing the environmental data (\code{Return_object_type = "List"}, the default). A list is required to feed data into the \code{Zoopsynther} function without saving the combined dataset to disk.
#' @param Redownload_data Should source datasets be redownloaded from the internet? Defaults to \code{Redownload_data = FALSE}.
#' @param Download_method Method used to download files. See argument \code{method} options in \code{\link[utils]{download.file}}. Defaults to "auto".
#' @param Zoop_path File path specifying the folder and filename of the zooplankton dataset. Defaults to \code{Zoop_path = file.path(Data_folder, "zoopforzooper")}.
#' @param Env_path File path specifying the folder and filename of the dataset with accessory environmental parameters. Defaults to \code{Env_path = file.path(Data_folder, "zoopenvforzooper")}.
#' @param Crosswalk Crosswalk table to be used for conversions. Must have columns named for each unique combination of source and size class with an underscore separator, as well as all taxonomic levels Phylum through Species, Taxname (full scientific name) and Lifestage. See \code{\link{crosswalk}} (the default) for an example.
#' @param Stations Latitudes and longitudes for each unique station. See \code{\link{stations}} (the default) for an example.
#' @keywords download integration synthesis zooplankton
#' @import data.table
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @return If \code{Return_object = TRUE}, returns the combined dataset as a list or tibble, depending on whether \code{Return_object_type} is set to \code{"List"} or \code{"Combined"}. If \code{Save_object = TRUE}, writes 2 .Rds files to disk: one with the zooplankton catch data and another with accessory environmental parameters.
#' @author Sam Bashevkin
#' @details Note that EMP Macro samples with QAQC flags (any value of AmphipodCode other than "A") have had their Amphipod CPUE set to NA in this function. For more information on the source datasets see \code{\link{zooper}}.
#' @examples
#' \dontrun{
#' Data <- Zoopdownloader(Data_folder = tempdir(), Return_object = TRUE,
#' Save_object = FALSE, Redownload_data = TRUE)
#' }
#' @seealso \code{\link{Zoopsynther}}, \code{\link{crosswalk}}, \code{\link{stations}}, \code{\link{zooper}}
#' @export

Zoopdownloader <- function(
    Data_sets = c("EMP_Meso", "FMWT_Meso", "STN_Meso",
                  "20mm_Meso", "FRP_Meso", "EMP_Micro",
                  "FRP_Macro", "EMP_Macro", "FMWT_Macro",
                  "STN_Macro", "DOP_Meso", "DOP_Macro"),
    Biomass = TRUE,
    Data_folder = tempdir(),
    Save_object = TRUE,
    Return_object = FALSE,
    Return_object_type = "List",
    Redownload_data = FALSE,
    Download_method="auto",
    Zoop_path = file.path(Data_folder, "zoopforzooper"),
    Env_path = file.path(Data_folder, "zoopenvforzooper"),
    Crosswalk = zooper::crosswalk,
    Stations = zooper::stations){

  # Setup -------------------------------------------------------------------
  where <- utils::getFromNamespace("where", "tidyselect")

  # Check arguments

  if (!purrr::every(Data_sets, ~.%in%c("EMP_Meso", "FMWT_Meso", "STN_Meso",
                                       "20mm_Meso", "FRP_Meso","EMP_Micro",
                                       "FRP_Macro", "EMP_Macro", "FMWT_Macro",
                                       "STN_Macro", "YBFMP_Meso", "YBFMP_Micro",
                                       "DOP_Meso", "DOP_Macro"))){
    stop("Data_sets must contain one or more of the following options: 'EMP_Meso',
         'FMWT_Meso', 'STN_Meso', '20mm_Meso', 'FRP_Meso', 'EMP_Micro', 'FRP_Macro', 'EMP_Macro',
         'FMWT_Macro', 'STN_Macro', 'YBFMP_Meso', 'YBFMP_Micro', 'DOP_Macro', 'DOP_Meso'")
  }

  if (!Return_object_type%in%c("List", "Combined")){
    stop("Return_object_type must be either 'List' or 'Combined'.")
  }

  if(!purrr::every(list(Save_object, Return_object, Redownload_data), is.logical)){
    stop("Save_object, Return_object, and Redownload_data must all have logical arguments.")
  }

  if(Biomass & !("Macro"%in%stringr::str_extract(Data_sets, "(?<=_).*") & "EMP_Macro"%in%Data_sets)){
    stop("Biomass are only available for macrozooplankton, and currently only available for EMP, so EMP_Macro must be selected if Length = TRUE.")
  }

  # Load station key to later incorporate latitudes and longitudes

  stations <- Stations

  # Initialize list of dataframes

  data.list<-list()

  if(Biomass){
    lengths.list<-list()
  }

  # Find URLs ---------------------------------------------------------------

  URLs<-zoop_urls(unique(stringr::str_extract(Data_sets, "^[^_]+(?=_)")))

  # EMP Meso ---------------------------------------------------------------------
  if("EMP_Meso"%in%Data_sets) {

    #download the file
    if (!file.exists(file.path(Data_folder, "EMP_meso.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Meso,
            destfile=file.path(Data_folder, "EMP_meso.csv"), mode="wb", method=Download_method)
    }


    # Import the EMP data

    zoo_EMP_Meso<-readr::read_csv(file.path(Data_folder, "EMP_meso.csv"),
                                  col_types=readr::cols_only(SampleDate="c", Time="c", StationNZ="c",
                                                             Chl_a="d", Secchi="d", Temperature="d",
                                                             ECSurfacePreTow="d", ECBottomPreTow="d",
                                                             Volume="d", Depth="d", ACARTELA="d", ACARTIA="d",
                                                             DIAPTOM="d", EURYTEM="d", OTHCALAD="d",
                                                             PDIAPFOR="d", PDIAPMAR="d", SINOCAL="d",
                                                             TORTANUS="d", ACANTHO="d", LIMNOSPP="d",
                                                             LIMNOSINE="d", LIMNOTET="d", OITHDAV="d",
                                                             OITHSIM="d", OITHSPP="d", OTHCYCAD="d",
                                                             HARPACT="d", CALJUV="d", EURYJUV="d",
                                                             OTHCALJUV="d", PDIAPJUV="d", SINOCALJUV="d",
                                                             ASINEJUV="d", ACARJUV="d", DIAPTJUV="d",
                                                             TORTJUV="d", CYCJUV="d", LIMNOJUV="d",
                                                             OITHJUV="d", OTHCYCJUV="d", COPNAUP="d",
                                                             EURYNAUP="d", OTHCOPNAUP="d", PDIAPNAUP="d",
                                                             SINONAUP="d", BOSMINA="d", DAPHNIA="d",
                                                             DIAPHAN="d",OTHCLADO="d", ASPLANCH="d",
                                                             KERATELA="d",OTHROT="d", POLYARTH="d",
                                                             SYNCH="d",SYNCHBIC="d", TRICHO="d",
                                                             BARNNAUP="d", CRABZOEA="d"))

    # Tranform from "wide" to "long" format, add some variables,
    # alter data to match other datasets

    data.list[["EMP_Meso"]] <- zoo_EMP_Meso%>%
      dplyr::filter(!is.na(.data$SampleDate))%>%
      dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"),
                    Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_, paste(.data$SampleDate, .data$Time)),
                                                        c("%Y-%m-%d %I:%M %p"), tz="Etc/GMT+8"), #create a variable for datetime
                    Datetime=lubridate::with_tz(.data$Datetime, "America/Los_Angeles"))%>% # Ensure everything ends up in local time
      tidyr::pivot_longer(cols=c(-"SampleDate", -"StationNZ", -"Time", -"Secchi", -"Chl_a", -"Temperature",
                                 -"ECSurfacePreTow", -"ECBottomPreTow", -"Volume", -"Datetime", -"Depth"),
                          names_to="EMP_Meso", values_to="CPUE")%>% #transform from wide to long
      dplyr::mutate(Source="EMP",
                    SizeClass="Meso")%>% #add variable for data source
      dplyr::select("Source", Date="SampleDate", "Datetime",
                    Station="StationNZ", Chl = "Chl_a", CondBott = "ECBottomPreTow", CondSurf = "ECSurfacePreTow", "Secchi", "SizeClass",
                    "Temperature", "Volume", BottomDepth="Depth", "EMP_Meso", "CPUE")%>% #Select for columns in common and rename columns to match
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("EMP_Meso", "Lifestage", "Taxname", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Intro", "EMPstart", "EMPend")%>% #only retain EMP codes
                         dplyr::filter(!is.na(.data$EMP_Meso))%>% #Only retain Taxnames corresponding to EMP codes
                         dplyr::distinct(),
                       by="EMP_Meso")%>%
      dplyr::filter(!is.na(.data$Taxname))%>% #Should remove all the summed categories in original dataset
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
                    Tide="1",# All EMP samples collected at high slack
                    TowType="Oblique",
                    BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$EMPstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$EMPstart & .data$Date < .data$EMPend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$EMPend ~ NA_real_
      ))%>%
      dplyr::select(-"EMP_Meso", -"EMPstart", -"EMPend", -"Intro")%>% #Remove EMP taxa codes
      dplyr::select(-"Datetime")%>% #Add this back in when other EMP data have time
      dtplyr::lazy_dt()%>% #Speed up code using dtplyr package that takes advantage of data.table speed
      dplyr::group_by(dplyr::across(-"CPUE"))%>%
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #Some taxa now have the same names (e.g., CYCJUV and OTHCYCJUV) so we now add those categories together.
      dplyr::ungroup()%>%
      tibble::as_tibble() %>% #required to finish operation after lazy_dt()
      dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long


    cat("\nEMP_Meso finished!\n\n")
  }


  # DOP Meso ---------------------------------------------------------------------
  if("DOP_Meso"%in%Data_sets) {

    #download the files
    if (!file.exists(file.path(Data_folder, "DOP_Meso.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$DOP$Meso,
            destfile=file.path(Data_folder, "DOP_Meso.csv"), mode="wb", method=Download_method)
    }
    if (!file.exists(file.path(Data_folder, "DOP_trawls.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$DOP$trawls,
            destfile=file.path(Data_folder, "DOP_trawls.csv"), mode="wb", method=Download_method)
    }


    # Import the DOP data

    zoo_DOP_Meso<-readr::read_csv(file.path(Data_folder, "DOP_Meso.csv"),
                                  col_types=readr::cols_only(ICF_ID="c", Acanthocyclops_spp_adult="d", Acanthocyclops_vernalis_adult="d",
                                                             Acanthocyclops_vernalis_copepodid="d", Acartia_spp_adult="d", Acartia_spp_copepodid="d",
                                                             Acartiella_sinensis_adult="d", Acartiella_sinensis_copepodid="d", Asplanchna_spp="d",
                                                             Barnacle_UNID_nauplii="d", Bosmina_longirostris="d", Brachionidae_UNID="d",
                                                             Brachionus_spp="d", Calanoid_UNID_adult="d", Calanoid_UNID_copepodid="d",
                                                             Camptocercus_spp="d", Chydoridae_UNID="d", Chydorus_spp="d",
                                                             Cladocera_UNID="d", Copepod_UNID_nauplii="d", Crab_UNID_zoea="d",
                                                             Cyclopoid_UNID_adult="d", Cyclopoid_UNID_copepodid="d", Daphnia_spp="d",
                                                             Daphniidae_UNID="d", Diaptomidae_UNID_adult="d", Diaptomidae_UNID_copepodid="d",
                                                             Ditrichocorycaeus_affinis_adult="d", Eurytemora_affinis_adult="d", Eurytemora_affinis_copepodid="d",
                                                             Eurytemora_spp_nauplii="d", Harpacticoid_UNID="d", Holopedium_gibberum="d",
                                                             Ilyocryptus_spp="d", Keratella_spp="d", Labidocera_spp_adult="d",
                                                             Labidocera_spp_copepodid="d", Leptodora_spp="d", Limnoithona_sinensis_adult="d",
                                                             Limnoithona_sinensis_copepodid="d", Limnoithona_spp_adult="d", Limnoithona_spp_copepodid="d",
                                                             Limnoithona_tetraspina_adult="d", Limnoithona_tetraspina_copepodid="d", Macrothrix_spp="d",
                                                             Moina_spp="d", Oithona_davisae_adult="d", Oithona_davisae_copepodid="d",
                                                             Oithona_similis_adult="d", Oithona_similis_copepodid="d", Oithona_spp_adult="d",
                                                             Oithona_spp_copepodid="d", Osphranticum_labronectum_adult="d", Osphranticum_labronectum_copepodid="d",
                                                             Ostracoda_UNID="d", Paracalanus_parvus_adult="d", Paracalanus_parvus_copepodid="d",
                                                             Platyias_spp="d", Podonidae_UNID="d", Polyarthra_spp="d",
                                                             Pseudodiaptomus_euryhalinus_adult="d", Pseudodiaptomus_forbesi_adult="d", Pseudodiaptomus_forbesi_copepodid="d",
                                                             Pseudodiaptomus_marinus_adult="d", Pseudodiaptomus_marinus_copepodid="d", Pseudodiaptomus_spp_adult="d",
                                                             Pseudodiaptomus_spp_copepodid="d", Pseudodiaptomus_spp_nauplii="d", Rotifer_UNID="d",
                                                             Scapholeberis_spp="d", Sididae_UNID="d", Sinocalanus_doerrii_adult="d",
                                                             Sinocalanus_doerrii_copepodid="d", Sinocalanus_doerrii_nauplii="d", Synchaeta_bicornis="d",
                                                             Synchaeta_spp="d", Tortanus_dextrilobatus_adult="d", Tortanus_discaudatus_adult="d",
                                                             Tortanus_spp_copepodid="d", Trichocerca_spp="d"))

    zoo_DOP_trawls<-readr::read_csv(file.path(Data_folder, "DOP_trawls.csv"),
                                    col_types=readr::cols_only(ICF_ID="c", Date="c", Start_Time="c",
                                                               Station_Code="c", Habitat="c", Latitude="d", Longitude="d",
                                                               Start_Depth="d", Temperature="d", Conductivity="d",
                                                               Turbidity="d", pH="d", DO="d", Microcystis="c",
                                                               Chl_a="d", Secchi="d", Mesozooplankton_Volume="d"))

    # Tranform from "wide" to "long" format, add some variables,
    # alter data to match other datasets

    data.list[["DOP_Meso"]] <- zoo_DOP_Meso %>%
      tidyr::pivot_longer(cols = !"ICF_ID", names_to = "DOP_Meso", values_to = "CPUE") %>%
      dplyr::left_join(zoo_DOP_trawls, by="ICF_ID") %>%
      dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles"),
                    Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Start_Time), NA_character_, paste(.data$Date, .data$Start_Time)),
                                                        "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"), #create a variable for datetime
                    Source = "DOP", #add variable for data source
                    SizeClass = "Meso") %>%
      dplyr::filter(!is.na(.data$Mesozooplankton_Volume)) %>% #get rid of environmental variables with no data

      #Select variables we are interested in.
      dplyr::select("Source", "Date", "Datetime",
                    Station = "Station_Code", Chl = "Chl_a", CondSurf = "Conductivity", "Secchi", "SizeClass",
                    "Temperature", TurbidityNTU ="Turbidity", "pH", "DO", "Microcystis",
                    Volume = "Mesozooplankton_Volume", BottomDepth = "Start_Depth",
                    "DOP_Meso", "CPUE", "Latitude", "Longitude", "ICF_ID", TowType="Habitat") %>%
      dplyr::left_join(Crosswalk %>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("DOP_Meso", "Lifestage", "Taxname", "Phylum",
                                       "Class", "Order", "Family", "Genus", "Species",
                                       "DOPstart", "DOPend", "Intro")%>% #only retain dop codes
                         dplyr::filter(!is.na(.data$DOP_Meso))%>% #Only retain Taxnames corresponding to EMP codes
                         dplyr::distinct(),
                       by="DOP_Meso")%>%
      dplyr::filter(!is.na(.data$Taxname), !is.na(.data$CPUE)) %>%  #get rid of the lines with "NA" because the critter wasn't counted in this sample.
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    SampleID=paste(.data$Source, .data$Station, .data$Date, .data$ICF_ID), #Create identifier for each sample
                    TowType=dplyr::recode(.data$TowType, `Channel Surface`="Surface", Shoal="Surface",
                                          `Channel Deep`="Bottom"),
                    CondBott=ifelse(.data$TowType=="Bottom", .data$CondSurf, NA), # Move salinity to bottom for bottom samples
                    dplyr::across(c("Chl", "CondSurf", "Secchi", "Temperature", "TurbidityNTU", "pH", "DO", "Microcystis"),
                                  ~ifelse(.data$TowType=="Bottom", NA, .x)), # Remove bottom samples for variables that aren't retained
                    BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$DOPstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$DOPstart & .data$Date < .data$DOPend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$DOPend ~ NA_real_)) %>%
      dplyr::filter(!is.na(.data$CPUE)) %>%
      dplyr::select(-"DOP_Meso", -"ICF_ID", -"DOPstart", -"DOPend", -"Intro") #Remove DOP code
    cat("\nDOP_Meso finished!\n\n")

  }


  # DOP Macro ---------------------------------------------------------------------
  if("DOP_Macro"%in%Data_sets) {

    #download the files
    if (!file.exists(file.path(Data_folder, "DOP_Macro.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$DOP$Macro,
            destfile=file.path(Data_folder, "DOP_Macro.csv"), mode="wb", method=Download_method)
    }
    if (!file.exists(file.path(Data_folder, "DOP_trawls.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$DOP$trawls,
            destfile=file.path(Data_folder, "DOP_trawls.csv"), mode="wb", method=Download_method)
    }


    # Import the DOP data

    zoo_DOP_Macro<-readr::read_csv(file.path(Data_folder, "DOP_Macro.csv"),
                                   col_types=readr::cols_only(ICF_ID="c", Alienacanthomysis_macropsis="d", Americorophium_spinicorne="d",
                                                              Americorophium_spp="d", Americorophium_stimpsoni="d", Ampelisca_abdita="d",
                                                              Amphipod_UNID="d", Ampithoe_spp="d", Ampithoe_valida="d",
                                                              Corophiidae_UNID="d", Crangonyx_spp="d", Cumacean_UNID="d",
                                                              Deltamysis_holmquistae="d", Dexaminidae_UNID="d", Eogammarus_spp="d",
                                                              Exopalaemon_spp="d", Gammarus_daiberi="d", Grandidierella_japonica="d",
                                                              Grandifoxus_grandis="d", Hyalella_spp="d", Hyperacanthomysis_longirostris="d",
                                                              Isopoda_UNID="d", Monocorophium_acherusicum="d", Mysid_UNID="d",
                                                              Neomysis_kadiakensis="d", Neomysis_mercedis="d", Oedicerotidae_UNID="d",
                                                              Orientomysis_aspera="d", Orientomysis_hwanhaiensis="d", Pleustidae_UNID="d",
                                                              Shrimp_UNID_larvae="d", Sinocorophium_alienense="d", Tanaidacea_UNID="d"))

    zoo_DOP_trawls<-readr::read_csv(file.path(Data_folder, "DOP_trawls.csv"),
                                    col_types=readr::cols_only(ICF_ID="c", Date="c", Start_Time="c",
                                                               Station_Code="c", Habitat="c", Latitude="d", Longitude="d",
                                                               Start_Depth="d", Temperature="d", Conductivity="d",
                                                               Turbidity="d", pH="d", DO="d", Microcystis="c",
                                                               Chl_a="d", Secchi="d", Macrozooplankton_Volume="d"))

    # Tranform from "wide" to "long" format, add some variables,
    # alter data to match other datasets

    data.list[["DOP_Macro"]] <- zoo_DOP_Macro %>%
      tidyr::pivot_longer(cols = !"ICF_ID", names_to = "DOP_Macro", values_to = "CPUE") %>%
      dplyr::left_join(zoo_DOP_trawls, by="ICF_ID") %>%
      dplyr::filter(!is.na(.data$Macrozooplankton_Volume)) %>%
      dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles"),
                    Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Start_Time), NA_character_, paste(.data$Date, .data$Start_Time)),
                                                        "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"), #create a variable for datetime,
                    Source = "DOP", #add variable for data source
                    SizeClass = "Macro") %>%

      #Select variables we are interested in. I need to check on the latitude/longitude issue with Sam.
      dplyr::select("Source", "Date", "Datetime",
                    Station = "Station_Code", Chl = "Chl_a", CondSurf = "Conductivity", "Secchi", "SizeClass",
                    "Temperature", TurbidityNTU = "Turbidity", "pH", "DO", "Microcystis",
                    Volume = "Macrozooplankton_Volume", BottomDepth = "Start_Depth", "ICF_ID",
                    "DOP_Macro", "CPUE", "Latitude", "Longitude", TowType="Habitat") %>%
      dplyr::left_join(Crosswalk %>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("DOP_Macro", "Lifestage", "Taxname", "Phylum",
                                       "Class", "Order", "Family", "Genus", "Species",
                                       "DOPstart", "DOPend", "Intro")%>% #only retain dop codes
                         dplyr::filter(!is.na(.data$DOP_Macro))%>% #Only retain Taxnames corresponding to EMP codes
                         dplyr::distinct(),
                       by="DOP_Macro")%>%
      dplyr::filter(!is.na(.data$Taxname), !is.na(.data$CPUE)) %>%
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    SampleID=paste(.data$Source, .data$Station, .data$Date, .data$ICF_ID), #Create identifier for each sample
                    TowType=dplyr::recode(.data$TowType, `Channel Surface`="Surface", Shoal="Surface",
                                          `Channel Deep`="Bottom"),
                    CondBott=ifelse(.data$TowType=="Bottom", .data$CondSurf, NA),
                    dplyr::across(c("Chl", "CondSurf", "Secchi", "Temperature", "TurbidityNTU", "pH", "DO", "Microcystis"),
                                  ~ifelse(.data$TowType=="Bottom", NA, .x)),
                    BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$DOPstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$DOPstart & .data$Date < .data$DOPend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$DOPend ~ NA_real_)) %>%
      dplyr::filter(!is.na(.data$CPUE)) %>%
      dplyr::select(-"DOP_Macro", -"ICF_ID", -"DOPstart", -"DOPend", -"Intro") #Remove DOP code
    cat("\nDOP_Macro finished!\n\n")

  }


  # FMWTSTN Meso --------------------------------------------------------------------

  if("FMWT_Meso"%in%Data_sets | "STN_Meso"%in%Data_sets) {

    #download the file
    if (!file.exists(file.path(Data_folder, "FMWTSTN_Meso.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$FMWTSTN$Meso,
            destfile=file.path(Data_folder,"FMWTSTN_Meso.csv"), mode="wb", method=Download_method)
    }

    if (!file.exists(file.path(Data_folder, "SMSCG_Meso.csv")) | Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$SMSCG$Meso,
            destfile=file.path(Data_folder, "SMSCG_Meso.csv"), mode="wb", method=Download_method)
    }


    # Import the FMWT data

    zoo_FMWT_Meso <- readr::read_csv(file.path(Data_folder, "FMWTSTN_Meso.csv"),
                                     col_types=readr::cols_only(Project="c", Year="d", Survey="d",
                                                                Date="c", Station="c", Time="c",
                                                                TideCode="c", DepthBottom="d", CondSurf="d",
                                                                CondBott="d", TempSurf="d", Secchi="d",Turbidity="d",
                                                                Microcystis="c", Volume="d",
                                                                ACARTELA="d", ACARTIA="d", DIAPTOM="d",
                                                                EURYTEM="d", OTHCALAD="d", PDIAPFOR="d",
                                                                PDIAPMAR="d", SINOCAL="d", TORTANUS="d",
                                                                ACANTHO="d", LIMNOSPP="d", LIMNOSINE="d",
                                                                LIMNOTET="d", OITHDAV="d", OITHSIM="d",
                                                                OITHSPP="d", OTHCYCAD="d", HARPACT="d",
                                                                EURYJUV="d", OTHCALJUV="d", PDIAPJUV="d",
                                                                SINOCALJUV="d", ASINEJUV="d", ACARJUV="d",
                                                                DIAPTJUV="d", TORTJUV="d", LIMNOJUV="d",
                                                                OITHJUV="d", OTHCYCJUV="d", EURYNAUP="d",
                                                                OTHCOPNAUP="d", PDIAPNAUP="d", SINONAUP="d",
                                                                BOSMINA="d", DAPHNIA="d", DIAPHAN="d",
                                                                OTHCLADO="d", ASPLANCH="d", KERATELA="d",
                                                                OTHROT="d", POLYARTH="d", SYNCH="d",
                                                                TRICHO="d", BARNNAUP="d", CRABZOEA="d",
                                                                OSTRACOD="d", CUMAC="d"))%>%
      dplyr::mutate(ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station),
                    Date=lubridate::parse_date_time(.data$Date, "%m/%d/%Y", tz="America/Los_Angeles"))

    zoo_SMSCG_Meso<-readr::read_csv(file.path(Data_folder, "SMSCG_Meso.csv"),
                                    col_types=readr::cols_only(Project="c", Year="d", Survey="d",
                                                               Date="c", Station="c", Time="c",
                                                               TideCode="c", DepthBottom="d", CondSurf="d",
                                                               PPTSurf="d", CondBott="d", PPTBott="d",
                                                               TempSurf="d", TempBottom="d", Secchi="d",
                                                               Turbidity="d", Microcystis="c", Volume="d",
                                                               ACARTELA="d", ACARTIA="d", DIAPTOM="d",
                                                               EURYTEM="d", OTHCALAD="d", PDIAPFOR="d",
                                                               PDIAPMAR="d", SINOCAL="d", TORTANUS="d",
                                                               ACANTHO="d", LIMNOSPP="d", LIMNOSINE="d",
                                                               LIMNOTET="d", OITHDAV="d", OITHSIM="d",
                                                               OTHCYCAD="d", HARPACT="d", EURYJUV="d",
                                                               OTHCALJUV="d", PDIAPJUV="d", SINOCALJUV="d",
                                                               ASINEJUV="d", ACARJUV="d", DIAPTJUV="d",
                                                               TORTJUV="d", LIMNOJUV="d", OITHJUV="d",
                                                               OTHCYCJUV="d", EURYNAUP="d", OTHCOPNAUP="d",
                                                               PDIAPNAUP="d", SINONAUP="d", BOSMINA="d",
                                                               DAPHNIA="d", DIAPHAN="d", OTHCLADO="d",
                                                               ASPLANCH="d", KERATELA="d", OTHROT="d",
                                                               POLYARTH="d", SYNCH="d", TRICHO="d",
                                                               BARNNAUP="d", CRABZOEA="d", OSTRACOD="d", CUMAC="d"))%>%
      dplyr::mutate(Project=dplyr::recode(.data$Project, TNS="STN"),
                    ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station),
                    Date=lubridate::parse_date_time(.data$Date, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
      dplyr::filter(!.data$ID%in%unique(zoo_FMWT_Meso$ID) & .data$Project!="EMP")%>%
      dplyr::mutate(Station=dplyr::if_else(.data$Project=="FRP", paste(.data$Project, .data$Station), .data$Station),
                    Project=dplyr::recode(.data$Project, FRP="STN"))

    # Transform from "wide" to "long" format, add some variables,
    # alter data to match other datasets

    data.list[["FMWT_Meso"]] <- zoo_FMWT_Meso%>%
      dplyr::bind_rows(zoo_SMSCG_Meso)%>%
      dplyr::select(-"ID")%>%
      dplyr::mutate(Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time) | !stringr::str_detect(.data$Time, stringr::fixed(":")),
                                                                       NA_character_,
                                                                       paste(.data$Date, .data$Time)), "%Y-%m-%d %H:%M", tz="America/Los_Angeles"))%>% #create a variable for datetime
      tidyr::pivot_longer(cols=c(-"Project", -"Year", -"Survey", -"Date", -"Datetime",
                                 -"Station",-"Time", -"TideCode",
                                 -"DepthBottom", -"CondSurf",
                                 -"CondBott",  -"TempSurf", -"Secchi",
                                 -"Turbidity", -"Microcystis",
                                 -"Volume"),
                          names_to="FMWT_Meso", values_to="CPUE")%>% #transform from wide to long
      dplyr::select(Source = "Project", "Year", "Date", "Datetime", "Station", Tide = "TideCode",
                    BottomDepth = "DepthBottom", "CondSurf", "CondBott", Temperature = "TempSurf",
                    "Secchi", TurbidityNTU = "Turbidity", "Microcystis", "Volume", "FMWT_Meso", "CPUE")%>% #Select for columns in common and rename columns to match
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("FMWT_Meso", "Lifestage", "Taxname", "Phylum", "Class",
                                       "Order", "Family", "Genus", "Species", "Intro",
                                       "FMWTstart", "FMWTend")%>% #only retain FMWT codes
                         dplyr::filter(!is.na(.data$FMWT_Meso))%>% #Only retain Taxnames corresponding to FMWT codes
                         dplyr::distinct(),
                       by = "FMWT_Meso")%>%
      dplyr::filter(!is.na(.data$Taxname))%>%
      dplyr::mutate(Station=dplyr::recode(.data$Station, MONT="Mont", HONK="Honk"),
                    Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    Microcystis=dplyr::if_else(.data$Microcystis=="6", "2", .data$Microcystis), #Microsystis value of 6 only used from 2012-2015 and is equivalent to a 2 in other years, so just converting all 6s to 2s.
                    SampleID=paste(.data$Source, .data$Station, .data$Date),
                    TowType="Oblique",
                    SizeClass="Meso")%>% #Create identifier for each sample
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$FMWTstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$FMWTstart & .data$Date < .data$FMWTend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$FMWTend ~ NA_real_
      ))%>%
      dplyr::filter(!is.na(.data$CPUE))%>%
      dplyr::select(-"FMWT_Meso", -"FMWTstart", -"FMWTend", -"Intro")%>% #Remove FMWT taxa codes
      dplyr::left_join(stations, by=c("Source", "Station"))%>% #Add lat and long
      {if(!("FMWT_Meso"%in%Data_sets)){
        dplyr::filter(., .data$Source != "FMWT")
      } else{
        .
      }}%>%
      {if(!("STN_Meso"%in%Data_sets)){
        dplyr::filter(., .data$Source != "STN")
      } else{
        .
      }}

    cat("\nFMWT_Meso and/or STN_Meso finished!\n\n")

  }
  # twentymm Meso ----------------------------------------------------------------

  if("20mm_Meso"%in%Data_sets) {

    # Local copy of the 20mm source file. Despite the ".csv" extension it is
    # read below with readxl::read_excel() (a named worksheet is requested).
    twentymm_file <- file.path(Data_folder, "twentymm_Meso.csv")

    # Download the file if it is not cached yet or if a refresh was requested.
    # `||` (scalar, short-circuiting) is the correct operator inside `if`.
    if (!file.exists(twentymm_file) || Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$twentymm$Meso,
            destfile=twentymm_file, mode="wb", method=Download_method)
    }

    # Import the 20mm data. Column types are given positionally:
    # 1 numeric, 1 date, 3 numeric, 1 date, 6 numeric, 1 text, 74 numeric.
    zoo_20mm_Meso<-readxl::read_excel(twentymm_file,
                                      sheet="20-mm CB CPUE Data",
                                      col_types = c("numeric","date", rep("numeric", 3),
                                                    "date", rep("numeric", 6), "text", rep("numeric", 74)))

    # Transform from "wide" to "long" format, add some variables,
    # alter data to match other datasets
    data.list[["twentymm_Meso"]]<-zoo_20mm_Meso%>%
      dplyr::mutate(SampleID = paste(.data$Station, .data$SampleDate, .data$TowNum),
                    SampleDate=lubridate::force_tz(.data$SampleDate, "America/Los_Angeles"),
                    # Combine the sample date with the hour and minute of TowTime; NA when no tow time was recorded
                    Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$TowTime),
                                                                       NA_character_,
                                                                       paste0(.data$SampleDate, " ", lubridate::hour(.data$TowTime), ":", lubridate::minute(.data$TowTime))),
                                                        "%Y-%m-%d %H:%M", tz="America/Los_Angeles"))%>%
      # Turbidity is now either NTU or FNU; both columns are kept as sample-level variables
      tidyr::pivot_longer(cols=c(-"SampleDate", -"Survey", -"Station", -"TowTime", -"Temp", -"TopEC",
                                 -"BottomEC", -"Secchi", -"NTU", -"FNU", -"Tide", -"BottomDepth", -"Duration", -"MeterCheck", -"Volume",
                                 -"Dilution", -"SampleID", -"Datetime"),
                          names_to="twentymm_Meso", values_to="CPUE")%>% #transform from wide to long
      dplyr::select(Date="SampleDate", "Station", Temperature = "Temp", CondSurf = "TopEC",
                    CondBott = "BottomEC", "Secchi",
                    TurbidityNTU = "NTU", TurbidityFNU = "FNU",
                    "Tide", "BottomDepth", "Volume", "SampleID", "Datetime", "twentymm_Meso", "CPUE")%>% #Select for columns in common and rename columns to match
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("twentymm_Meso", "Lifestage", "Taxname", "Phylum", "Class",
                                       "Order", "Family", "Genus", "Species", "Intro", "twentymmstart", "twentymmend", "twentymmstart2")%>% #only retain 20mm codes
                         dplyr::filter(!is.na(.data$twentymm_Meso))%>% #Only retain Taxnames corresponding to 20mm codes
                         dplyr::distinct(),
                       by = "twentymm_Meso")%>%
      dplyr::filter(!is.na(.data$Taxname))%>% #Drop columns that have no match in the crosswalk
      dplyr::mutate(Source="20mm", #add variable for data source (must match the Source used in the stations table joined below)
                    SizeClass="Meso",
                    Station=as.character(.data$Station),
                    Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
      # Zero counts are kept as 0 before the Intro date and inside the
      # [start, end) and [start2, ...) windows; otherwise they are set to NA.
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$twentymmstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$twentymmstart & .data$Date < .data$twentymmend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$twentymmend & .data$Date < .data$twentymmstart2 ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$twentymmstart2 ~ 0 #20mm dataset had one case of a taxa starting, ending, and starting again
      ))%>%
      dplyr::select(-"twentymmend", -"twentymmstart", -"twentymmstart2", -"Intro", -"twentymm_Meso")%>%
      # NOTE(review): NA CPUE rows are not filtered out before the sum below, so
      # a group containing only NA values sums to 0 (na.rm=TRUE) - confirm intended.
      dtplyr::lazy_dt()%>% #Speed up
      dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #this just adds up those duplications
      dplyr::ungroup()%>%
      tibble::as_tibble()%>% #required to finish operation after lazy_dt()
      dplyr::mutate(TowType="Oblique",
                    SampleID=paste(.data$Source, .data$SampleID)) %>% #Create identifier for each sample
      dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long

    cat("\n20mm_Meso finished!\n\n")
  }

  # FRP Meso ---------------------------------------------------------------------

  if("FRP_Meso"%in%Data_sets) {

    # Local copies of the FRP zooplankton catch file and the FRP site-visit file
    FRP_zoop_file <- file.path(Data_folder, "zoopsFRP.csv")
    FRP_site_file <- file.path(Data_folder, "sitesFRP.csv")

    # Download the catch file if it is missing or a refresh was requested.
    # The site file is downloaded together with it (as before) and also
    # whenever it is missing on its own, so the read below cannot fail just
    # because only one of the two files was cached.
    FRP_zoop_needed <- !file.exists(FRP_zoop_file) || Redownload_data
    if (FRP_zoop_needed) {
      Tryer(n=3, fun=utils::download.file, url=URLs$FRP$Meso,
            destfile=FRP_zoop_file, mode="wb", method=Download_method)
    }
    if (FRP_zoop_needed || !file.exists(FRP_site_file)) {
      Tryer(n=3, fun=utils::download.file, url=URLs$FRP$site,
            destfile=FRP_site_file, mode="wb", method=Download_method)
    }

    # Import the FRP data
    zoo_FRP_Meso <- readr::read_csv(FRP_zoop_file, na=c("", "NA"))
    sites_FRP_Meso <- readr::read_csv(FRP_site_file, na=c("", "NA"))

    #join environmental data to taxa counts and fix some wonky names
    # (no `by` is given, so the join uses every column the two files share)
    FRP_all <- dplyr::left_join(zoo_FRP_Meso, sites_FRP_Meso) %>%
      dplyr::mutate(CommonName = dplyr::case_when(.data$CommonName == "Fish larvae" ~ "Fish UNID",
                                                  .data$CommonName == "Insect Unid" ~ "Insect UNID",
                                                  .data$CommonName == "Calanoid copepod (gravid)" ~ "Calanoid UNID",
                                                  .data$CommonName == "Asellidae UNID" ~ "Asellidae",
                                                  TRUE ~ .data$CommonName))

    #Already in long format
    data.list[["FRP_Meso"]] <- FRP_all%>%
      dplyr::mutate(Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$StartTime),
                                                                       NA_character_,
                                                                       paste(.data$Date, .data$StartTime)),
                                                        "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"))%>% #Create a variable for datetime
      dplyr::mutate(Source="FRP", #add variable for data source
                    SizeClass="Meso",
                    # Map gear codes to tow types; any other (or missing) code falls back to "Surface"
                    TowType= dplyr::case_when(.data$GearTypeAbbreviation == "ZOOP" ~ "Surface",
                                              .data$GearTypeAbbreviation == "ZOBL" ~ "Oblique",
                                              .data$GearTypeAbbreviation == "ZBEN" ~ "Bottom",
                                              TRUE ~ "Surface"),
                    Microcystis = dplyr::recode(.data$Microcystis, `1=absent`="1", `2=low`="2", `3=medium` = "3"))%>%
      dplyr::select("Source", "Date", "Datetime", Latitude= "LatitudeStart", Longitude = "LongitudeStart", Station = "Location",
                    CondSurf = "SC", "Secchi", "pH", "DO", TurbidityNTU = "Turbidity", "Tide", "Microcystis", "SizeClass", "TowType",
                    Temperature = "Temp", Volume = "effort", FRP_Meso = "CommonName", "CPUE", SampleID = "SampleID_frp")%>% #Select for columns in common and rename columns to match
      dplyr::filter(!is.na(.data$Latitude)) %>% #remove samples with no gps coordinates
      dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE), .groups="drop")%>% #this just adds up those duplications
      # Widen then lengthen again so every taxon gets an explicit row (CPUE 0) in every sample
      tidyr::pivot_wider(names_from="FRP_Meso", values_from="CPUE", values_fill=list(CPUE=0))%>%
      tidyr::pivot_longer(cols=c(-"Source", -"Date", -"Datetime",
                                 -"Station", -"CondSurf", -"Secchi", -"pH", -"DO", -"TurbidityNTU",
                                 -"Tide", -"Microcystis", -"SizeClass", -"Latitude", -"Longitude",
                                 -"Temperature", -"Volume", -"SampleID", -"TowType"),
                          names_to="FRP_Meso", values_to="CPUE")%>%
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("FRP_Meso", "Lifestage", "Taxname", "Phylum", "Class", "Order", "Family", "Genus", "Species")%>% #only retain FRP codes
                         dplyr::filter(!is.na(.data$FRP_Meso))%>% #Only retain Taxnames corresponding to FRP codes
                         dplyr::distinct(),
                       by = "FRP_Meso")%>%
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage))%>% #create variable for combo taxonomy x life stage
      dplyr::select(-"FRP_Meso")%>% #Remove FRP taxa codes
      dtplyr::lazy_dt()%>% #Speed up code
      dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #this just adds up those duplications
      dplyr::ungroup()%>%
      tibble::as_tibble()%>% #required to finish operation after lazy_dt()
      dplyr::mutate(SampleID=paste(.data$Source, .data$SampleID)) #Create identifier for each sample


    cat("\nFRP_Meso finished!\n\n")
  }

  # YBFMP Meso/Micro -------------------------------------------------------------

  if("YBFMP_Meso"%in%Data_sets || "YBFMP_Micro"%in%Data_sets) {

    # Local copy of the YBFMP source file (holds both mesh sizes)
    YBFMP_file <- file.path(Data_folder, "YBFMP.csv")

    # Download the file if it is not cached yet or if a refresh was requested
    if (!file.exists(YBFMP_file) || Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$YBFMP,
            destfile=YBFMP_file, mode="wb", method=Download_method)
    }

    # Import the data and tag every row with its position so that duplicated
    # rows can be removed by index further down. row_number() is safe for a
    # zero-row table, unlike 1:nrow(.).
    zoo_YBFMP<-readr::read_csv(YBFMP_file,
                               col_types = readr::cols_only(Date="c", Time="c", StationCode="c",
                                                            Tide="c", WaterTemperature="d", Secchi="d",
                                                            SpCnd="d", pH="d", DO="d", Turbidity="d",
                                                            MicrocystisVisualRank="c", MeshSize="c", VolNet_ed="d",
                                                            TaxonName="c", LifeStage="c", CPUE_ed="d"))%>%
      dplyr::mutate(Index = dplyr::row_number())



    # Sum doubles with unclear life stages (both labeled as undifferentiated)
    # A "double" is a station/date/time/taxon/life stage/mesh combination that
    # occurs on more than one row.
    doubles <- zoo_YBFMP %>%
      dplyr::group_by(.data$StationCode, .data$Date, .data$Time, .data$TaxonName, .data$LifeStage, .data$MeshSize) %>%
      dplyr::mutate(n =  dplyr::n()) %>%
      dplyr::filter(.data$n>1) %>%
      dplyr::ungroup()

    Index_rm <- doubles$Index

    # Collapse each set of doubles to a single row with the summed CPUE.
    # The sum is taken within rows that agree on every other column, so totals
    # from different samples are never pooled just because they share a
    # TaxonName. A set whose CPUE values are all missing stays NA. This also
    # works when there are no doubles at all (zero-row result).
    doubles_summed <- doubles %>%
      dplyr::select(-"Index", -"n") %>%
      dplyr::group_by(dplyr::across(-"CPUE_ed")) %>%
      dplyr::summarise(CPUE_ed = if (all(is.na(.data$CPUE_ed))) NA_real_ else sum(.data$CPUE_ed, na.rm = TRUE),
                       .groups = "drop")


    # Add zeroes, add sample ID, modify column names and order, join crosswalk taxonomy.
    data.list[["YBFMP"]] <- zoo_YBFMP %>%
      dplyr::filter(!(.data$Index %in% Index_rm)) %>%
      dplyr::select(-"Index") %>%
      dplyr::bind_rows(doubles_summed) %>% # replace doubles with summed CPUEs
      dplyr::mutate(TaxonName = replace(.data$TaxonName, .data$TaxonName == "Eucyclops phaleratus", "Ectocyclops phaleratus")) %>% # Otherwise creates doubles for Platycyclops phaleratus later on
      dplyr::mutate(YBFMP=paste(.data$TaxonName, .data$LifeStage),
                    MeshSize=dplyr::recode(.data$MeshSize, `150_micron`="Meso", `50_micron`="Micro"),
                    Source = "YBFMP",
                    SampleID = paste0(.data$Date, "_", .data$StationCode, "_", .data$MeshSize),
                    Datetime = lubridate::parse_date_time(paste(.data$Date, .data$Time), "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"),
                    Date = lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles")) %>%
      dplyr::select("Source",
                    SizeClass = "MeshSize",
                    Volume = "VolNet_ed",
                    "Date",
                    "Datetime",
                    Station = "StationCode",
                    Temperature = "WaterTemperature",
                    "Secchi", TurbidityNTU = "Turbidity",
                    CondSurf = "SpCnd",
                    "pH", "DO",
                    Microcystis="MicrocystisVisualRank",
                    "SampleID",
                    "YBFMP",
                    CPUE = "CPUE_ed")%>%
      # Drop whichever size class was not requested
      {if(!"YBFMP_Meso"%in%Data_sets){
        dplyr::filter(., .data$SizeClass!="Meso")
      }else{
        .
      }}%>%
      {if(!"YBFMP_Micro"%in%Data_sets){
        dplyr::filter(., .data$SizeClass!="Micro")
      }else{
        .
      }}%>%
      # Widen then lengthen again so every taxon gets an explicit row (CPUE 0) in every sample
      tidyr::pivot_wider(names_from="YBFMP", values_from="CPUE", values_fill=list(CPUE=0)) %>%
      tidyr::pivot_longer(cols=c(-"Source", -"SizeClass", -"Volume", -"Date",
                                 -"Datetime", -"Station", -"Temperature", -"CondSurf", -"Secchi",
                                 -"pH", -"DO", -"TurbidityNTU", -"Microcystis",
                                 -"SampleID"),
                          names_to="YBFMP", values_to="CPUE")%>%
      dplyr::left_join(Crosswalk %>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("YBFMP", "Lifestage", "Taxname", "Phylum", "Class",
                                       "Order", "Family", "Genus", "Species"),
                       by = "YBFMP") %>%
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage))%>% #create variable for combo taxonomy x life stage
      dplyr::select(-"YBFMP") %>% #Remove YBFMP taxa codes
      dplyr::mutate(SampleID=paste0(.data$Source, "_", .data$SampleID), #Create identifier for each sample
                    TowType="Surface")  %>%
      dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long
    cat("\nYBFMP_Meso and/or YBFMP_Micro finished!\n\n")
  }

  # EMP Micro ---------------------------------------------------------------

  if("EMP_Micro"%in%Data_sets) {

    # Local copy of the EMP micro-zooplankton (pump sample) source file
    EMP_Micro_file <- file.path(Data_folder, "EMP_Micro.csv")

    # Download the file if it is not cached yet or if a refresh was requested.
    # `||` (scalar, short-circuiting) is the correct operator inside `if`.
    if (!file.exists(EMP_Micro_file) || Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Micro,
            destfile=EMP_Micro_file, mode="wb", method=Download_method)
    }

    # Import the EMP data (only the listed columns are read)
    zoo_EMP_Micro<-readr::read_csv(EMP_Micro_file,
                                   col_types=readr::cols_only(SampleDate="c", StationNZ="c",
                                                              Chl_a="d", Secchi="d", Temperature="d",
                                                              ECSurfacePreTow="d", ECBottomPreTow="d",
                                                              Volume="d", Depth="d", LIMNOSPP="d",
                                                              LIMNOSINE="d", LIMNOTET="d", OITHDAV="d",
                                                              OITHSIM="d", OITHSPP="d", OTHCYCAD="d",
                                                              HARPACT="d", CYCJUV="d", LIMNOJUV="d",
                                                              OITHJUV="d", OTHCYCJUV="d", COPNAUP="d",
                                                              EURYNAUP="d", OTHCOPNAUP="d", PDIAPNAUP="d",
                                                              SINONAUP="d", ASPLANCH="d",
                                                              KERATELA="d",OTHROT="d", POLYARTH="d",
                                                              SYNCH="d",SYNCHBIC="d", TRICHO="d",
                                                              BARNNAUP="d"))

    # Transform from "wide" to "long" format, add some variables,
    # alter data to match other datasets

    data.list[["EMP_Micro"]] <- zoo_EMP_Micro%>%
      dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
      dplyr::rename(OTHCYCADPUMP = "OTHCYCAD")%>% #Renamed before the crosswalk join on EMP_Micro codes below
      tidyr::pivot_longer(cols=c(-"SampleDate", -"StationNZ", -"Secchi", -"Chl_a", -"Temperature",
                                 -"ECSurfacePreTow", -"ECBottomPreTow", -"Depth", -"Volume"),
                          names_to="EMP_Micro", values_to="CPUE")%>% #transform from wide to long
      dplyr::mutate(Source="EMP",
                    SizeClass="Micro")%>% #add variables for data source and size class
      dplyr::select("Source", Date = "SampleDate", Station="StationNZ", Chl = "Chl_a",
                    CondBott = "ECBottomPreTow", CondSurf = "ECSurfacePreTow", "Secchi",
                    "Temperature", BottomDepth="Depth", "SizeClass", "Volume", "EMP_Micro", "CPUE")%>% #Select for columns in common and rename columns to match
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("EMP_Micro", "Lifestage", "Taxname", "Phylum",
                                       "Class", "Order", "Family", "Genus", "Species",
                                       "Intro", "EMPstart", "EMPend")%>% #only retain EMP codes
                         dplyr::filter(!is.na(.data$EMP_Micro))%>% #Only retain Taxnames corresponding to EMP codes
                         dplyr::distinct(),
                       by="EMP_Micro")%>%
      dplyr::filter(!is.na(.data$Taxname))%>% #Should remove all the summed categories in original dataset
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
                    Tide="1", # All EMP samples collected at high slack
                    TowType="Vertical pump",
                    BottomDepth=.data$BottomDepth*0.3048)%>% # Convert to meters
      # Zero counts are kept as 0 before the Intro date and inside the
      # [EMPstart, EMPend) window; otherwise they are set to NA.
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$EMPstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$EMPstart & .data$Date < .data$EMPend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$EMPend ~ NA_real_
      ))%>%
      dplyr::select(-"EMP_Micro", -"EMPstart", -"EMPend", -"Intro")%>% #Remove EMP taxa codes
      # NOTE(review): NA CPUE rows are not filtered out before the sum below, so
      # a group containing only NA values sums to 0 (na.rm=TRUE) - confirm intended.
      dtplyr::lazy_dt()%>% #Speed up code using dtplyr package that takes advantage of data.table speed
      dplyr::group_by(dplyr::across(-"CPUE"))%>%
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #Some taxa now have the same names (e.g., CYCJUV and OTHCYCJUV) so we now add those categories together.
      dplyr::ungroup()%>%
      tibble::as_tibble() %>%#required to finish operation after lazy_dt()
      dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long

    cat("\nEMP_Micro finished!\n\n")
  }
  # FRP Macro ---------------------------------------------------------------

  if("FRP_Macro"%in%Data_sets) {

    # Local copies of the FRP macroinvertebrate catch file and the FRP site-visit file
    FRP_macro_file <- file.path(Data_folder, "macroinvert_FRP.csv")
    FRP_macro_site_file <- file.path(Data_folder, "sitesFRP.csv")

    # Download the catch file if it is missing or a refresh was requested.
    # The site file is downloaded together with it (as before) and also
    # whenever it is missing on its own, so the read below cannot fail just
    # because only one of the two files was cached.
    FRP_macro_needed <- !file.exists(FRP_macro_file) || Redownload_data
    if (FRP_macro_needed) {
      Tryer(n=3, fun=utils::download.file, url=URLs$FRP$Macro,
            destfile=FRP_macro_file, mode="wb", method=Download_method)
    }
    if (FRP_macro_needed || !file.exists(FRP_macro_site_file)) {
      Tryer(n=3, fun=utils::download.file, url=URLs$FRP$site,
            destfile=FRP_macro_site_file, mode="wb", method=Download_method)
    }

    # Import the FRP data
    zoo_FRP_Macro <- readr::read_csv(FRP_macro_file, na=c("", "NA"))
    sites_FRP_Macro <- readr::read_csv(FRP_macro_site_file, na=c("", "NA"))

    #join environmental data to taxa counts and fix some wonky names
    # Date and Location are dropped from the catch table first, so the versions
    # carried through are the ones from the site table (matched on VisitNo).
    # NOTE(review): "Tricoptera larvae Other" maps to "Tricoptera larvae UNID"
    # while "Tricoptera larvae UNID" maps to "Trichoptera larvae Other"; the
    # spellings differ and the mappings are kept exactly as written - confirm
    # against the crosswalk's FRP_Macro codes.
    FRP_allmac <- dplyr::left_join(dplyr::select(zoo_FRP_Macro, -"Date", -"Location"), sites_FRP_Macro, by = "VisitNo") %>%
      dplyr::mutate(CommonName = dplyr::case_when(.data$CommonName == "Fish larvae" ~ "Fish UNID", #fix some wonky common names
                                                  .data$CommonName == "Insect Unid" ~ "Insect UNID",
                                                  .data$CommonName == "Calanoid copepod (gravid)" ~ "Calanoid UNID",
                                                  .data$CommonName == "Hymenoptera UNID" ~ "Hymenoptera Other",
                                                  .data$CommonName == "Tricoptera larvae Other" ~ "Tricoptera larvae UNID",
                                                  .data$CommonName == "Palaemonectes" ~ "Palaemon",
                                                  .data$CommonName == "Palaemonetes" ~ "Palaemon",
                                                  .data$CommonName == "Asellidae UNID" ~ "Asellidae",
                                                  .data$CommonName == "Diptera adult" ~ "Diptera Adult",
                                                  .data$CommonName == "Coleoptera other" ~ "Coleoptera Other",
                                                  .data$CommonName == "Tricoptera larvae UNID"~"Trichoptera larvae Other",
                                                  TRUE ~ .data$CommonName))

    #Already in long format
    data.list[["FRP_Macro"]] <- FRP_allmac%>%
      dplyr::filter(.data$GearTypeAbbreviation %in% c("MAC", "MACOBL", "MACBEN"))%>% #Keep only the macroinvertebrate gear types
      dplyr::mutate(Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$StartTime),
                                                                       NA_character_,
                                                                       paste(.data$Date, as.character(.data$StartTime))),
                                                        "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"))%>% #Create a variable for datetime
      dplyr::mutate(Source = "FRP", #add variable for data source
                    SizeClass = "Macro",
                    # Map gear codes to tow types (the fallback is unreachable after the filter above)
                    TowType= dplyr::case_when(.data$GearTypeAbbreviation == "MAC" ~ "Surface",
                                              .data$GearTypeAbbreviation == "MACOBL" ~ "Oblique",
                                              .data$GearTypeAbbreviation == "MACBEN" ~ "Bottom",
                                              TRUE ~ "Surface"),
                    CPUE = .data$AdjCount/.data$effort, #calculate CPUE as adjusted count per unit effort
                    Microcystis = dplyr::recode(.data$Microcystis, `1=absent`="1", `2=low`="2", `3=medium`="3"))%>%

      dplyr::select("Source", "Date", "Datetime", Latitude= "LatitudeStart", Longitude = "LongitudeStart", Station = "Location",
                    CondSurf = "SC", "Secchi", "pH", "DO", TurbidityNTU = "Turbidity", "Tide", "Microcystis", "SizeClass", "TowType",
                    Temperature = "Temp", Volume = "effort", FRP_Macro = "CommonName", "CPUE", SampleID = "SampleID_frp")%>% #Select for columns in common and rename columns to match
      dplyr::filter(!is.na(.data$Latitude)) %>% #remove samples with no gps coordinates
      dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE), .groups="drop")%>% #this just adds up those duplications
      # Widen then lengthen again so every taxon gets an explicit row (CPUE 0) in every sample
      tidyr::pivot_wider(names_from="FRP_Macro", values_from="CPUE", values_fill=list(CPUE=0))%>%
      tidyr::pivot_longer(cols=c(-"Source", -"Date", -"Datetime",
                                 -"Station", -"CondSurf", -"Secchi", -"pH", -"DO", -"TurbidityNTU",
                                 -"Tide", -"Microcystis", -"SizeClass", -"Latitude", -"Longitude",
                                 -"Temperature", -"Volume", -"SampleID", -"TowType"),
                          names_to="FRP_Macro", values_to="CPUE")%>%
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("FRP_Macro", "Lifestage", "Taxname", "Phylum", "Class", "Order", "Family", "Genus", "Species")%>% #only retain FRP codes
                         dplyr::filter(!is.na(.data$FRP_Macro))%>% #Only retain Taxnames corresponding to FRP codes
                         dplyr::distinct(),
                       by = "FRP_Macro")%>%
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage))%>% #create variable for combo taxonomy x life stage
      dplyr::select(-"FRP_Macro")%>% #Remove FRP taxa codes
      dtplyr::lazy_dt()%>% #Speed up code
      dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #this just adds up those duplications
      dplyr::ungroup()%>%
      tibble::as_tibble()%>% #required to finish operation after lazy_dt()
      dplyr::mutate(SampleID=paste(.data$Source, .data$SampleID)) #Create identifier for each sample
    cat("\nFRP_Macro finished!\n\n")

  }

  # EMP Macro ---------------------------------------------------------------

  if("EMP_Macro"%in%Data_sets) {

    # Download the EMP macrozooplankton file unless a copy already exists in
    # Data_folder and a fresh download was not requested.
    if (!file.exists(file.path(Data_folder, "EMP_Macro.csv")) || Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Macro,
            destfile=file.path(Data_folder, "EMP_Macro.csv"), mode="wb", method=Download_method)
    }

    # Import the EMP data: read only the listed columns and drop rows in which
    # every retained column is NA

    zoo_EMP_Macro<-readr::read_csv(file.path(Data_folder, "EMP_Macro.csv"),
                                   col_types=readr::cols_only(SampleDate="c", Time="c", StationNZ="c",
                                                              Chl_a="d", Secchi="d", Temperature="d",
                                                              ECSurfacePreTow="d", ECBottomPreTow="d",
                                                              Volume="d", Depth="d", AmphipodCode="c", A_aspera="d",
                                                              A_hwanhaiensis="d", A_macropsis="d", D_holmquistae="d",
                                                              H_longirostris="d", N_kadiakensis="d", N_mercedis="d",
                                                              Unidentified_mysid="d", A_spinicorne="d", A_stimpsoni="d",
                                                              A_abdita="d", Ampithoe_sp="d", Caprelidae_sp="d",
                                                              C_alienense="d", Crangonyx_sp="d", G_daiberi="d",
                                                              G_japonica="d", Hyalella_sp="d", Monocorophium_sp="d",
                                                              Oedicerotidae_sp="d", Pleustidae="d", Unidentified_Amphipod="d",
                                                              Unidentified_Corophium="d", Unidentified_Gammarus="d", Amphipod_Total="d"))%>%
      dplyr::filter(dplyr::if_any(dplyr::everything(), ~ !is.na(.)))

    # Transform from "wide" to "long" format, add some variables,
    # alter data to match other datasets

    data.list[["EMP_Macro"]] <- zoo_EMP_Macro%>%
      dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"),
                    # SampleDate is already parsed at this point, so it pastes as "%Y-%m-%d"
                    Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_, paste(.data$SampleDate, .data$Time)),
                                                        c("%Y-%m-%d %I:%M %p"), tz="Etc/GMT+8"), #create a variable for datetime
                    Datetime=lubridate::with_tz(.data$Datetime, "America/Los_Angeles"), # Ensure everything ends up in local time
                    Unidentified_Amphipod=dplyr::if_else(lubridate::year(.data$SampleDate)<2014, .data$Amphipod_Total, .data$Unidentified_Amphipod))%>% # For samples before 2014, use Amphipod_Total as the Unidentified_Amphipod value
      tidyr::pivot_longer(cols=c(-"SampleDate", -"Time", -"Datetime", -"StationNZ", -"Secchi", -"Chl_a", -"Temperature",
                                 -"ECSurfacePreTow", -"ECBottomPreTow", -"Volume", -"Depth", -"AmphipodCode"),
                          names_to="EMP_Macro", values_to="CPUE")%>% #transform from wide to long
      dplyr::mutate(Source="EMP",
                    SizeClass="Macro")%>% #add variable for data source
      dplyr::select("Source", Date = "SampleDate", "Datetime", Station="StationNZ", Chl = "Chl_a",
                    CondBott = "ECBottomPreTow", CondSurf = "ECSurfacePreTow", "Secchi", "SizeClass",
                    "Temperature", BottomDepth="Depth", "Volume", "AmphipodCode", "EMP_Macro", "CPUE")%>% #Select for columns in common and rename columns to match
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("EMP_Macro", "Lifestage", "Taxname", "Phylum", "Class",
                                       "Order", "Family", "Genus", "Species", "Intro", "EMPstart", "EMPend")%>% #only retain EMP codes
                         dplyr::filter(!is.na(.data$EMP_Macro))%>% #Only retain Taxnames corresponding to EMP codes
                         dplyr::distinct(),
                       by="EMP_Macro")%>%
      dplyr::filter(!is.na(.data$Taxname))%>% #Should remove all the summed categories in original dataset
      dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
                    Tide="1", # All EMP samples collected at high slack
                    TowType="Oblique",
                    BottomDepth=.data$BottomDepth*0.3048)%>% # Convert to meters
      # Zero counts are kept as 0 only before the taxon's Intro date or between
      # EMPstart and EMPend; zeros in any other period are set to NA
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$EMPstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$EMPstart & .data$Date < .data$EMPend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$EMPend ~ NA_real_),
        CPUE=dplyr::if_else(.data$AmphipodCode!="A" & .data$Order=="Amphipoda", NA_real_, .data$CPUE))%>% # Remove any tainted amphipod data (e.g., veg in net)
      # Drop the NA CPUE rows before summing: sum(na.rm=TRUE) over an all-NA
      # group returns 0, which would turn "no valid count" into a zero catch.
      dplyr::filter(!is.na(.data$CPUE))%>%
      dplyr::select(-"EMP_Macro", -"EMPstart", -"EMPend", -"Intro")%>% #Remove EMP taxa codes
      dtplyr::lazy_dt()%>% #Speed up code using dtplyr package that takes advantage of data.table speed
      dplyr::group_by(dplyr::across(-"CPUE"))%>%
      dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #Rows that now share every non-CPUE column (codes mapped to the same Taxname and Lifestage) are added together.
      dplyr::ungroup()%>%
      tibble::as_tibble() %>% #required to finish operation after lazy_dt()
      dplyr::left_join(stations, by=c("Source", "Station"))

    cat("\nEMP_Macro finished!\n\n")

    if(Biomass){
      # Download the EMP length file unless a copy already exists in
      # Data_folder and a fresh download was not requested.
      if (!file.exists(file.path(Data_folder, "EMP_Lengths.csv")) || Redownload_data) {
        Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Lengths,
              destfile=file.path(Data_folder, "EMP_Lengths.csv"), mode="wb", method=Download_method)
      }

      # Read the length data whether it was just downloaded or was already
      # stored in Data_folder. Reading must happen outside the download
      # condition, otherwise a stored file is never loaded when
      # Redownload_data = FALSE and EMP lengths are missing from lengths.list.
      lengths.list[["EMP_Lengths"]]<-readr::read_csv(file.path(Data_folder, "EMP_Lengths.csv"),
                                                     col_types=readr::cols_only(SampleDate="c", StationNZ="c",
                                                                                SpeciesName="c", Size="d", AdjustedFreq="d"))%>%
        dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
        dplyr::rename(Date="SampleDate", Station="StationNZ", EMP_Lengths="SpeciesName", Length="Size", Count="AdjustedFreq")%>%
        dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                           dplyr::select("EMP_Lengths", "Lifestage", "Taxname")%>% #only retain EMP codes
                           dplyr::filter(!is.na(.data$EMP_Lengths))%>% #Only retain Taxnames corresponding to EMP codes
                           dplyr::distinct(),
                         by="EMP_Lengths")%>%
        dplyr::filter(!is.na(.data$Taxname))%>%
        dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage),
                      Source="EMP",
                      SizeClass="Macro",
                      SampleID=paste(.data$Source, .data$Station, .data$Date))%>% # Same SampleID construction as the EMP_Macro catch data above
        dplyr::select(-"EMP_Lengths", -"Date", -"Station")

      cat("\nEMP_Macro lengths finished!\n\n")
    }

  }
  # FMWT Macro --------------------------------------------------------------

  if("FMWT_Macro"%in%Data_sets || "STN_Macro"%in%Data_sets) {

    # FMWT and STN macrozooplankton are distributed in one shared file, so this
    # section runs when either dataset is requested and filters by Source at
    # the end.

    # Download the combined FMWT/STN file unless a copy already exists in
    # Data_folder and a fresh download was not requested.
    if (!file.exists(file.path(Data_folder, "FMWTSTN_Macro.csv")) || Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$FMWTSTN$Macro,
            destfile=file.path(Data_folder,"FMWTSTN_Macro.csv"), mode="wb", method=Download_method)
    }

    # Download the SMSCG file under the same rule
    if (!file.exists(file.path(Data_folder, "SMSCG_Macro.csv")) || Redownload_data) {
      Tryer(n=3, fun=utils::download.file, url=URLs$SMSCG$Macro,
            destfile=file.path(Data_folder, "SMSCG_Macro.csv"), mode="wb", method=Download_method)
    }


    # Read the FMWT/STN file. ID (Year, Project, Survey, Station) is used below
    # to detect samples that also appear in the SMSCG file.
    # Datetime is built from the still-character Date before Date is parsed.
    zoo_FMWT_Macro <- readr::read_csv(file.path(Data_folder, "FMWTSTN_Macro.csv"),
                                      col_types=readr::cols_only(Project="c", Year="d", Survey="d",
                                                                 Date="c", Station="c", Time="c",
                                                                 TideCode="c", DepthBottom="d", CondSurf="d",
                                                                 CondBott="d", TempSurf="d", Secchi="d",
                                                                 Turbidity="d", Microcystis="c", Volume="d",
                                                                 Acanthomysis_aspera="d", Hyperacanthomysis_longirostris="d", Acanthomysis_hwanhaiensis="d",
                                                                 Alienacanthomysis_macropsis="d", Deltamysis_holmquistae="d", Neomysis_kadiakensis="d",
                                                                 Neomysis_mercedis="d", Unidentified_Mysid="d", Americorophium_spinicorne="d",
                                                                 Americorophium_stimpsoni="d", Ampelisca_abdita="d", Corophium_alienense="d",
                                                                 Crangonyx_sp="d", Gammarus_daiberi="d", Grandidierella_japonica="d",
                                                                 Hyalella_sp="d", Unidentified_Amphipod="d", Unidentified_Corophium="d",
                                                                 Unidentified_Gammarus="d"))%>%
      dplyr::mutate(ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station)) %>%
      dplyr::mutate(Datetime = lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_,
                                                                         paste(.data$Date, .data$Time)), "%Y-%m-%d %H:%M", tz="America/Los_Angeles"),
                    Date=lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles"))
    #The 2025 data upload has a new date format for Macro (but not Meso, and not SMSCG), so the two files are parsed with different formats

    # Read the SMSCG file, keeping only FMWT/STN samples whose ID is not
    # already present in the FMWT/STN file.
    zoo_SMSCG_Macro <- readr::read_csv(file.path(Data_folder, "SMSCG_Macro.csv"),
                                       col_types=readr::cols_only(Project="c", Year="d", Survey="d",
                                                                  Date="c", Station="c", Time="c",
                                                                  TideCode="c", DepthBottom="d", CondSurf="d",
                                                                  CondBott="d", TempSurf="d", Secchi="d",
                                                                  Turbidity="d", Microcystis="c", Volume="d",
                                                                  Acanthomysis_aspera="d", Hyperacanthomysis_longirostris="d", Acanthomysis_hwanhaiensis="d",
                                                                  Alienacanthomysis_macropsis="d", Deltamysis_holmquistae="d", Neomysis_kadiakensis="d",
                                                                  Neomysis_mercedis="d", Unidentified_Mysid="d", Americorophium_spinicorne="d",
                                                                  Americorophium_stimpsoni="d", Ampelisca_abdita="d", Corophium_alienense="d",
                                                                  Crangonyx_sp="d", Gammarus_daiberi="d", Grandidierella_japonica="d",
                                                                  Hyalella_sp="d", Unidentified_Amphipod="d", Unidentified_Corophium="d",
                                                                  Unidentified_Gammarus="d"))%>%
      dplyr::mutate(ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station))%>%
      dplyr::filter(!.data$ID%in%unique(zoo_FMWT_Macro$ID) & .data$Project%in%c("FMWT", "STN")) %>%

      #Put date and time in the right format. Date first, then Datetime, to fix a problem where the date randomly came out wrong in the 'Datetime' version
      dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%m/%d/%y", tz="America/Los_Angeles"),
        Datetime = lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_,
                                                                         paste(.data$Date, .data$Time)), "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"))

    data.list[["FMWT_Macro"]] <- dplyr::bind_rows(zoo_FMWT_Macro, zoo_SMSCG_Macro)%>%
      dplyr::select(-"ID")%>%
      dplyr::distinct()%>% #remove any samples duplicated between the SMSCG dataset and the FMWT dataset (namespace-qualified like every other dplyr call here)
      dplyr::mutate(Microcystis = as.character(.data$Microcystis))%>%
      tidyr::pivot_longer(cols=c(-"Project", -"Year", -"Survey", -"Date", -"Datetime",
                                 -"Station", -"Time", -"TideCode",
                                 -"DepthBottom", -"CondSurf", -"CondBott",
                                 -"TempSurf", -"Secchi", -"Turbidity", -"Microcystis",
                                 -"Volume"),
                          names_to="FMWT_Macro", values_to="CPUE")%>% #transform from wide to long
      dplyr::select(Source = "Project", "Date", "Datetime", "Station", Tide = "TideCode", BottomDepth = "DepthBottom",
                    "CondSurf", "CondBott", Temperature = "TempSurf", "Secchi", TurbidityNTU = "Turbidity",
                    "Microcystis", "Volume",
                    "FMWT_Macro", "CPUE")%>% #Select for columns in common and rename columns to match
      dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                         dplyr::select("FMWT_Macro", "Lifestage", "Taxname", "Phylum", "Class", "Order",
                                       "Family", "Genus", "Species", "Intro", "FMWTstart", "FMWTend")%>% #only retain FMWT codes
                         dplyr::filter(!is.na(.data$FMWT_Macro))%>% #Only retain Taxnames corresponding to FMWT codes
                         dplyr::distinct(),
                       by = "FMWT_Macro")%>%
      dplyr::filter(!is.na(.data$Taxname))%>%
      dplyr::mutate(Station=dplyr::recode(.data$Station, MONT="Mont", HONK="Honk"),
                    Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                    Microcystis=dplyr::if_else(.data$Microcystis=="6", "2", .data$Microcystis), #Microcystis value of 6 only used from 2012-2015 and is equivalent to a 2 in other years, so just converting all 6s to 2s.
                    SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
                    SizeClass="Macro",
                    TowType="Oblique",
                    Tide=as.character(.data$Tide))%>%
      # Zero counts are kept as 0 only before the taxon's Intro date or between
      # FMWTstart and FMWTend; zeros in any other period are set to NA
      dplyr::mutate(CPUE=dplyr::case_when(
        .data$CPUE!=0 ~ .data$CPUE,
        .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
        .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$FMWTstart ~ NA_real_,
        .data$CPUE==0 & .data$Date >= .data$FMWTstart & .data$Date < .data$FMWTend ~ 0,
        .data$CPUE==0 & .data$Date >= .data$FMWTend ~ NA_real_
      ))%>%
      dplyr::filter(!is.na(.data$CPUE))%>% #Drop the rows set to NA above (and any CPUE that was already NA)
      dplyr::select(-"FMWT_Macro", -"FMWTstart", -"FMWTend", -"Intro")%>% #Remove FMWT taxa codes
      dplyr::left_join(stations, by=c("Source", "Station"))%>% #Join the stations table by Source and Station
      {if(!("FMWT_Macro"%in%Data_sets)){ #Drop FMWT rows when only STN_Macro was requested
        dplyr::filter(., .data$Source != "FMWT")
      } else{
        .
      }}%>%
      {if(!("STN_Macro"%in%Data_sets)){ #Drop STN rows when only FMWT_Macro was requested
        dplyr::filter(., .data$Source != "STN")
      } else{
        .
      }}

    cat("\nFMWT_Macro and/or STN_Macro finished!\n\n")
  }

  # Combine data ----------------------------------------
  cat("\nCombining datasets...\n")

  # Stack every processed dataset into one table
  zoop <- dplyr::bind_rows(data.list)

  # Remove NA taxnames (should only correspond to previously summed "all"
  # categories from input datasets)
  zoop <- dplyr::filter(zoop, !is.na(.data$Taxname))

  # Derive surface salinity from surface conductivity and extract the year
  zoop <- dplyr::mutate(zoop,
                        SalSurf = wql::ec2pss(.data$CondSurf/1000, t=25),
                        Year = lubridate::year(.data$Date))

  # Rename tide codes to be consistent (only when a Tide column exists)
  if ("Tide" %in% names(zoop)) {
    zoop <- dplyr::mutate(zoop,
                          Tide = dplyr::recode(.data$Tide,
                                               "1"="High slack", "2"="Ebb", "3"="Low slack", "4"="Flood",
                                               "1=high slack"="High slack", "2=ebb"="Ebb",
                                               "3=low slack"="Low slack", "4=flood"="Flood"))
  }

  # Derive bottom salinity from bottom conductivity (only when the column exists)
  if ("CondBott" %in% names(zoop)) {
    zoop <- dplyr::mutate(zoop, SalBott = wql::ec2pss(.data$CondBott/1000, t=25))
  }

  # Remove some extraneous variables to save memory
  zoop <- dplyr::select(zoop, -tidyselect::any_of(c("Region", "CondBott", "CondSurf")))

  # Stations listed in stationsEMPEZ get their coordinates from that table,
  # matched on Date and Station, in place of the Latitude/Longitude columns
  # already attached to those rows.
  stationsEMPEZ <- zooper::stationsEMPEZ
  ez_stations <- unique(stationsEMPEZ$Station)

  if (any(ez_stations %in% unique(zoop$Station))) {
    # Rows from EZ stations: swap in the per-date coordinates
    zoop_ez <- zoop %>%
      dplyr::filter(.data$Station %in% ez_stations) %>%
      dplyr::select(-"Latitude", -"Longitude") %>%
      dplyr::left_join(stationsEMPEZ, by=c("Date", "Station"))

    # All remaining rows are left untouched
    zoop_other <- dplyr::filter(zoop, !.data$Station %in% ez_stations)

    # EZ rows first, then the rest
    zoop <- dplyr::bind_rows(zoop_ez, zoop_other)
  }

  # Build the sample-level table: drop the taxon-level columns and keep one
  # row per distinct combination of the remaining columns.
  zoopEnv<-zoop%>%
    dplyr::select(-"SizeClass", -"Volume", -"Lifestage", -"Taxname", -"Phylum", -"Class", -"Order",
                  -"Family", -"Genus", -"Species", -"Taxlifestage", -"CPUE")%>%
    dplyr::distinct()

  # Remove duplicated samples not caught by distinct.
  # For every SampleID that still has more than one row, collapse to one row:
  #   numeric columns   -> group mean (NaN when all values are NA; fixed below)
  #   POSIXct columns   -> earliest non-missing value, or NA if all are missing
  #   character columns -> first non-missing value within the group
  dups<-dplyr::filter(zoopEnv, .data$SampleID%in%.data$SampleID[which(duplicated(.data$SampleID))])%>%
    dplyr::group_by(.data$SampleID)%>%
    dplyr::mutate(dplyr::across(where(is.numeric), ~mean(.x, na.rm=TRUE)))%>%
    dplyr::mutate(dplyr::across(where(lubridate::is.POSIXct), ~suppressWarnings(dplyr::if_else(all(is.na(.x)), lubridate::parse_date_time(NA_character_, tz="America/Los_Angeles"), min(.x, na.rm=TRUE)))))%>%
    tidyr::fill(where(is.character), .direction="downup")%>% # Fill missing character values from neighbouring rows in the group
    dplyr::mutate(dplyr::across(where(is.character), ~unique(.x)[1]))%>%
    dplyr::ungroup()%>%
    dplyr::distinct()

  # Swap the duplicated samples for their collapsed versions and convert the
  # NaN produced by averaging all-NA groups back to NA.
  zoopEnv<-zoopEnv%>%
    dplyr::filter(!.data$SampleID%in%dups$SampleID)%>%
    dplyr::bind_rows(dups)%>%
    dplyr::mutate(dplyr::across(where(is.numeric), ~ dplyr::if_else(is.nan(.x), NA_real_, .x)))


  # Reduce the catch table to the taxon-level columns; SampleID links it back
  # to zoopEnv.
  zoop<-zoop%>%
    dplyr::select("Source", "SizeClass", "Volume", "Lifestage", "Taxname", "Phylum", "Class",
                  "Order", "Family", "Genus", "Species", "Taxlifestage", "SampleID", "CPUE")

  # Optionally add biomass (BPUE) using the length data collected in lengths.list
  if(Biomass){
    zoop_lengths<-dplyr::bind_rows(lengths.list)
    zoop<-Zoopbiomass(zoop, zoop_lengths)%>%
      dplyr::select("Source", "SizeClass", "Volume", "Lifestage", "Taxname", "Phylum", "Class",
                    "Order", "Family", "Genus", "Species", "Taxlifestage", "SampleID", "CPUE", "BPUE")

  }

  # Save the catch and environmental tables as separate .Rds files
  if(Save_object){
    saveRDS(zoop, file=paste0(Zoop_path, ".Rds"))
    saveRDS(zoopEnv, file=paste0(Env_path, ".Rds"))
  }

  # Return either the two tables joined on SampleID ("Combined") or both tables
  # in a named list ("List"). No value is explicitly returned for any other
  # Return_object_type or when Return_object is FALSE.
  if(Return_object){
    if(Return_object_type=="Combined"){
      # Source exists in both tables, so drop it from zoopEnv before joining.
      # Selected by name: using .data inside select() is deprecated in tidyselect.
      zoop_full <- dplyr::left_join(zoop, dplyr::select(zoopEnv, -"Source"), by="SampleID")
      return(zoop_full)
    }
    if(Return_object_type=="List"){
        return(list(Zooplankton = zoop, Environment = zoopEnv))
    }
  }

}
InteragencyEcologicalProgram/zooper documentation built on Feb. 6, 2025, 9:01 a.m.