#' Downloads and combines zooplankton datasets collected by the Interagency Ecological Program from the Sacramento-San Joaquin Delta
#'
#' This function downloads all IEP zooplankton datasets from the internet,
#' converts them to a consistent format, binds them together, and exports
#' the combined dataset as .Rds R data files and/or an R object.
#' Datasets currently include "EMP" (Environmental Monitoring Program),
#' "FRP" (Fish Restoration Program), "FMWT" (Fall Midwater Trawl), "STN" (Townet Survey), "20mm" (20mm survey),
#' "DOP" (Directed Outflow Project Lower Trophic Study), and "YBFMP" (Yolo Bypass Fish Monitoring Program).
#' @param Data_sets Datasets to include in combined data. Choices include "EMP_Meso", "FMWT_Meso", "STN_Meso", "20mm_Meso", "FRP_Meso", "YBFMP_Meso", "EMP_Micro", "YBFMP_Micro", "FRP_Macro", "EMP_Macro", "FMWT_Macro", "STN_Macro", "DOP_Macro", and "DOP_Meso". Defaults to including all datasets except the two YBFMP datasets.
#' @param Biomass Whether to add carbon biomass (carbon biomass per unit effort (\eqn{\mu}g/ \ifelse{html}{\out{m<sup>3</sup>}}{\eqn{m^{3}}})) to the dataset (where conversion equations and required data are available). Defaults to \code{Biomass = TRUE}
#' @param Data_folder Path to folder in which source datasets are stored, and to which you would like datasets to be downloaded if you set \code{Redownload_data = TRUE}. If you do not want to store every source dataset, you can leave this at the default \code{tempdir()}. If you do not wish to redownload these datasets every time you run the function, you can set this to a directory on your computer and run the function in the future with \code{Redownload_data = FALSE}, which will load the source datasets from \code{Data_folder} instead of downloading them again.
#' @param Save_object Should the combined data be saved to disk? Defaults to \code{Save_object = TRUE}.
#' @param Return_object Should data be returned as an R object? If \code{TRUE}, the function will return the full combined dataset. Defaults to \code{Return_object = FALSE}.
#' @param Return_object_type If \code{Return_object = TRUE}, should data be returned as a combined dataframe (\code{Return_object_type = "Combined"}) or a list with component "Zooplankton" containing the zooplankton data and component "Environment" containing the environmental data (\code{Return_object_type = "List"}, the default). A list is required to feed data into the \code{Zoopsynther} function without saving the combined dataset to disk.
#' @param Redownload_data Should source datasets be redownloaded from the internet? Defaults to \code{Redownload_data = FALSE}.
#' @param Download_method Method used to download files. See argument \code{method} options in \code{\link[utils]{download.file}}. Defaults to "auto".
#' @param Zoop_path File path specifying the folder and filename of the zooplankton dataset. Defaults to \code{Zoop_path = file.path(Data_folder, "zoopforzooper")}.
#' @param Env_path File path specifying the folder and filename of the dataset with accessory environmental parameters. Defaults to \code{Env_path = file.path(Data_folder, "zoopenvforzooper")}.
#' @param Crosswalk Crosswalk table to be used for conversions. Must have columns named for each unique combination of source and size class with an underscore separator, as well as all taxonomic levels Phylum through Species, Taxname (full scientific name) and Lifestage. See \code{\link{crosswalk}} (the default) for an example.
#' @param Stations Latitudes and longitudes for each unique station. See \code{\link{stations}} (the default) for an example.
#' @keywords download integration synthesis zooplankton
#' @import data.table
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @return If \code{Return_object = TRUE}, returns the combined dataset as a list or tibble, depending on whether \code{Return_object_type} is set to \code{"List"} or \code{"Combined"}. If \code{Save_object = TRUE}, writes 2 .Rds files to disk: one with the zooplankton catch data and another with accessory environmental parameters.
#' @author Sam Bashevkin
#' @details Note that EMP Macro samples with QAQC flags (any value of AmphipodCode other than "A") have had their Amphipod CPUE set to NA in this function. For more information on the source datasets see \code{\link{zooper}}.
#' @examples
#' \dontrun{
#' Data <- Zoopdownloader(Data_folder = tempdir(), Return_object = TRUE,
#' Save_object = FALSE, Redownload_data = TRUE)
#' }
#' @seealso \code{\link{Zoopsynther}}, \code{\link{crosswalk}}, \code{\link{stations}}, \code{\link{zooper}}
#' @export
Zoopdownloader <- function(
Data_sets = c("EMP_Meso", "FMWT_Meso", "STN_Meso",
"20mm_Meso", "FRP_Meso", "EMP_Micro",
"FRP_Macro", "EMP_Macro", "FMWT_Macro",
"STN_Macro", "DOP_Meso", "DOP_Macro"),
Biomass = TRUE,
Data_folder = tempdir(),
Save_object = TRUE,
Return_object = FALSE,
Return_object_type = "List",
Redownload_data = FALSE,
Download_method="auto",
Zoop_path = file.path(Data_folder, "zoopforzooper"),
Env_path = file.path(Data_folder, "zoopenvforzooper"),
Crosswalk = zooper::crosswalk,
Stations = zooper::stations){
# Setup -------------------------------------------------------------------
where <- utils::getFromNamespace("where", "tidyselect")
# Check arguments
if (!purrr::every(Data_sets, ~.%in%c("EMP_Meso", "FMWT_Meso", "STN_Meso",
"20mm_Meso", "FRP_Meso","EMP_Micro",
"FRP_Macro", "EMP_Macro", "FMWT_Macro",
"STN_Macro", "YBFMP_Meso", "YBFMP_Micro",
"DOP_Meso", "DOP_Macro"))){
stop("Data_sets must contain one or more of the following options: 'EMP_Meso',
'FMWT_Meso', 'STN_Meso', '20mm_Meso', 'FRP_Meso', 'EMP_Micro', 'FRP_Macro', 'EMP_Macro',
'FMWT_Macro', 'STN_Macro', 'YBFMP_Meso', 'YBFMP_Micro', 'DOP_Macro', 'DOP_Meso'")
}
if (!Return_object_type%in%c("List", "Combined")){
stop("Return_object_type must be either 'List' or 'Combined'.")
}
if(!purrr::every(list(Save_object, Return_object, Redownload_data), is.logical)){
stop("Save_object, Return_object, and Redownload_data must all have logical arguments.")
}
if(Biomass & !("Macro"%in%stringr::str_extract(Data_sets, "(?<=_).*") & "EMP_Macro"%in%Data_sets)){
stop("Biomass are only available for macrozooplankton, and currently only available for EMP, so EMP_Macro must be selected if Length = TRUE.")
}
# Load station key to later incorporate latitudes and longitudes
stations <- Stations
# Initialize list of dataframes
data.list<-list()
if(Biomass){
lengths.list<-list()
}
# Find URLs ---------------------------------------------------------------
URLs<-zoop_urls(unique(stringr::str_extract(Data_sets, "^[^_]+(?=_)")))
# EMP Meso ---------------------------------------------------------------------
if("EMP_Meso"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "EMP_meso.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Meso,
destfile=file.path(Data_folder, "EMP_meso.csv"), mode="wb", method=Download_method)
}
# Import the EMP data
zoo_EMP_Meso<-readr::read_csv(file.path(Data_folder, "EMP_meso.csv"),
col_types=readr::cols_only(SampleDate="c", Time="c", StationNZ="c",
Chl_a="d", Secchi="d", Temperature="d",
ECSurfacePreTow="d", ECBottomPreTow="d",
Volume="d", Depth="d", ACARTELA="d", ACARTIA="d",
DIAPTOM="d", EURYTEM="d", OTHCALAD="d",
PDIAPFOR="d", PDIAPMAR="d", SINOCAL="d",
TORTANUS="d", ACANTHO="d", LIMNOSPP="d",
LIMNOSINE="d", LIMNOTET="d", OITHDAV="d",
OITHSIM="d", OITHSPP="d", OTHCYCAD="d",
HARPACT="d", CALJUV="d", EURYJUV="d",
OTHCALJUV="d", PDIAPJUV="d", SINOCALJUV="d",
ASINEJUV="d", ACARJUV="d", DIAPTJUV="d",
TORTJUV="d", CYCJUV="d", LIMNOJUV="d",
OITHJUV="d", OTHCYCJUV="d", COPNAUP="d",
EURYNAUP="d", OTHCOPNAUP="d", PDIAPNAUP="d",
SINONAUP="d", BOSMINA="d", DAPHNIA="d",
DIAPHAN="d",OTHCLADO="d", ASPLANCH="d",
KERATELA="d",OTHROT="d", POLYARTH="d",
SYNCH="d",SYNCHBIC="d", TRICHO="d",
BARNNAUP="d", CRABZOEA="d"))
# Transform from "wide" to "long" format, add some variables,
# alter data to match other datasets
data.list[["EMP_Meso"]] <- zoo_EMP_Meso%>%
dplyr::filter(!is.na(.data$SampleDate))%>%
dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"),
Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_, paste(.data$SampleDate, .data$Time)),
c("%Y-%m-%d %I:%M %p"), tz="Etc/GMT+8"), #create a variable for datetime
Datetime=lubridate::with_tz(.data$Datetime, "America/Los_Angeles"))%>% # Ensure everything ends up in local time
tidyr::pivot_longer(cols=c(-"SampleDate", -"StationNZ", -"Time", -"Secchi", -"Chl_a", -"Temperature",
-"ECSurfacePreTow", -"ECBottomPreTow", -"Volume", -"Datetime", -"Depth"),
names_to="EMP_Meso", values_to="CPUE")%>% #transform from wide to long
dplyr::mutate(Source="EMP",
SizeClass="Meso")%>% #add variable for data source
dplyr::select("Source", Date="SampleDate", "Datetime",
Station="StationNZ", Chl = "Chl_a", CondBott = "ECBottomPreTow", CondSurf = "ECSurfacePreTow", "Secchi", "SizeClass",
"Temperature", "Volume", BottomDepth="Depth", "EMP_Meso", "CPUE")%>% #Select for columns in common and rename columns to match
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("EMP_Meso", "Lifestage", "Taxname", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Intro", "EMPstart", "EMPend")%>% #only retain EMP codes
dplyr::filter(!is.na(.data$EMP_Meso))%>% #Only retain Taxnames corresponding to EMP codes
dplyr::distinct(),
by="EMP_Meso")%>%
dplyr::filter(!is.na(.data$Taxname))%>% #Should remove all the summed categories in original dataset
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
Tide="1",# All EMP samples collected at high slack
TowType="Oblique",
BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ .data$CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$EMPstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$EMPstart & .data$Date < .data$EMPend ~ 0,
.data$CPUE==0 & .data$Date >= .data$EMPend ~ NA_real_
))%>%
dplyr::select(-"EMP_Meso", -"EMPstart", -"EMPend", -"Intro")%>% #Remove EMP taxa codes
dplyr::select(-"Datetime")%>% #Add this back in when other EMP data have time
dtplyr::lazy_dt()%>% #Speed up code using dtplyr package that takes advantage of data.table speed
dplyr::group_by(dplyr::across(-"CPUE"))%>%
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #Some taxa now have the same names (e.g., CYCJUV and OTHCYCJUV) so we now add those categories together.
dplyr::ungroup()%>%
tibble::as_tibble() %>% #required to finish operation after lazy_dt()
dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long
cat("\nEMP_Meso finished!\n\n")
}
# DOP Meso ---------------------------------------------------------------------
if("DOP_Meso"%in%Data_sets) {
#download the files
if (!file.exists(file.path(Data_folder, "DOP_Meso.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$DOP$Meso,
destfile=file.path(Data_folder, "DOP_Meso.csv"), mode="wb", method=Download_method)
}
if (!file.exists(file.path(Data_folder, "DOP_trawls.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$DOP$trawls,
destfile=file.path(Data_folder, "DOP_trawls.csv"), mode="wb", method=Download_method)
}
# Import the DOP data
zoo_DOP_Meso<-readr::read_csv(file.path(Data_folder, "DOP_Meso.csv"),
col_types=readr::cols_only(ICF_ID="c", Acanthocyclops_spp_adult="d", Acanthocyclops_vernalis_adult="d",
Acanthocyclops_vernalis_copepodid="d", Acartia_spp_adult="d", Acartia_spp_copepodid="d",
Acartiella_sinensis_adult="d", Acartiella_sinensis_copepodid="d", Asplanchna_spp="d",
Barnacle_UNID_nauplii="d", Bosmina_longirostris="d", Brachionidae_UNID="d",
Brachionus_spp="d", Calanoid_UNID_adult="d", Calanoid_UNID_copepodid="d",
Camptocercus_spp="d", Chydoridae_UNID="d", Chydorus_spp="d",
Cladocera_UNID="d", Copepod_UNID_nauplii="d", Crab_UNID_zoea="d",
Cyclopoid_UNID_adult="d", Cyclopoid_UNID_copepodid="d", Daphnia_spp="d",
Daphniidae_UNID="d", Diaptomidae_UNID_adult="d", Diaptomidae_UNID_copepodid="d",
Ditrichocorycaeus_affinis_adult="d", Eurytemora_affinis_adult="d", Eurytemora_affinis_copepodid="d",
Eurytemora_spp_nauplii="d", Harpacticoid_UNID="d", Holopedium_gibberum="d",
Ilyocryptus_spp="d", Keratella_spp="d", Labidocera_spp_adult="d",
Labidocera_spp_copepodid="d", Leptodora_spp="d", Limnoithona_sinensis_adult="d",
Limnoithona_sinensis_copepodid="d", Limnoithona_spp_adult="d", Limnoithona_spp_copepodid="d",
Limnoithona_tetraspina_adult="d", Limnoithona_tetraspina_copepodid="d", Macrothrix_spp="d",
Moina_spp="d", Oithona_davisae_adult="d", Oithona_davisae_copepodid="d",
Oithona_similis_adult="d", Oithona_similis_copepodid="d", Oithona_spp_adult="d",
Oithona_spp_copepodid="d", Osphranticum_labronectum_adult="d", Osphranticum_labronectum_copepodid="d",
Ostracoda_UNID="d", Paracalanus_parvus_adult="d", Paracalanus_parvus_copepodid="d",
Platyias_spp="d", Podonidae_UNID="d", Polyarthra_spp="d",
Pseudodiaptomus_euryhalinus_adult="d", Pseudodiaptomus_forbesi_adult="d", Pseudodiaptomus_forbesi_copepodid="d",
Pseudodiaptomus_marinus_adult="d", Pseudodiaptomus_marinus_copepodid="d", Pseudodiaptomus_spp_adult="d",
Pseudodiaptomus_spp_copepodid="d", Pseudodiaptomus_spp_nauplii="d", Rotifer_UNID="d",
Scapholeberis_spp="d", Sididae_UNID="d", Sinocalanus_doerrii_adult="d",
Sinocalanus_doerrii_copepodid="d", Sinocalanus_doerrii_nauplii="d", Synchaeta_bicornis="d",
Synchaeta_spp="d", Tortanus_dextrilobatus_adult="d", Tortanus_discaudatus_adult="d",
Tortanus_spp_copepodid="d", Trichocerca_spp="d"))
zoo_DOP_trawls<-readr::read_csv(file.path(Data_folder, "DOP_trawls.csv"),
col_types=readr::cols_only(ICF_ID="c", Date="c", Start_Time="c",
Station_Code="c", Habitat="c", Latitude="d", Longitude="d",
Start_Depth="d", Temperature="d", Conductivity="d",
Turbidity="d", pH="d", DO="d", Microcystis="c",
Chl_a="d", Secchi="d", Mesozooplankton_Volume="d"))
# Transform from "wide" to "long" format, add some variables,
# alter data to match other datasets
data.list[["DOP_Meso"]] <- zoo_DOP_Meso %>%
tidyr::pivot_longer(cols = !"ICF_ID", names_to = "DOP_Meso", values_to = "CPUE") %>%
dplyr::left_join(zoo_DOP_trawls, by="ICF_ID") %>%
dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles"),
Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Start_Time), NA_character_, paste(.data$Date, .data$Start_Time)),
"%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"), #create a variable for datetime
Source = "DOP", #add variable for data source
SizeClass = "Meso") %>%
dplyr::filter(!is.na(.data$Mesozooplankton_Volume)) %>% #get rid of environmental variables with no data
#Select variables we are interested in.
dplyr::select("Source", "Date", "Datetime",
Station = "Station_Code", Chl = "Chl_a", CondSurf = "Conductivity", "Secchi", "SizeClass",
"Temperature", TurbidityNTU ="Turbidity", "pH", "DO", "Microcystis",
Volume = "Mesozooplankton_Volume", BottomDepth = "Start_Depth",
"DOP_Meso", "CPUE", "Latitude", "Longitude", "ICF_ID", TowType="Habitat") %>%
dplyr::left_join(Crosswalk %>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("DOP_Meso", "Lifestage", "Taxname", "Phylum",
"Class", "Order", "Family", "Genus", "Species",
"DOPstart", "DOPend", "Intro")%>% #only retain dop codes
dplyr::filter(!is.na(.data$DOP_Meso))%>% #Only retain Taxnames corresponding to EMP codes
dplyr::distinct(),
by="DOP_Meso")%>%
dplyr::filter(!is.na(.data$Taxname), !is.na(.data$CPUE)) %>% #get rid of the lines with "NA" because the critter wasn't counted in this sample.
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
SampleID=paste(.data$Source, .data$Station, .data$Date, .data$ICF_ID), #Create identifier for each sample
TowType=dplyr::recode(.data$TowType, `Channel Surface`="Surface", Shoal="Surface",
`Channel Deep`="Bottom"),
CondBott=ifelse(.data$TowType=="Bottom", .data$CondSurf, NA), # Move salinity to bottom for bottom samples
dplyr::across(c("Chl", "CondSurf", "Secchi", "Temperature", "TurbidityNTU", "pH", "DO", "Microcystis"),
~ifelse(.data$TowType=="Bottom", NA, .x)), # Remove bottom samples for variables that aren't retained
BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ .data$CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$DOPstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$DOPstart & .data$Date < .data$DOPend ~ 0,
.data$CPUE==0 & .data$Date >= .data$DOPend ~ NA_real_)) %>%
dplyr::filter(!is.na(.data$CPUE)) %>%
dplyr::select(-"DOP_Meso", -"ICF_ID", -"DOPstart", -"DOPend", -"Intro") #Remove DOP code
cat("\nDOP_Meso finished!\n\n")
}
# DOP Macro ---------------------------------------------------------------------
if("DOP_Macro"%in%Data_sets) {
#download the files
if (!file.exists(file.path(Data_folder, "DOP_Macro.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$DOP$Macro,
destfile=file.path(Data_folder, "DOP_Macro.csv"), mode="wb", method=Download_method)
}
if (!file.exists(file.path(Data_folder, "DOP_trawls.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$DOP$trawls,
destfile=file.path(Data_folder, "DOP_trawls.csv"), mode="wb", method=Download_method)
}
# Import the DOP data
zoo_DOP_Macro<-readr::read_csv(file.path(Data_folder, "DOP_Macro.csv"),
col_types=readr::cols_only(ICF_ID="c", Alienacanthomysis_macropsis="d", Americorophium_spinicorne="d",
Americorophium_spp="d", Americorophium_stimpsoni="d", Ampelisca_abdita="d",
Amphipod_UNID="d", Ampithoe_spp="d", Ampithoe_valida="d",
Corophiidae_UNID="d", Crangonyx_spp="d", Cumacean_UNID="d",
Deltamysis_holmquistae="d", Dexaminidae_UNID="d", Eogammarus_spp="d",
Exopalaemon_spp="d", Gammarus_daiberi="d", Grandidierella_japonica="d",
Grandifoxus_grandis="d", Hyalella_spp="d", Hyperacanthomysis_longirostris="d",
Isopoda_UNID="d", Monocorophium_acherusicum="d", Mysid_UNID="d",
Neomysis_kadiakensis="d", Neomysis_mercedis="d", Oedicerotidae_UNID="d",
Orientomysis_aspera="d", Orientomysis_hwanhaiensis="d", Pleustidae_UNID="d",
Shrimp_UNID_larvae="d", Sinocorophium_alienense="d", Tanaidacea_UNID="d"))
zoo_DOP_trawls<-readr::read_csv(file.path(Data_folder, "DOP_trawls.csv"),
col_types=readr::cols_only(ICF_ID="c", Date="c", Start_Time="c",
Station_Code="c", Habitat="c", Latitude="d", Longitude="d",
Start_Depth="d", Temperature="d", Conductivity="d",
Turbidity="d", pH="d", DO="d", Microcystis="c",
Chl_a="d", Secchi="d", Macrozooplankton_Volume="d"))
# Transform from "wide" to "long" format, add some variables,
# alter data to match other datasets
data.list[["DOP_Macro"]] <- zoo_DOP_Macro %>%
tidyr::pivot_longer(cols = !"ICF_ID", names_to = "DOP_Macro", values_to = "CPUE") %>%
dplyr::left_join(zoo_DOP_trawls, by="ICF_ID") %>%
dplyr::filter(!is.na(.data$Macrozooplankton_Volume)) %>%
dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles"),
Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Start_Time), NA_character_, paste(.data$Date, .data$Start_Time)),
"%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"), #create a variable for datetime,
Source = "DOP", #add variable for data source
SizeClass = "Macro") %>%
#Select variables we are interested in. I need to check on the latitude/longitude issue with Sam.
dplyr::select("Source", "Date", "Datetime",
Station = "Station_Code", Chl = "Chl_a", CondSurf = "Conductivity", "Secchi", "SizeClass",
"Temperature", TurbidityNTU = "Turbidity", "pH", "DO", "Microcystis",
Volume = "Macrozooplankton_Volume", BottomDepth = "Start_Depth", "ICF_ID",
"DOP_Macro", "CPUE", "Latitude", "Longitude", TowType="Habitat") %>%
dplyr::left_join(Crosswalk %>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("DOP_Macro", "Lifestage", "Taxname", "Phylum",
"Class", "Order", "Family", "Genus", "Species",
"DOPstart", "DOPend", "Intro")%>% #only retain dop codes
dplyr::filter(!is.na(.data$DOP_Macro))%>% #Only retain Taxnames corresponding to EMP codes
dplyr::distinct(),
by="DOP_Macro")%>%
dplyr::filter(!is.na(.data$Taxname), !is.na(.data$CPUE)) %>%
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
SampleID=paste(.data$Source, .data$Station, .data$Date, .data$ICF_ID), #Create identifier for each sample
TowType=dplyr::recode(.data$TowType, `Channel Surface`="Surface", Shoal="Surface",
`Channel Deep`="Bottom"),
CondBott=ifelse(.data$TowType=="Bottom", .data$CondSurf, NA),
dplyr::across(c("Chl", "CondSurf", "Secchi", "Temperature", "TurbidityNTU", "pH", "DO", "Microcystis"),
~ifelse(.data$TowType=="Bottom", NA, .x)),
BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ .data$CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$DOPstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$DOPstart & .data$Date < .data$DOPend ~ 0,
.data$CPUE==0 & .data$Date >= .data$DOPend ~ NA_real_)) %>%
dplyr::filter(!is.na(.data$CPUE)) %>%
dplyr::select(-"DOP_Macro", -"ICF_ID", -"DOPstart", -"DOPend", -"Intro") #Remove DOP code
cat("\nDOP_Macro finished!\n\n")
}
# FMWTSTN Meso --------------------------------------------------------------------
if("FMWT_Meso"%in%Data_sets | "STN_Meso"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "FMWTSTN_Meso.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$FMWTSTN$Meso,
destfile=file.path(Data_folder,"FMWTSTN_Meso.csv"), mode="wb", method=Download_method)
}
if (!file.exists(file.path(Data_folder, "SMSCG_Meso.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$SMSCG$Meso,
destfile=file.path(Data_folder, "SMSCG_Meso.csv"), mode="wb", method=Download_method)
}
# Import the FMWT data
zoo_FMWT_Meso <- readr::read_csv(file.path(Data_folder, "FMWTSTN_Meso.csv"),
col_types=readr::cols_only(Project="c", Year="d", Survey="d",
Date="c", Station="c", Time="c",
TideCode="c", DepthBottom="d", CondSurf="d",
CondBott="d", TempSurf="d", Secchi="d",Turbidity="d",
Microcystis="c", Volume="d",
ACARTELA="d", ACARTIA="d", DIAPTOM="d",
EURYTEM="d", OTHCALAD="d", PDIAPFOR="d",
PDIAPMAR="d", SINOCAL="d", TORTANUS="d",
ACANTHO="d", LIMNOSPP="d", LIMNOSINE="d",
LIMNOTET="d", OITHDAV="d", OITHSIM="d",
OITHSPP="d", OTHCYCAD="d", HARPACT="d",
EURYJUV="d", OTHCALJUV="d", PDIAPJUV="d",
SINOCALJUV="d", ASINEJUV="d", ACARJUV="d",
DIAPTJUV="d", TORTJUV="d", LIMNOJUV="d",
OITHJUV="d", OTHCYCJUV="d", EURYNAUP="d",
OTHCOPNAUP="d", PDIAPNAUP="d", SINONAUP="d",
BOSMINA="d", DAPHNIA="d", DIAPHAN="d",
OTHCLADO="d", ASPLANCH="d", KERATELA="d",
OTHROT="d", POLYARTH="d", SYNCH="d",
TRICHO="d", BARNNAUP="d", CRABZOEA="d",
OSTRACOD="d", CUMAC="d"))%>%
dplyr::mutate(ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station),
Date=lubridate::parse_date_time(.data$Date, "%m/%d/%Y", tz="America/Los_Angeles"))
zoo_SMSCG_Meso<-readr::read_csv(file.path(Data_folder, "SMSCG_Meso.csv"),
col_types=readr::cols_only(Project="c", Year="d", Survey="d",
Date="c", Station="c", Time="c",
TideCode="c", DepthBottom="d", CondSurf="d",
PPTSurf="d", CondBott="d", PPTBott="d",
TempSurf="d", TempBottom="d", Secchi="d",
Turbidity="d", Microcystis="c", Volume="d",
ACARTELA="d", ACARTIA="d", DIAPTOM="d",
EURYTEM="d", OTHCALAD="d", PDIAPFOR="d",
PDIAPMAR="d", SINOCAL="d", TORTANUS="d",
ACANTHO="d", LIMNOSPP="d", LIMNOSINE="d",
LIMNOTET="d", OITHDAV="d", OITHSIM="d",
OTHCYCAD="d", HARPACT="d", EURYJUV="d",
OTHCALJUV="d", PDIAPJUV="d", SINOCALJUV="d",
ASINEJUV="d", ACARJUV="d", DIAPTJUV="d",
TORTJUV="d", LIMNOJUV="d", OITHJUV="d",
OTHCYCJUV="d", EURYNAUP="d", OTHCOPNAUP="d",
PDIAPNAUP="d", SINONAUP="d", BOSMINA="d",
DAPHNIA="d", DIAPHAN="d", OTHCLADO="d",
ASPLANCH="d", KERATELA="d", OTHROT="d",
POLYARTH="d", SYNCH="d", TRICHO="d",
BARNNAUP="d", CRABZOEA="d", OSTRACOD="d", CUMAC="d"))%>%
dplyr::mutate(Project=dplyr::recode(.data$Project, TNS="STN"),
ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station),
Date=lubridate::parse_date_time(.data$Date, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
dplyr::filter(!.data$ID%in%unique(zoo_FMWT_Meso$ID) & .data$Project!="EMP")%>%
dplyr::mutate(Station=dplyr::if_else(.data$Project=="FRP", paste(.data$Project, .data$Station), .data$Station),
Project=dplyr::recode(.data$Project, FRP="STN"))
# Transform from "wide" to "long" format, add some variables,
# alter data to match other datasets
data.list[["FMWT_Meso"]] <- zoo_FMWT_Meso%>%
dplyr::bind_rows(zoo_SMSCG_Meso)%>%
dplyr::select(-"ID")%>%
dplyr::mutate(Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time) | !stringr::str_detect(.data$Time, stringr::fixed(":")),
NA_character_,
paste(.data$Date, .data$Time)), "%Y-%m-%d %H:%M", tz="America/Los_Angeles"))%>% #create a variable for datetime
tidyr::pivot_longer(cols=c(-"Project", -"Year", -"Survey", -"Date", -"Datetime",
-"Station",-"Time", -"TideCode",
-"DepthBottom", -"CondSurf",
-"CondBott", -"TempSurf", -"Secchi",
-"Turbidity", -"Microcystis",
-"Volume"),
names_to="FMWT_Meso", values_to="CPUE")%>% #transform from wide to long
dplyr::select(Source = "Project", "Year", "Date", "Datetime", "Station", Tide = "TideCode",
BottomDepth = "DepthBottom", "CondSurf", "CondBott", Temperature = "TempSurf",
"Secchi", TurbidityNTU = "Turbidity", "Microcystis", "Volume", "FMWT_Meso", "CPUE")%>% #Select for columns in common and rename columns to match
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("FMWT_Meso", "Lifestage", "Taxname", "Phylum", "Class",
"Order", "Family", "Genus", "Species", "Intro",
"FMWTstart", "FMWTend")%>% #only retain FMWT codes
dplyr::filter(!is.na(.data$FMWT_Meso))%>% #Only retain Taxnames corresponding to FMWT codes
dplyr::distinct(),
by = "FMWT_Meso")%>%
dplyr::filter(!is.na(.data$Taxname))%>%
dplyr::mutate(Station=dplyr::recode(.data$Station, MONT="Mont", HONK="Honk"),
Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
Microcystis=dplyr::if_else(.data$Microcystis=="6", "2", .data$Microcystis), #Microsystis value of 6 only used from 2012-2015 and is equivalent to a 2 in other years, so just converting all 6s to 2s.
SampleID=paste(.data$Source, .data$Station, .data$Date),
TowType="Oblique",
SizeClass="Meso")%>% #Create identifier for each sample
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$FMWTstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$FMWTstart & .data$Date < .data$FMWTend ~ 0,
.data$CPUE==0 & .data$Date >= .data$FMWTend ~ NA_real_
))%>%
dplyr::filter(!is.na(.data$CPUE))%>%
dplyr::select(-"FMWT_Meso", -"FMWTstart", -"FMWTend", -"Intro")%>% #Remove FMWT taxa codes
dplyr::left_join(stations, by=c("Source", "Station"))%>% #Add lat and long
{if(!("FMWT_Meso"%in%Data_sets)){
dplyr::filter(., .data$Source != "FMWT")
} else{
.
}}%>%
{if(!("STN_Meso"%in%Data_sets)){
dplyr::filter(., .data$Source != "STN")
} else{
.
}}
cat("\nFMWT_Meso and/or STN_Meso finished!\n\n")
}
# twentymm Meso ----------------------------------------------------------------
if("20mm_Meso"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "twentymm_Meso.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$twentymm$Meso,
destfile=file.path(Data_folder, "twentymm_Meso.csv"), mode="wb", method=Download_method)
}
# Import and modify 20mm data
zoo_20mm_Meso<-readxl::read_excel(file.path(Data_folder, "twentymm_Meso.csv"),
sheet="20-mm CB CPUE Data",
col_types = c("numeric","date", rep("numeric", 3),
"date", rep("numeric", 6), "text", rep("numeric", 74)))
data.list[["twentymm_Meso"]]<-zoo_20mm_Meso%>%
dplyr::mutate(SampleID = paste(.data$Station, .data$SampleDate, .data$TowNum),
SampleDate=lubridate::force_tz(.data$SampleDate, "America/Los_Angeles"),
Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$TowTime),
NA_character_,
paste0(.data$SampleDate, " ", lubridate::hour(.data$TowTime), ":", lubridate::minute(.data$TowTime))),
"%Y-%m-%d %H:%M", tz="America/Los_Angeles"))%>%
#turbidity is now either NTU or FNU
tidyr::pivot_longer(cols=c(-"SampleDate", -"Survey", -"Station", -"TowTime", -"Temp", -"TopEC",
-"BottomEC", -"Secchi", -"NTU", -"FNU", -"Tide", -"BottomDepth", -"Duration", -"MeterCheck", -"Volume",
-"Dilution", -"SampleID", -"Datetime"),
names_to="twentymm_Meso", values_to="CPUE")%>% #transform from wide to long
dplyr::select(Date="SampleDate", "Station", Temperature = "Temp", CondSurf = "TopEC",
CondBott = "BottomEC", "Secchi",
TurbidityNTU = "NTU", TurbidityFNU = "FNU",
"Tide", "BottomDepth", "Volume", "SampleID", "Datetime", "twentymm_Meso", "CPUE")%>% #Select for columns in common and rename columns to match
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("twentymm_Meso", "Lifestage", "Taxname", "Phylum", "Class",
"Order", "Family", "Genus", "Species", "Intro", "twentymmstart", "twentymmend", "twentymmstart2")%>% #only retain FMWT codes
dplyr::filter(!is.na(.data$twentymm_Meso))%>% #Only retain Taxnames corresponding to FMWT codes
dplyr::distinct(),
by = "twentymm_Meso")%>%
dplyr::filter(!is.na(.data$Taxname))%>%
dplyr::mutate(Source="twentymm",
SizeClass="Meso",
Station=as.character(.data$Station),
Taxlifestage=paste(.data$Taxname, .data$Lifestage),#add variable for data source, create variable for combo taxonomy x life stage
BottomDepth=.data$BottomDepth*0.3048)%>% # Convert feet to meters
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ .data$CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$twentymmstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$twentymmstart & .data$Date < .data$twentymmend ~ 0,
.data$CPUE==0 & .data$Date >= .data$twentymmend & .data$Date < .data$twentymmstart2 ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$twentymmstart2 ~ 0 #20mm dataset had one case of a taxa starting, ending, and starting again
))%>%
dplyr::select(-"twentymmend", -"twentymmstart", -"twentymmstart2", -"Intro", -"twentymm_Meso")%>%
dtplyr::lazy_dt()%>% #Speed up
dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #this just adds up those duplications
dplyr::ungroup()%>%
tibble::as_tibble()%>%
dplyr::mutate(Source="20mm",
TowType="Oblique",
SampleID=paste(.data$Source, .data$SampleID)) %>%#Create identifier for each sample
dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long
cat("\n20mm_Meso finished!\n\n")
}
# FRP Meso ---------------------------------------------------------------------
if("FRP_Meso"%in%Data_sets) {
# Import the FRP data
#download the file
if (!file.exists(file.path(Data_folder, "zoopsFRP.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$FRP$Meso,
destfile=file.path(Data_folder, "zoopsFRP.csv"), mode="wb", method=Download_method)
Tryer(n=3, fun=utils::download.file, url=URLs$FRP$site,
destfile=file.path(Data_folder, "sitesFRP.csv"), mode="wb", method=Download_method)
}
zoo_FRP_Meso <- readr::read_csv(file.path(Data_folder, "zoopsFRP.csv"), na=c("", "NA"))
sites_FRP_Meso <- readr::read_csv(file.path(Data_folder, "sitesFRP.csv"), na=c("", "NA"))
# Join the site-level environmental data onto the taxa counts and standardise
# a few inconsistent common names so they match the crosswalk.
# NOTE(review): no `by` is supplied, so dplyr joins on every column name the
# two tables share (and prints a message listing them) -- confirm this is
# the intended key set.
FRP_all <- dplyr::left_join(zoo_FRP_Meso, sites_FRP_Meso) %>%
  dplyr::mutate(CommonName = dplyr::case_when(
    .data$CommonName == "Fish larvae" ~ "Fish UNID",
    .data$CommonName == "Insect Unid" ~ "Insect UNID",
    .data$CommonName == "Calanoid copepod (gravid)" ~ "Calanoid UNID",
    .data$CommonName == "Asellidae UNID" ~ "Asellidae",
    TRUE ~ .data$CommonName)) # every other name is kept unchanged
#Already in long format
data.list[["FRP_Meso"]] <- FRP_all%>%
# dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
dplyr::mutate(Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$StartTime),
NA_character_,
paste(.data$Date, .data$StartTime)),
"%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"))%>% #Create a variable for datetime
dplyr::mutate(Source="FRP", #add variable for data source
SizeClass="Meso",
TowType= dplyr::case_when(GearTypeAbbreviation == "ZOOP" ~ "Surface",
GearTypeAbbreviation == "ZOBL" ~ "Oblique",
GearTypeAbbreviation == "ZBEN" ~ "Bottom",
TRUE ~ "Surface"),
Microcystis = dplyr::recode(.data$Microcystis, `1=absent`="1", `2=low`="2", `3=medium` = "3"))%>%
dplyr::select("Source", "Date", "Datetime", Latitude= "LatitudeStart", Longitude = "LongitudeStart", Station = "Location",
CondSurf = "SC", "Secchi", "pH", "DO", TurbidityNTU = "Turbidity", "Tide", "Microcystis", "SizeClass", "TowType",
Temperature = "Temp", Volume = "effort", FRP_Meso = "CommonName", "CPUE", SampleID = "SampleID_frp")%>% #Select for columns in common and rename columns to match
dplyr::filter(!is.na(.data$Latitude)) %>% #remove samples with no gps coordinates
dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=T), .groups="drop")%>% #this just adds up those duplications
tidyr::pivot_wider(names_from="FRP_Meso", values_from="CPUE", values_fill=list(CPUE=0))%>%
tidyr::pivot_longer(cols=c(-"Source", -"Date", -"Datetime",
-"Station", -"CondSurf", -"Secchi", -"pH", -"DO", -"TurbidityNTU",
-"Tide", -"Microcystis", -"SizeClass", -"Latitude", -"Longitude",
-"Temperature", -"Volume", -"SampleID", -"TowType"),
names_to="FRP_Meso", values_to="CPUE")%>%
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("FRP_Meso", "Lifestage", "Taxname", "Phylum", "Class", "Order", "Family", "Genus", "Species")%>% #only retain FRP codes
dplyr::filter(!is.na(.data$FRP_Meso))%>% #Only retain Taxnames corresponding to FRP codes
dplyr::distinct(),
by = "FRP_Meso")%>%
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage))%>% #create variable for combo taxonomy x life stage
dplyr::select(-"FRP_Meso")%>% #Remove FRP taxa codes
dtplyr::lazy_dt()%>% #Speed up code
dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #this just adds up those duplications
dplyr::ungroup()%>%
tibble::as_tibble()%>%
dplyr::mutate(SampleID=paste(.data$Source, .data$SampleID)) #Create identifier for each sample
cat("\nFRP_Meso finished!\n\n")
}
# YBFMP Meso/Micro -------------------------------------------------------------
if("YBFMP_Meso"%in%Data_sets | "YBFMP_Micro"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "YBFMP.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$YBFMP,
destfile=file.path(Data_folder, "YBFMP.csv"), mode="wb", method=Download_method)
}
# Read only the required columns from the YBFMP file and tag every row with
# its position so duplicated records can be dropped by index further down.
zoo_YBFMP <- readr::read_csv(file.path(Data_folder, "YBFMP.csv"),
                             col_types = readr::cols_only(Date="c", Time="c", StationCode="c",
                                                          Tide="c", WaterTemperature="d", Secchi="d",
                                                          SpCnd="d", pH="d", DO="d", Turbidity="d",
                                                          MicrocystisVisualRank="c", MeshSize="c", VolNet_ed="d",
                                                          TaxonName="c", LifeStage="c", CPUE_ed="d"))%>%
  dplyr::mutate(Index = seq_len(nrow(.))) # seq_len() stays valid for an empty table (1:0 would not)

# Sum doubles with unclear life stages (both labeled as undifferentiated)
# A "double" is any station/date/time/taxon/life stage/mesh combination that
# occurs more than once.
doubles <- zoo_YBFMP %>%
  dplyr::group_by(.data$StationCode, .data$Date, .data$Time, .data$TaxonName, .data$LifeStage, .data$MeshSize) %>%
  dplyr::mutate(n = dplyr::n()) %>%
  dplyr::filter(.data$n>1)

# Row indices of all duplicated records; these are removed from zoo_YBFMP and
# replaced by the summed rows below.
Index_rm <- doubles$Index

# Collapse each set of duplicated records to a single row whose CPUE is the
# sum of the duplicates. Grouping on every remaining column keeps the sums
# within a sample (previously the sum was taken per TaxonName across all
# samples) and also works when there are no duplicated rows at all.
doubles_summed <- doubles %>%
  dplyr::ungroup() %>%
  dplyr::select(-"Index", -"n") %>%
  dplyr::group_by(dplyr::across(-"CPUE_ed")) %>%
  dplyr::summarise(CPUE_ed = if (all(is.na(.data$CPUE_ed))) NA_real_ else sum(.data$CPUE_ed, na.rm = TRUE), # stay NA when every duplicate is NA
                   .groups = "drop")
# Add zeroes, add sample ID, modify column names and order, join crosswalk taxonomy.
data.list[["YBFMP"]] <- zoo_YBFMP %>%
dplyr::filter(!(.data$Index %in% Index_rm)) %>%
dplyr::select(-"Index") %>%
dplyr::bind_rows(doubles_summed) %>% # replace doubles with summed CPUEs
dplyr::mutate(TaxonName = replace(.data$TaxonName, .data$TaxonName == "Eucyclops phaleratus", "Ectocyclops phaleratus")) %>% # Otherwise creates doubles for Platycyclops phaleratus later on
dplyr::mutate(YBFMP=paste(.data$TaxonName, .data$LifeStage),
MeshSize=dplyr::recode(.data$MeshSize, `150_micron`="Meso", `50_micron`="Micro"),
Source = "YBFMP",
SampleID = paste0(.data$Date, "_", .data$StationCode, "_", .data$MeshSize),
Datetime = lubridate::parse_date_time(paste(.data$Date, .data$Time), "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"),
Date = lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles")) %>%
dplyr:: select("Source",
SizeClass = "MeshSize",
Volume = "VolNet_ed",
"Date",
"Datetime",
Station = "StationCode",
Temperature = "WaterTemperature",
"Secchi", TurbidityNTU = "Turbidity",
CondSurf = "SpCnd",
"pH", "DO",
Microcystis="MicrocystisVisualRank",
"SampleID",
"YBFMP",
CPUE = "CPUE_ed")%>%
{if(!"YBFMP_Meso"%in%Data_sets){
dplyr::filter(., .data$SizeClass!="Meso")
}else{
.
}}%>%
{if(!"YBFMP_Micro"%in%Data_sets){
dplyr::filter(., .data$SizeClass!="Micro")
}else{
.
}}%>%
tidyr::pivot_wider(names_from="YBFMP", values_from="CPUE", values_fill=list(CPUE=0)) %>%
tidyr::pivot_longer(cols=c(-"Source", -"SizeClass", -"Volume", -"Date",
-"Datetime", -"Station", -"Temperature", -"CondSurf", -"Secchi",
-"pH", -"DO", -"TurbidityNTU", -"Microcystis",
-"SampleID"),
names_to="YBFMP", values_to="CPUE")%>%
dplyr::left_join(Crosswalk %>%
dplyr::select("YBFMP", "Lifestage", "Taxname", "Phylum", "Class",
"Order", "Family", "Genus", "Species"),
by = "YBFMP") %>%
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage))%>% #create variable for combo taxonomy x life stage
dplyr::select(-"YBFMP") %>% #Remove YBFMP taxa codes
dplyr::mutate(SampleID=paste0(.data$Source, "_", .data$SampleID), #Create identifier for each sample
TowType="Surface") %>%
dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long
# Progress message for the YBFMP section (previously reported "FRP_Meso")
cat("\nYBFMP_Meso and/or YBFMP_Micro finished!\n\n")
}
# EMP Micro ---------------------------------------------------------------
if("EMP_Micro"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "EMP_Micro.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Micro,
destfile=file.path(Data_folder, "EMP_Micro.csv"), mode="wb", method=Download_method)
}
# Import the EMP data
zoo_EMP_Micro<-readr::read_csv(file.path(Data_folder, "EMP_Micro.csv"),
col_types=readr::cols_only(SampleDate="c", StationNZ="c",
Chl_a="d", Secchi="d", Temperature="d",
ECSurfacePreTow="d", ECBottomPreTow="d",
Volume="d", Depth="d", LIMNOSPP="d",
LIMNOSINE="d", LIMNOTET="d", OITHDAV="d",
OITHSIM="d", OITHSPP="d", OTHCYCAD="d",
HARPACT="d", CYCJUV="d", LIMNOJUV="d",
OITHJUV="d", OTHCYCJUV="d", COPNAUP="d",
EURYNAUP="d", OTHCOPNAUP="d", PDIAPNAUP="d",
SINONAUP="d", ASPLANCH="d",
KERATELA="d",OTHROT="d", POLYARTH="d",
SYNCH="d",SYNCHBIC="d", TRICHO="d",
BARNNAUP="d"))
# Tranform from "wide" to "long" format, add some variables,
# alter data to match other datasets
data.list[["EMP_Micro"]] <- zoo_EMP_Micro%>%
dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
dplyr::rename(OTHCYCADPUMP = "OTHCYCAD")%>%
tidyr::pivot_longer(cols=c(-"SampleDate", -"StationNZ", -"Secchi", -"Chl_a", -"Temperature",
-"ECSurfacePreTow", -"ECBottomPreTow", -"Depth", -"Volume"),
names_to="EMP_Micro", values_to="CPUE")%>% #transform from wide to long
dplyr::mutate(Source="EMP",
SizeClass="Micro")%>% #add variable for data source
dplyr::select("Source", Date = "SampleDate", Station="StationNZ", Chl = "Chl_a",
CondBott = "ECBottomPreTow", CondSurf = "ECSurfacePreTow", "Secchi",
"Temperature", BottomDepth="Depth", "SizeClass", "Volume", "EMP_Micro", "CPUE")%>% #Select for columns in common and rename columns to match
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("EMP_Micro", "Lifestage", "Taxname", "Phylum",
"Class", "Order", "Family", "Genus", "Species",
"Intro", "EMPstart", "EMPend")%>% #only retain EMP codes
dplyr::filter(!is.na(.data$EMP_Micro))%>% #Only retain Taxnames corresponding to EMP codes
dplyr::distinct(),
by="EMP_Micro")%>%
dplyr::filter(!is.na(.data$Taxname))%>% #Should remove all the summed categories in original dataset
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
Tide="1", # All EMP samples collected at high slack
TowType="Vertical pump",
BottomDepth=.data$BottomDepth*0.3048)%>% # Convert to meters
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ .data$CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$EMPstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$EMPstart & .data$Date < .data$EMPend ~ 0,
.data$CPUE==0 & .data$Date >= .data$EMPend ~ NA_real_
))%>%
dplyr::select(-"EMP_Micro", -"EMPstart", -"EMPend", -"Intro")%>% #Remove EMP taxa codes
dtplyr::lazy_dt()%>% #Speed up code using dtplyr package that takes advantage of data.table speed
dplyr::group_by(dplyr::across(-"CPUE"))%>%
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #Some taxa now have the same names (e.g., CYCJUV and OTHCYCJUV) so we now add those categories together.
dplyr::ungroup()%>%
tibble::as_tibble() %>%#required to finish operation after lazy_dt()
dplyr::left_join(stations, by=c("Source", "Station")) #Add lat and long
cat("\nEMP_Micro finished!\n\n")
}
# FRP Macro ---------------------------------------------------------------
if("FRP_Macro"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "macroinvert_FRP.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$FRP$Macro,
destfile=file.path(Data_folder, "macroinvert_FRP.csv"), mode="wb", method=Download_method)
Tryer(n=3, fun=utils::download.file, url=URLs$FRP$site,
destfile=file.path(Data_folder, "sitesFRP.csv"), mode="wb", method=Download_method)
}
zoo_FRP_Macro <- readr::read_csv(file.path(Data_folder, "macroinvert_FRP.csv"), na=c("", "NA"))
sites_FRP_Macro <- readr::read_csv(file.path(Data_folder, "sitesFRP.csv"), na=c("", "NA"))
# Join the site-level environmental data onto the macroinvertebrate counts by
# visit number (the counts' own Date and Location columns are dropped first so
# the site table supplies them) and standardise inconsistent common names.
# NOTE(review): "Tricoptera larvae Other" is mapped to "Tricoptera larvae UNID"
# while "Tricoptera larvae UNID" is mapped to "Trichoptera larvae Other";
# case_when() does not chain these, so confirm both targets are intended.
FRP_allmac <- dplyr::left_join(dplyr::select(zoo_FRP_Macro, -"Date", -"Location"), sites_FRP_Macro, by = "VisitNo") %>%
  dplyr::mutate(CommonName = dplyr::case_when(
    .data$CommonName == "Fish larvae" ~ "Fish UNID",
    .data$CommonName == "Insect Unid" ~ "Insect UNID",
    .data$CommonName == "Calanoid copepod (gravid)" ~ "Calanoid UNID",
    .data$CommonName == "Hymenoptera UNID" ~ "Hymenoptera Other",
    .data$CommonName == "Tricoptera larvae Other" ~ "Tricoptera larvae UNID",
    .data$CommonName == "Palaemonectes" ~ "Palaemon",
    .data$CommonName == "Palaemonetes" ~ "Palaemon",
    .data$CommonName == "Asellidae UNID" ~ "Asellidae",
    .data$CommonName == "Diptera adult" ~ "Diptera Adult",
    .data$CommonName == "Coleoptera other" ~ "Coleoptera Other",
    .data$CommonName == "Tricoptera larvae UNID" ~ "Trichoptera larvae Other",
    TRUE ~ .data$CommonName)) # every other name is kept unchanged
#Already in long format
data.list[["FRP_Macro"]] <- FRP_allmac%>%
dplyr::filter(.data$GearTypeAbbreviation %in% c("MAC", "MACOBL", "MACBEN"))%>%
# dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
dplyr::mutate(Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$StartTime),
NA_character_,
paste(.data$Date, as.character(.data$StartTime))),
"%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"))%>% #Create a variable for datetime
dplyr::mutate(Source = "FRP",
SizeClass = "Macro",
TowType= dplyr::case_when(GearTypeAbbreviation == "MAC" ~ "Surface",
GearTypeAbbreviation == "MACOBL" ~ "Oblique",
GearTypeAbbreviation == "MACBEN" ~ "Bottom",
TRUE ~ "Surface"),
CPUE = .data$AdjCount/.data$effort, #add variable for data source and calculate CPUE
Microcystis = dplyr::recode(.data$Microcystis, `1=absent`="1", `2=low`="2", `3=medium`="3"))%>%
dplyr::select("Source", "Date", "Datetime", Latitude= "LatitudeStart", Longitude = "LongitudeStart", Station = "Location",
CondSurf = "SC", "Secchi", "pH", "DO", TurbidityNTU = "Turbidity", "Tide", "Microcystis", "SizeClass", "TowType",
Temperature = "Temp", Volume = "effort", FRP_Macro = "CommonName", "CPUE", SampleID = "SampleID_frp")%>% #Select for columns in common and rename columns to match
dplyr::filter(!is.na(.data$Latitude)) %>%
dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=T), .groups="drop")%>% #this just adds up those duplications
tidyr::pivot_wider(names_from="FRP_Macro", values_from="CPUE", values_fill=list(CPUE=0))%>%
tidyr::pivot_longer(cols=c(-"Source", -"Date", -"Datetime",
-"Station", -"CondSurf", -"Secchi", -"pH", -"DO", -"TurbidityNTU",
-"Tide", -"Microcystis", -"SizeClass", -"Latitude", -"Longitude",
-"Temperature", -"Volume", -"SampleID", -"TowType"),
names_to="FRP_Macro", values_to="CPUE")%>%
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("FRP_Macro", "Lifestage", "Taxname", "Phylum", "Class", "Order", "Family", "Genus", "Species")%>% #only retain FRP codes
dplyr::filter(!is.na(.data$FRP_Macro))%>% #Only retain Taxnames corresponding to FRP codes
dplyr::distinct(),
by = "FRP_Macro")%>%
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage))%>% #create variable for combo taxonomy x life stage
dplyr::select(-"FRP_Macro")%>% #Remove FRP taxa codes
dtplyr::lazy_dt()%>% #Speed up code
dplyr::group_by(dplyr::across(-"CPUE"))%>% #Some taxa names are repeated as in EMP so
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #this just adds up those duplications
dplyr::ungroup()%>%
tibble::as_tibble()%>%
dplyr::mutate(SampleID=paste(.data$Source, .data$SampleID)) #Create identifier for each sample
cat("\nFRP_Macro finished!\n\n")
}
# EMP Macro ---------------------------------------------------------------
if("EMP_Macro"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "EMP_Macro.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Macro,
destfile=file.path(Data_folder, "EMP_Macro.csv"), mode="wb", method=Download_method)
}
# Import the EMP data
zoo_EMP_Macro<-readr::read_csv(file.path(Data_folder, "EMP_Macro.csv"),
col_types=readr::cols_only(SampleDate="c", Time="c", StationNZ="c",
Chl_a="d", Secchi="d", Temperature="d",
ECSurfacePreTow="d", ECBottomPreTow="d",
Volume="d", Depth="d", AmphipodCode="c", A_aspera="d",
A_hwanhaiensis="d", A_macropsis="d", D_holmquistae="d",
H_longirostris="d", N_kadiakensis="d", N_mercedis="d",
Unidentified_mysid="d", A_spinicorne="d", A_stimpsoni="d",
A_abdita="d", Ampithoe_sp="d", Caprelidae_sp="d",
C_alienense="d", Crangonyx_sp="d", G_daiberi="d",
G_japonica="d", Hyalella_sp="d", Monocorophium_sp="d",
Oedicerotidae_sp="d", Pleustidae="d", Unidentified_Amphipod="d",
Unidentified_Corophium="d", Unidentified_Gammarus="d", Amphipod_Total="d"))%>%
dplyr::filter(dplyr::if_any(dplyr::everything(), ~ !is.na(.)))
# Tranform from "wide" to "long" format, add some variables,
# alter data to match other datasets
data.list[["EMP_Macro"]] <- zoo_EMP_Macro%>%
dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"),
Datetime=lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_, paste(.data$SampleDate, .data$Time)),
c("%Y-%m-%d %I:%M %p"), tz="Etc/GMT+8"), #create a variable for datetime
Datetime=lubridate::with_tz(.data$Datetime, "America/Los_Angeles"), # Ensure everything ends up in local time
Unidentified_Amphipod=dplyr::if_else(lubridate::year(.data$SampleDate)<2014, .data$Amphipod_Total, .data$Unidentified_Amphipod))%>% # Transfer pre 2014 amphipod counts to Amphipod_total
tidyr::pivot_longer(cols=c(-"SampleDate", -"Time", -"Datetime", -"StationNZ", -"Secchi", -"Chl_a", -"Temperature",
-"ECSurfacePreTow", -"ECBottomPreTow", -"Volume", -"Depth", -"AmphipodCode"),
names_to="EMP_Macro", values_to="CPUE")%>% #transform from wide to long
dplyr::mutate(Source="EMP",
SizeClass="Macro")%>% #add variable for data source
dplyr::select("Source", Date = "SampleDate", "Datetime", Station="StationNZ", Chl = "Chl_a",
CondBott = "ECBottomPreTow", CondSurf = "ECSurfacePreTow", "Secchi", "SizeClass",
"Temperature", BottomDepth="Depth", "Volume", "AmphipodCode", "EMP_Macro", "CPUE")%>% #Select for columns in common and rename columns to match
dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
dplyr::select("EMP_Macro", "Lifestage", "Taxname", "Phylum", "Class",
"Order", "Family", "Genus", "Species", "Intro", "EMPstart", "EMPend")%>% #only retain EMP codes
dplyr::filter(!is.na(.data$EMP_Macro))%>% #Only retain Taxnames corresponding to EMP codes
dplyr::distinct(),
by="EMP_Macro")%>%
dplyr::filter(!is.na(.data$Taxname))%>% #Should remove all the summed categories in original dataset
dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
Tide="1", # All EMP samples collected at high slack
TowType="Oblique",
BottomDepth=.data$BottomDepth*0.3048)%>% # Convert to meters
dplyr::mutate(CPUE=dplyr::case_when(
.data$CPUE!=0 ~ .data$CPUE,
.data$CPUE==0 & .data$Date < .data$Intro ~ 0,
.data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$EMPstart ~ NA_real_,
.data$CPUE==0 & .data$Date >= .data$EMPstart & .data$Date < .data$EMPend ~ 0,
.data$CPUE==0 & .data$Date >= .data$EMPend ~ NA_real_),
CPUE=dplyr::if_else(.data$AmphipodCode!="A" & .data$Order=="Amphipoda", NA_real_, .data$CPUE))%>% # Remove any tainted amphipod data (e.g., veg in net)
dplyr::select(-"EMP_Macro", -"EMPstart", -"EMPend", -"Intro")%>% #Remove EMP taxa codes
dtplyr::lazy_dt()%>% #Speed up code using dtplyr package that takes advantage of data.table speed
dplyr::group_by(dplyr::across(-"CPUE"))%>%
dplyr::summarise(CPUE=sum(.data$CPUE, na.rm=TRUE))%>% #Some taxa now have the same names (e.g., CYCJUV and OTHCYCJUV) so we now add those categories together.
dplyr::ungroup()%>%
tibble::as_tibble() %>% #required to finish operation after lazy_dt()
dplyr::left_join(stations, by=c("Source", "Station"))
cat("\nEMP_Macro finished!\n\n")
if(Biomass){
  # Download the EMP length file only when it is missing or a refresh was requested
  if (!file.exists(file.path(Data_folder, "EMP_Lengths.csv")) | Redownload_data) {
    Tryer(n=3, fun=utils::download.file, url=URLs$EMP$Lengths,
          destfile=file.path(Data_folder, "EMP_Lengths.csv"), mode="wb", method=Download_method)
  }

  # Read the length data on every run, not only right after a download, so a
  # cached file is still used when Redownload_data = FALSE.
  lengths.list[["EMP_Lengths"]]<-readr::read_csv(file.path(Data_folder, "EMP_Lengths.csv"),
                                                 col_types=readr::cols_only(SampleDate="c", StationNZ="c",
                                                                            SpeciesName="c", Size="d", AdjustedFreq="d"))%>%
    dplyr::mutate(SampleDate=lubridate::parse_date_time(.data$SampleDate, "%m/%d/%Y", tz="America/Los_Angeles"))%>%
    dplyr::rename(Date="SampleDate", Station="StationNZ", EMP_Lengths="SpeciesName", Length="Size", Count="AdjustedFreq")%>%
    dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                       dplyr::select("EMP_Lengths", "Lifestage", "Taxname")%>% #only retain EMP codes
                       dplyr::filter(!is.na(.data$EMP_Lengths))%>% #Only retain Taxnames corresponding to EMP codes
                       dplyr::distinct(),
                     by="EMP_Lengths")%>%
    dplyr::filter(!is.na(.data$Taxname))%>% #Drop length records with no crosswalk match
    dplyr::mutate(Taxlifestage=paste(.data$Taxname, .data$Lifestage),
                  Source="EMP",
                  SizeClass="Macro",
                  SampleID=paste(.data$Source, .data$Station, .data$Date))%>% #Same SampleID construction as the EMP_Macro catch data
    dplyr::select(-"EMP_Lengths", -"Date", -"Station")

  cat("\nEMP_Macro lengths finished!\n\n")
}
}
# FMWT Macro --------------------------------------------------------------
if("FMWT_Macro"%in%Data_sets | "STN_Macro"%in%Data_sets) {
#download the file
if (!file.exists(file.path(Data_folder, "FMWTSTN_Macro.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$FMWTSTN$Macro,
destfile=file.path(Data_folder,"FMWTSTN_Macro.csv"), mode="wb", method=Download_method)
}
#download the file
if (!file.exists(file.path(Data_folder, "SMSCG_Macro.csv")) | Redownload_data) {
Tryer(n=3, fun=utils::download.file, url=URLs$SMSCG$Macro,
destfile=file.path(Data_folder, "SMSCG_Macro.csv"), mode="wb", method=Download_method)
}
zoo_FMWT_Macro <- readr::read_csv(file.path(Data_folder, "FMWTSTN_Macro.csv"),
col_types=readr::cols_only(Project="c", Year="d", Survey="d",
Date="c", Station="c", Time="c",
TideCode="c", DepthBottom="d", CondSurf="d",
CondBott="d", TempSurf="d", Secchi="d",
Turbidity="d", Microcystis="c", Volume="d",
Acanthomysis_aspera="d", Hyperacanthomysis_longirostris="d", Acanthomysis_hwanhaiensis="d",
Alienacanthomysis_macropsis="d", Deltamysis_holmquistae="d", Neomysis_kadiakensis="d",
Neomysis_mercedis="d", Unidentified_Mysid="d", Americorophium_spinicorne="d",
Americorophium_stimpsoni="d", Ampelisca_abdita="d", Corophium_alienense="d",
Crangonyx_sp="d", Gammarus_daiberi="d", Grandidierella_japonica="d",
Hyalella_sp="d", Unidentified_Amphipod="d", Unidentified_Corophium="d",
Unidentified_Gammarus="d"))%>%
dplyr::mutate(ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station)) %>%
dplyr::mutate(Datetime = lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_,
paste(.data$Date, .data$Time)), "%Y-%m-%d %H:%M", tz="America/Los_Angeles"),
Date=lubridate::parse_date_time(.data$Date, "%Y-%m-%d", tz="America/Los_Angeles"))
#The 2025 data upload has a new date format for Macro (but not meso, and not SMSCG very annoying)
zoo_SMSCG_Macro <- readr::read_csv(file.path(Data_folder, "SMSCG_Macro.csv"),
col_types=readr::cols_only(Project="c", Year="d", Survey="d",
Date="c", Station="c", Time="c",
TideCode="c", DepthBottom="d", CondSurf="d",
CondBott="d", TempSurf="d", Secchi="d",
Turbidity="d", Microcystis="c", Volume="d",
Acanthomysis_aspera="d", Hyperacanthomysis_longirostris="d", Acanthomysis_hwanhaiensis="d",
Alienacanthomysis_macropsis="d", Deltamysis_holmquistae="d", Neomysis_kadiakensis="d",
Neomysis_mercedis="d", Unidentified_Mysid="d", Americorophium_spinicorne="d",
Americorophium_stimpsoni="d", Ampelisca_abdita="d", Corophium_alienense="d",
Crangonyx_sp="d", Gammarus_daiberi="d", Grandidierella_japonica="d",
Hyalella_sp="d", Unidentified_Amphipod="d", Unidentified_Corophium="d",
Unidentified_Gammarus="d"))%>%
dplyr::mutate(ID=paste(.data$Year, .data$Project, .data$Survey, .data$Station))%>%
dplyr::filter(!.data$ID%in%unique(zoo_FMWT_Macro$ID) & .data$Project%in%c("FMWT", "STN")) %>%
#put date and time in the right format. Date first, tehn datetime to try and fix a problem where the date randomly came out wrong in the 'datetime' version
dplyr::mutate(Date=lubridate::parse_date_time(.data$Date, "%m/%d/%y", tz="America/Los_Angeles"),
Datetime = lubridate::parse_date_time(dplyr::if_else(is.na(.data$Time), NA_character_,
paste(.data$Date, .data$Time)), "%Y-%m-%d %H:%M:%S", tz="America/Los_Angeles"))
# Stack the FMWT/STN macro tows with the SMSCG-only tows, reshape to long
# format, attach the crosswalk taxonomy, drop zero catches that fall outside
# each taxon's counting window, and keep only the requested surveys.
data.list[["FMWT_Macro"]] <- dplyr::bind_rows(zoo_FMWT_Macro, zoo_SMSCG_Macro)%>%
  dplyr::select(-"ID")%>%
  dplyr::distinct()%>% #remove any samples duplicated between the SMSCG dataset and the FMWT dataset
  dplyr::mutate(Microcystis = as.character(.data$Microcystis))%>%
  tidyr::pivot_longer(cols=c(-"Project", -"Year", -"Survey", -"Date", -"Datetime",
                             -"Station", -"Time", -"TideCode",
                             -"DepthBottom", -"CondSurf", -"CondBott",
                             -"TempSurf", -"Secchi", -"Turbidity", -"Microcystis",
                             -"Volume"),
                      names_to="FMWT_Macro", values_to="CPUE")%>% #transform from wide to long
  dplyr::select(Source = "Project", "Date", "Datetime", "Station", Tide = "TideCode", BottomDepth = "DepthBottom",
                "CondSurf", "CondBott", Temperature = "TempSurf", "Secchi", TurbidityNTU = "Turbidity",
                "Microcystis", "Volume",
                "FMWT_Macro", "CPUE")%>% #Select for columns in common and rename columns to match
  dplyr::left_join(Crosswalk%>% #Add in Taxnames, Lifestage, and taxonomic info
                     dplyr::select("FMWT_Macro", "Lifestage", "Taxname", "Phylum", "Class", "Order",
                                   "Family", "Genus", "Species", "Intro", "FMWTstart", "FMWTend")%>% #only retain FMWT codes
                     dplyr::filter(!is.na(.data$FMWT_Macro))%>% #Only retain Taxnames corresponding to FMWT codes
                     dplyr::distinct(),
                   by = "FMWT_Macro")%>%
  dplyr::filter(!is.na(.data$Taxname))%>%
  dplyr::mutate(Station=dplyr::recode(.data$Station, MONT="Mont", HONK="Honk"),
                Taxlifestage=paste(.data$Taxname, .data$Lifestage), #create variable for combo taxonomy x life stage
                Microcystis=dplyr::if_else(.data$Microcystis=="6", "2", .data$Microcystis), #Microcystis value of 6 only used from 2012-2015 and is equivalent to a 2 in other years, so just converting all 6s to 2s.
                SampleID=paste(.data$Source, .data$Station, .data$Date), #Create identifier for each sample
                SizeClass="Macro",
                TowType="Oblique",
                Tide=as.character(.data$Tide))%>%
  dplyr::mutate(CPUE=dplyr::case_when(
    .data$CPUE!=0 ~ .data$CPUE, #non-zero catches are always kept
    .data$CPUE==0 & .data$Date < .data$Intro ~ 0,
    .data$CPUE==0 & .data$Date >= .data$Intro & .data$Date < .data$FMWTstart ~ NA_real_,
    .data$CPUE==0 & .data$Date >= .data$FMWTstart & .data$Date < .data$FMWTend ~ 0,
    .data$CPUE==0 & .data$Date >= .data$FMWTend ~ NA_real_
  ))%>%
  dplyr::filter(!is.na(.data$CPUE))%>% #drop the rows set to NA above (and any CPUE that was already missing)
  dplyr::select(-"FMWT_Macro", -"FMWTstart", -"FMWTend", -"Intro")%>% #Remove FMWT taxa codes
  dplyr::left_join(stations, by=c("Source", "Station"))%>% #add latitude and longitude.
  {if(!("FMWT_Macro"%in%Data_sets)){
    dplyr::filter(., .data$Source != "FMWT") #FMWT not requested: keep only the other survey
  } else{
    .
  }}%>%
  {if(!("STN_Macro"%in%Data_sets)){
    dplyr::filter(., .data$Source != "STN") #STN not requested: keep only the other survey
  } else{
    .
  }}
cat("\nFMWT_Macro and/or STN_Macro finished!\n\n")
}
# Combine data ----------------------------------------
cat("\nCombining datasets...\n")
zoop<-dplyr::bind_rows(data.list)%>% # Combine data
dplyr::filter(!is.na(.data$Taxname))%>% #Remove NA taxnames (should only correspond to previously summed "all" categories from input datasets)
dplyr::mutate(SalSurf= wql::ec2pss(.data$CondSurf/1000, t=25),
Year=lubridate::year(.data$Date))%>%
{if("Tide"%in%names(.)){
dplyr::mutate(., Tide=dplyr::recode(.data$Tide, "1"="High slack", "2"="Ebb", "3"="Low slack", "4"="Flood", "1=high slack"="High slack", "2=ebb"="Ebb", "3=low slack"="Low slack", "4=flood"="Flood")) #Rename tide codes to be consistent
} else{
.
}}%>%
{if("CondBott"%in%names(.)){
dplyr::mutate(., SalBott=wql::ec2pss(.data$CondBott/1000, t=25))
} else{
.
}}%>%
dplyr::select(-tidyselect::any_of(c("Region", "CondBott", "CondSurf"))) #Remove some extraneous variables to save memory
stationsEMPEZ<-zooper::stationsEMPEZ
if(any(unique(stationsEMPEZ$Station)%in%unique(zoop$Station))){
zoop<-zoop%>%
dplyr::filter(.data$Station%in%unique(stationsEMPEZ$Station))%>%
dplyr::select(-"Latitude", -"Longitude")%>%
dplyr::left_join(stationsEMPEZ, by=c("Date", "Station"))%>%
dplyr::bind_rows(zoop%>%
dplyr::filter(!.data$Station%in%unique(stationsEMPEZ$Station)))
}
zoopEnv<-zoop%>%
dplyr::select(-"SizeClass", -"Volume", -"Lifestage", -"Taxname", -"Phylum", -"Class", -"Order",
-"Family", -"Genus", -"Species", -"Taxlifestage", -"CPUE")%>%
dplyr::distinct()
# Remove duplicated samples not caught by distinct
# For every SampleID that still occurs more than once in zoopEnv, collapse its
# rows to a single consistent record: numeric columns take the group mean,
# date-time columns the earliest non-missing value, and character columns the
# first non-missing value.
# NOTE(review): `where` is used unqualified here; confirm it is imported or
# declared for the package namespace.
dups<-dplyr::filter(zoopEnv, .data$SampleID%in%.data$SampleID[which(duplicated(.data$SampleID))])%>%
  dplyr::group_by(.data$SampleID)%>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~mean(.x, na.rm=TRUE)))%>% # all-NA groups give NaN here
  dplyr::mutate(dplyr::across(where(lubridate::is.POSIXct), ~suppressWarnings(dplyr::if_else(all(is.na(.x)), lubridate::parse_date_time(NA_character_, tz="America/Los_Angeles"), min(.x, na.rm=TRUE)))))%>% # earliest time, or a typed NA when none is recorded
  tidyr::fill(where(is.character), .direction="downup")%>% # fill gaps within the sample before picking a value
  dplyr::mutate(dplyr::across(where(is.character), ~unique(.x)[1]))%>%
  dplyr::ungroup()%>%
  dplyr::distinct()
zoopEnv<-zoopEnv%>%
dplyr::filter(!.data$SampleID%in%dups$SampleID)%>%
dplyr::bind_rows(dups)%>%
dplyr::mutate(dplyr::across(where(is.numeric), ~ dplyr::if_else(is.nan(.x), NA_real_, .x)))
zoop<-zoop%>%
dplyr::select("Source", "SizeClass", "Volume", "Lifestage", "Taxname", "Phylum", "Class",
"Order", "Family", "Genus", "Species", "Taxlifestage", "SampleID", "CPUE")
if(Biomass){
zoop_lengths<-dplyr::bind_rows(lengths.list)
zoop<-Zoopbiomass(zoop, zoop_lengths)%>%
dplyr::select("Source", "SizeClass", "Volume", "Lifestage", "Taxname", "Phylum", "Class",
"Order", "Family", "Genus", "Species", "Taxlifestage", "SampleID", "CPUE", "BPUE")
}
if(Save_object){
saveRDS(zoop, file=paste0(Zoop_path, ".Rds"))
saveRDS(zoopEnv, file=paste0(Env_path, ".Rds"))
}
# Optionally hand the result back to the caller, either as one table with the
# environmental data joined onto every catch row, or as a two-element list.
# Any other value of Return_object_type falls through without a return() call.
if(Return_object){
  if(Return_object_type=="Combined"){
    # Source is present in both tables, so drop it from the environmental side
    # before joining. A quoted name is used because the .data pronoun is
    # deprecated inside selection helpers.
    zoop_full <- dplyr::left_join(zoop, dplyr::select(zoopEnv, -"Source"), by="SampleID")
    return(zoop_full)
  }
  if(Return_object_type=="List"){
    return(list(Zooplankton = zoop, Environment = zoopEnv))
  }
}
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.