#' Pull biological data (age, length, weight) from the NWFSC data warehouse
#' The website is: https://www.webapps.nwfsc.noaa.gov/data
#' This function can be used to pull a single species or all observed species
#' In order to pull all species leave Name = NULL and SciName = NULL
#'
#' @param Name common name of species data to pull from the data warehouse
#' @param SciName scientific name of species data to pull from the data warehouse
#' @param YearRange range of years to pull data
#' @param SurveyName survey to pull the data for the options are:
#' Triennial, AFSC.Slope, NWFSC.Combo, NWFSC.Slope, NWFSC.Shelf, NWFSC.Hypoxia,
#' NWFSC.Santa.Barb.Basin, NWFSC.Shelf.Rockfish (NWFSC.Hook.Line but both are not working), NWFSC.Video#'
#' @param SaveFile option to save the file to the directory
#' @param Dir directory where the file should be saved
#' @template verbose
#'
#' @author Chantel Wetzel based on code by John Wallace
#' @export
#'
#' @import chron
#' @importFrom dplyr rename
#' @importFrom stringr str_replace_all
#'
#' @examples
#' \dontrun{
#' # SurveyName is only arg that has to be specified
#' bio_dat <- PullBio.fn(SurveyName = "NWFSC.Combo")
#'
#' # Example with specified common name
#' bio_dat <- PullBio.fn(Name = "vermilion rockfish",
#' SurveyName = "NWFSC.Combo")
#'
#' # Example with specified scientific name
#' bio_dat <- PullBio.fn(SciName = "Eopsetta jordani",
#' SurveyName = "NWFSC.Combo")
#'
#' # Example with multiple names
#' bio_dat <- PullBio.fn(SciName = c("Sebastes aurora","Eopsetta jordani"),
#' SurveyName = "NWFSC.Combo")
# bio_dat <- PullBio.fn(Name = c("Sunset rockfish", "vermilion rockfish",
# "vermilion and sunset rockfish"), SurveyName = "NWFSC.Combo")
#' }
#'
PullBio.fn <- function(Name = NULL, SciName = NULL, YearRange = c(1980, 5000), SurveyName = NULL, SaveFile = FALSE, Dir = NULL, verbose = TRUE) {
# increase the timeout period to avoid errors when pulling data
options(timeout = 4000000)
if (SurveyName %in% c("NWFSC.Shelf.Rockfish", "NWFSC.Hook.Line")) {
stop("The bio pull currently does not work for hook & line data. Pull directly from the warehouse https://www.webapp.nwfsc.noaa.gov/data")
}
if (SaveFile) {
if (is.null(Dir)) {
stop("The Dir input needs to be specified in order to save output file.")
}
if (!file.exists(Dir)) {
stop(
"The Dir argument leads to a location",
",\ni.e., ", Dir, ", that doesn't exist."
)
}
}
if (is.null(Name)) {
var.name <- "scientific_name"
Species <- SciName
new.name <- "Scientific_name"
outName <- Name
}
if (is.null(SciName)) {
var.name <- "common_name"
Species <- Name
new.name <- "Common_name"
outName <- SciName
outName <- "All"
}
if (is.null(SciName) & is.null(Name)) {
var.name <- c("scientific_name", "common_name")
Species <- "pull all"
new.name <- c("Scientific_name", "Common_name")
} # stop("Need to specifiy Name or SciName to pull data!")}
surveys <- createMatrix()
if (!SurveyName %in% surveys[, 1]) {
stop(cat("The SurveyName does not match one of the available options:", surveys[, 1]))
}
for (i in 1:dim(surveys)[1]) {
if (SurveyName == surveys[i, 1]) {
project <- surveys[i, 2]
projectShort <- surveys[i, 1]
}
}
if (length(YearRange) == 1) {
YearRange <- c(YearRange, YearRange)
}
if (projectShort != "NWFSC.Hook.Line") {
Vars <- c(
"project", "trawl_id", var.name, "year", "vessel", "pass",
"tow", "datetime_utc_iso", "depth_m", "weight_kg", "ageing_laboratory_dim$laboratory",
"length_cm", "width_cm", "sex", "age_years", "otosag_id", "latitude_dd", "longitude_dd",
"standard_survey_age_indicator",
"standard_survey_length_or_width_indicator",
"standard_survey_weight_indicator",
"operation_dim$legacy_performance_code"
)
Vars.short <- c(
"project", "trawl_id", var.name, "year", "vessel", "pass",
"tow", "datetime_utc_iso", "depth_m", "weight_kg", "ageing_lab", "otosag_id",
"length_cm", "width_cm", "sex", "age_years", "latitude_dd", "longitude_dd"
)
} else {
Vars <- Vars.short <- c(var.name, "age_years", "drop_latitude_dim$latitude_in_degrees", )
}
# symbols here are generally: %22 = ", %2C = ",", %20 = " "
species_str <- paste0("%22",stringr::str_replace_all(Species[1]," ","%20"),"%22")
if(length(Species) > 1) {
for(i in 2:length(Species)) {
species_str <- paste0(species_str, "%2C", paste0("%22",stringr::str_replace_all(Species[i]," ","%20"),"%22"))
}
}
UrlText <- paste0(
"https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.individual_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",",
"station_invalid=0,",
"performance=Satisfactory,",
"depth_ftm>=30,depth_ftm<=700,",
"field_identified_taxonomy_dim$", var.name, "|=[", species_str, "]",
",year>=", YearRange[1], ",year<=", YearRange[2],
"&variables=", paste0(Vars, collapse = ",")
)
if (Species[1] == "pull all") {
UrlText <- paste0(
"https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.individual_fact/selection.json?filters=project=", paste(strsplit(project, " ")[[1]], collapse = "%20"), ",",
"station_invalid=0,",
"performance=Satisfactory,", "depth_ftm>=30,depth_ftm<=700,",
"year>=", YearRange[1], ",year<=", YearRange[2],
"&variables=", paste0(Vars, collapse = ",")
)
}
DataPull <- NULL
if (verbose) {
message("Pulling biological data. This can take up to ~ 30 seconds (or more).")
}
DataPull <- try(get_json(url = UrlText))
if (is.data.frame(DataPull)) {
if (SurveyName == "NWFSC.Combo") {
# Filter out non-standard samples
keep <- DataPull[, "standard_survey_length_or_width_indicator"] %in% c("NA", "Standard Survey Length or Width")
DataPull <- DataPull[keep, ]
remove <- DataPull[, "standard_survey_age_indicator"] == "Not Standard Survey Age"
if (sum(remove) != 0) {
DataPull[remove, "age_years"] <- NA
}
remove <- DataPull[, "standard_survey_weight_indicator"] == "Not Standard Survey Weight"
if (sum(remove) != 0) {
DataPull[remove, "weight_kg"] <- NA
}
}
if (SurveyName == "Triennial") {
# Remove water hauls
fix <- is.na(DataPull[, "operation_dim$legacy_performance_code"])
if (sum(fix) > 0) {
DataPull[fix, "operation_dim$legacy_performance_code"] <- -999
}
keep <- DataPull[, "operation_dim$legacy_performance_code"] != 8
DataPull <- DataPull[keep, ]
}
find <- colnames(DataPull) == "ageing_laboratory_dim$laboratory"
colnames(DataPull)[find] <- "ageing_lab"
# Remove the extra columns now that they are not needed
DataPull <- DataPull[, Vars.short]
}
if (SurveyName %in% c("Triennial", "AFSC.Slope")) {
UrlText <- paste0(
"https://www.webapps.nwfsc.noaa.gov/data/api/v1/source/trawl.triennial_length_fact/selection.json?filters=project=",
paste(strsplit(project, " ")[[1]], collapse = "%20"), ",",
"station_invalid=0,",
"performance=Satisfactory,",
"field_identified_taxonomy_dim$", var.name, "=", paste(strsplit(Species, " ")[[1]], collapse = "%20"),
",year>=", YearRange[1], ",year<=", YearRange[2],
"&variables=", paste0(Vars, collapse = ",")
)
LenPull <- try(get_json(url = UrlText))
# Remove water hauls
if (is.data.frame(LenPull)) {
fix <- is.na(LenPull[, "operation_dim$legacy_performance_code"])
if (sum(fix) > 0) {
LenPull[fix, "operation_dim$legacy_performance_code"] <- -999
}
keep <- LenPull[, "operation_dim$legacy_performance_code"] != 8
LenPull <- LenPull[keep, ]
colnames(LenPull)[2] <- "Date"
LenPull$Weight <- NA
LenPull$Age <- NA
Len <- dplyr::rename(LenPull,
Trawl_id = trawl_id, Year = year, Vessel = vessel, Project = project,
Pass = pass, Tow = tow, Depth_m = depth_m, Length_cm = length_cm,
Width_cm = width_cm, Sex = sex, Latitude_dd = latitude_dd, Longitude_dd = longitude_dd
)
names(Len)[which(names(Len) == "scientific_name")] <- "Scientific_name"
names(Len)[which(names(Len) == "common_name")] <- "Common_name"
Len$Date <- chron::chron(format(as.POSIXlt(Len$Date, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d")
Len$Trawl_id <- as.character(Len$Trawl_id)
Len$Project <- projectShort
Len$Depth_m <- as.numeric(as.character(Len$Depth_m))
Len$Length_cm <- as.numeric(as.character(Len$Length_cm))
Len$Age <- as.numeric(as.character(Len$Age))
}
}
if (!is.data.frame(DataPull) & !SurveyName %in% c("Triennial", "AFSC.Slope")) {
stop(cat("\nNo data returned by the warehouse for the filters given.
Make sure the year range is correct for the project selected and the input name is correct,
otherwise there may be no data for this species from this project.\n"))
}
Data <- NULL
if (length(DataPull) > 0) {
Data <- dplyr::rename(DataPull,
Trawl_id = trawl_id, Year = year, Vessel = vessel, Project = project, Pass = pass,
Tow = tow, Date = datetime_utc_iso, Depth_m = depth_m, Weight = weight_kg,
Length_cm = length_cm, Width_cm = width_cm, Sex = sex, Age = age_years, Oto_id = otosag_id,
Ageing_Lab = ageing_lab,
Latitude_dd = latitude_dd, Longitude_dd = longitude_dd
)
names(Data)[which(names(Data) == "scientific_name")] <- "Scientific_name"
names(Data)[which(names(Data) == "common_name")] <- "Common_name"
Data$Date <- chron::chron(format(as.POSIXlt(Data$Date, format = "%Y-%m-%dT%H:%M:%S"), "%Y-%m-%d"), format = "y-m-d", out.format = "YYYY-m-d")
Data$Trawl_id <- as.character(Data$Trawl_id)
Data$Project <- projectShort
Data$Depth_m <- as.numeric(as.character(Data$Depth_m))
Data$Length_cm <- as.numeric(as.character(Data$Length_cm))
Data$Age <- as.numeric(as.character(Data$Age))
}
Ages <- NULL
if (SurveyName %in% c("Triennial", "AFSC.Slope")) {
if (!is.null(Data) & sum(is.na(Data$Age)) != length(Data$Age)) {
Ages <- Data
}
Data <- list()
if (is.data.frame(LenPull)) {
Data$Lengths <- Len
} else {
Data$Lengths <- "no_lengths_available"
}
if (!is.null(Ages)) {
Data$Ages <- Ages
} else {
Data$Ages <- "no_ages_available"
}
if (verbose) {
message("Triennial & AFSC Slope data returned as a list: Data$Lengths and Data$Ages\n")
}
}
if (SaveFile) {
time <- Sys.time()
time <- substring(time, 1, 10)
# save(Data, file = paste0(Dir, "/Bio_", outName, "_", SurveyName, "_", time, ".rda"))
save(Data, file = file.path(Dir, paste("Bio_", outName, "_", SurveyName, "_", time, ".rda", sep = "")))
if (verbose) {
message(paste("Biological data file saved to following location:", Dir))
}
}
return(Data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.