#' Retrieve SQL query string from .sql files
#'
#' This function extracts a query string from simple .sql file. The SQL file should only contain a comment block at the top and the SQL query itself.
#'
#' @param sql_path File path to .sql file as a character vector.
#' @return Returns an SQL statement as a character vector, which can be executed on a database connection using functions in the RODBC or ROracle packages.
#' @noRd
sql_to_rqry <- function(sql_path) {
in_string <- readr::read_file(sql_path)
in_string <- sub("/*.*/", "", in_string)
out_string <- stringr::str_replace_all(in_string, pattern = "\r\n", replacement = " ")
return(out_string)
}
#' Create a database connection using RODBC
#'
#' A function that accepts a data source name, username, and password to establish returns an Oracle DataBase Connection (ODBC) as an RODBC class in R.
#'
#' @param schema Data source name (DSN) as a character vector.
#' @return An RODBC class ODBC connection.
#' @noRd
get_connected <- function(channel = NULL, schema = NA){
if(is.null(channel)) {
(echo = FALSE)
if(is.na(schema)) {
schema <- getPass::getPass(msg = "Enter ORACLE schema: ")
}
username <- getPass::getPass(msg = "Enter your ORACLE Username: ")
password <- getPass::getPass(msg = "Enter your ORACLE Password: ")
channel <- RODBC::odbcConnect(dsn = paste(schema),
uid = paste(username),
pwd = paste(password),
believeNRows = FALSE)
}
return(channel)
}
#' Select species
#'
#' Select length, weight, and CPUE (kg/km2) from csv files based on region and species code.
#'
#' @param species_code RACE species code as a 1L numeric vector
#' @param region Region as a character vector. One of "GOA", "AI", "NBS", "EBS")
#' @param remove_outliers If TRUE, remove outliers based on Bonferroni test.
#' @param fork_lengths_mm Numeric vector of fork lengths to include.
#' @noRd
select_species <- function(species_code, region, remove_outliers = TRUE, bonferroni_threshold = 0.05, fork_lengths_mm = NULL){
if(file.exists(paste0(getwd(),"/data/", region, "_all_species.csv"))) {
all_species <- read.csv(paste0(getwd(),"/data/", region, "_all_species.csv"))
} else {
stop(paste0("select_species: Species file not found (", getwd(),"/data/", region, "_all_species.csv)"))
}
all_species$latitude <- as.numeric(all_species$latitude)
all_species$longitude <- as.numeric(all_species$longitude)
all_species$year <- as.numeric(all_species$year)
all_species$cpue_kg_km2 <- as.numeric(all_species$cpue_kg_km2)
if(species_code %in% c(21721, 21722, 21741, 21742)) {
sel_code <- ifelse(species_code %in% c(21721, 21722), 21720, 21740)
} else {
sel_code <- species_code
}
specimen_sub <- all_species[which(all_species$species_code == sel_code),]
if(!is.null(fork_lengths_mm)) {
message("run_vast_condition: Selecting fork length range ", paste(fork_lengths_mm, collapse = "-"), " mm")
fl_range <- range(fork_lengths_mm)
cpue_sub <- dplyr::filter(specimen_sub, !is.na(number_fish) & !is.na(effort_km2))
lw_sub <- dplyr::filter(specimen_sub, length_mm >= fl_range[1] & length_mm <= fl_range[2])
specimen_sub <- dplyr::bind_rows(lw_sub, cpue_sub)
}
if(!(nrow(specimen_sub) > 1)) {
stop(paste0("select_species: Species code", species_code, "not found for region ", region))
}
#Assessing Outliers using Bonferroni Outlier Test
if(remove_outliers) {
# Split CPUE and length-weight data
lw_sub <- dplyr::filter(specimen_sub, !is.na(length_mm) & !is.na(weight_g))
cpue_sub <- dplyr::filter(specimen_sub, !is.na(number_fish) & !is.na(effort_km2))
# Fit linear model
loglen <- log(lw_sub$length_mm)
logwt <- log(lw_sub$weight_g)
lw_mod <-lm(logwt~loglen, na.action = na.exclude)
# Produce a Bonferroni value for each length-weight records
bonf_p <- car::outlierTest(lw_mod, n.max=Inf, cutoff=Inf, order=FALSE)$bonf.p
outlier_index <- unique(which(bonf_p < bonferroni_threshold))
if(length(outlier_index) > 0) {
message("select_species: Removing ", length(outlier_index), " outliers from ", species_code, " in ", region, " based on Bonferroni test.")
lw_sub <- lw_sub[-outlier_index, ]
} else {
message("select_species: No outliers detected using Bonferroni test.")
}
# Rejoin CPUE and length-weight w/ outliers removed
specimen_sub <- dplyr::bind_rows(lw_sub, cpue_sub)
}
saveRDS(specimen_sub, paste0(getwd(),"/data/",species_code,"_Data_Geostat.Rds"))
return(specimen_sub)
}
#' Generate data summaries for QA/QC
#'
#' Summaries of length-weight and CPUE data from a csv file containing the following columns: year, cruise, vessel, common_name, cpue_kg_km2, length_mm, weight_g, start_time
#'
#' @param dat_csv csv file containing
#' @param region Region name ("EBS", "NBS", "GOA" or "AI")
#' @noRd
make_data_summary <- function(dat_csv, region) {
dat <- read.csv(file = dat_csv) |>
dplyr::mutate(start_time = as.POSIXct(start_time)) |>
dplyr::mutate(yday = lubridate::yday(start_time)) |>
dplyr::arrange(year)
n_spp_by_year <- dat |>
dplyr::filter(!is.na(length_mm)) |>
dplyr::group_by(common_name, year) |>
dplyr::summarise(n = n())
out <- list(n_cpue_by_year = dat |>
dplyr::filter(!is.na(cpue_kg_km2)) |>
dplyr::group_by(common_name, year) |>
dplyr::summarise(n = n()) |>
tidyr::pivot_wider(names_from = c("common_name"), values_from = "n", values_fill = 0) |>
data.frame(),
n_specimen_by_year = n_spp_by_year|>
tidyr::pivot_wider(names_from = c("common_name"), values_from = "n", values_fill = 0) |>
dplyr::arrange(year) |>
data.frame(),
n_specimen_by_vessel =dat |>
dplyr::filter(!is.na(length_mm)) |>
dplyr::group_by(vessel, cruise) |>
dplyr::summarise(n = n()) |>
tidyr::pivot_wider(names_from = c("vessel"), values_from = "n", values_fill = 0) |>
dplyr::arrange(cruise) |>
data.frame())
dir.create(here::here("output", region), recursive = TRUE)
saveRDS(object = out, file = here::here("output", region, paste0(region, "_sample_tables.rds")))
yday_df <- dat |>
dplyr::filter(!is.na(length_mm))
yday_range <- range(yday_df$yday)
yday_years <- unique(yday_df$year)
png(file = here::here("output", region, paste0(region, "_species_samples.png")), width = 7, height = 7, units = "in", res = 120)
print(ggplot(data = n_spp_by_year,
aes(x = year, y = n, color = common_name)) +
geom_point() +
geom_line() +
scale_x_continuous(name = "Year") +
ggthemes::scale_color_tableau(palette = "Tableau 20") +
facet_wrap(~common_name, scales = "free_y") +
theme_bw() +
theme(legend.position = "none"))
dev.off()
for(jj in 1:ceiling(length(yday_years)/6)) {
png(file = here::here("output", region, paste0(region, "_ecdf_samples_by_year_", jj, ".png")), width = 7, height = 7, units = "in", res = 120)
print(ggplot(data = yday_df |>
dplyr::filter(year %in% yday_years[(1+6*(jj-1)):min(c((6+6*(jj-1)), length(yday_years)))]),
aes(x = yday, color = common_name)) +
stat_ecdf(size = rel(1.2)) +
facet_wrap(~year) +
ggthemes::scale_color_tableau(palette = "Tableau 20") +
scale_y_continuous(name = "Cumulative proportion of samples") +
scale_x_continuous(name = "Day of Year",
limits = yday_range) +
ggthemes::theme_few() +
theme(legend.title = element_blank()))
dev.off()
}
ecdf_files <- list.files(here::here('output', region), pattern = 'ecdf_samples_by_year', full.names = TRUE)
if(region == "EBS") {
map_region <- "sebs"
} else {
map_region <- region
}
map_layers <- akgfmaps::get_base_layers(select.region = tolower(map_region),
set.crs = "EPSG:3338")
lw_dat.sub <- read.csv(file = here::here("data", paste0(region, "_all_species.csv"))) |>
dplyr::filter(!is.na(length_mm)) |>
dplyr::select(year, species_code, latitude, longitude, common_name) |>
unique() |>
akgfmaps::transform_data_frame_crs(coords = c("longitude", "latitude"),
in.crs = "+proj=longlat",
out.crs = "EPSG:3338")
unique_spp <- unique(lw_dat.sub$common_name)
pdf(file = here::here("output", region, paste0(region, "_lw_sample_maps.pdf")), onefile = TRUE, width = 12, height = 8)
for(ii in unique_spp) {
print(
ggplot() +
geom_sf(data = map_layers$survey.area,
fill = NA,
size = 0.7) +
geom_point(data = lw_dat.sub |>
dplyr::filter(common_name == ii),
aes(x = longitude, y = latitude),
size = 0.4,
color = "red") +
ggtitle(label = ii) +
facet_wrap(~year, ncol = 4) +
theme_minimal() +
theme(axis.title = element_blank())
)
}
dev.off()
# Make summary docx
make_figs <- ""
for(ii in 1:length(ecdf_files)) {
make_figs <- paste0(make_figs,
"```{r echo=FALSE, fig.cap='\\\\label{fig:figs}Cumulative samples by day of year, by survey year.'}
knitr::include_graphics('", ecdf_files[ii], "')
```\n\n")
}
fileConn<-file(here::here("output", region, paste0(region, "_data_summary.Rmd")))
writeLines(paste0("---
title: '", region, " Condition Data Summary'
date: '", Sys.Date(),"'
output: word_document
---\n
```{r include=FALSE}
library(akfishcondition)
out <- readRDS(here::here('output','", region, "', paste0('", region, "' ,'_sample_tables.rds')))
```\n\n", make_figs, "
```{r echo=FALSE, fig.cap='\\\\label{fig:figs}Annual samples by species.'}
knitr::include_graphics('", here::here("output", region, paste0(region, "_species_samples.png")), "')
```\n\n
```{r echo=FALSE}
knitr::kable(out$n_cpue_by_year, caption = 'Table 1. Hauls with CPUE by species')
```\n\n
```{r echo=FALSE}
knitr::kable(out$n_specimen_by_year, caption = 'Table 2. Number of length-weight samples by species and year')
```\n\n
```{r echo=FALSE}
knitr::kable(out$n_specimen_by_vessel, caption = 'Table 3. Number of length-weight samples by vessel and cruise')
```"), fileConn)
close(fileConn)
rmarkdown::render(input = here::here("output", region, paste0(region, "_data_summary.Rmd")),
output_file = here::here("output", region, paste0(region, "_data_summary.docx")))
}
#' Append common name to data frame using species_code
#'
#' @param x data.frame
#' @noRd
add_common_name <- function(x) {
spp_df <- data.frame(common_name = c(
"walleye pollock", "walleye pollock (100-250 mm)", "walleye pollock (>250 mm)", "Pacific cod",
"Pacific cod (juvenile)", "Pacific cod (adult)", "Atka mackerel", "arrowtooth flounder",
"flathead sole", "yellowfin sole", "northern rock sole", "southern rock sole", "Alaska plaice",
"Pacific ocean perch", "dusky rockfish", "northern rockfish", "Dover sole", "rex sole", "shortraker rockfish", "rougheye rockfish", "blackspotted rockfish", "sharpchin rockfish"),
species_code = c(21740, 21741, 21742, 21720, 21721, 21722, 21921, 10110, 10130,
10210, 10261, 10262, 10285, 30060, 30152, 30420, 10180, 10200, 30576, 30051, 30052, 30560),
AI = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
GOA = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE),
EBS = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
NBS = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE))
x <- dplyr::left_join(x, spp_df)
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.