#' Process data-raw
#'
#' Process each data file in data-raw and create a documentation R file
#' in the R directory with a Roxygen header. This also creates files
#' in docs/data so that users can download the raw data.
#'
#' 1. Delete all files in the R dir except the R files needed to process the data files
#' 2. Delete all data in the data dir
#' 3. Delete all vignettes in the vignettes dir
#'
#' @keywords internal
#' @export
#' @importFrom dplyr %>%
#' @importFrom rlang .data
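#' @examples
#' \dontrun{
#' # Minimal usage sketch: run from the package root, assuming data-raw/, R/,
#' # data/, vignettes/, and docs/data/ all exist (the directories used below).
#' process_data_raw()
#' }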
process_data_raw <- function() {
# requireNamespace("dplyr", include.only = '%>%')
# requireNamespace("rlang", include.only = '.data')
# Clean up the data, R, vignettes and docs/data folders
clean_up()
# Go through each data file in data-raw
fils <- list.files("data-raw", pattern = "[.]csv")
for(fil in fils){
filpath <- file.path("data-raw", fil)
if(file.info(filpath)$isdir) next
# Read in the data
# Can have a header to it marked with #'
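# Illustrative sketch of the expected layout (values are placeholders; the
# column names shown are ones this function checks for):
#   #' Text for the Roxygen documentation header
#   ESU,COMMON_POPULATION_NAME,YEAR,NUMBER_OF_SPAWNERS,FRACWILD
#   <esu name>,<population name>,<year>,<spawner count>,<fraction wild>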
thetext <- readLines(filpath, warn = FALSE)
isheader <- stringr::str_sub(thetext, 1, 2) == "#'"
thecols <- utils::read.csv(text=thetext[min(which(!isheader))])
thecols <- colnames(thecols)
headr <- thetext[isheader]
datar <- utils::read.csv(text=thetext[!isheader])
colnames(datar) <- tolower(colnames(datar))
# Error checking
ESU <- unique(datar$esu)
if(length(ESU)!=1) stop(paste(fil, "data problem. Each file should have only one ESU."))
if("series" %in% colnames(datar)){
series <- unique(datar$series)
if(length(series)!=1) stop(paste(fil, "data problem. Each file should have only one value in the Series column."))
series <- ifelse(series!="", paste0("-", series), "")
}else{
series <- ""
}
# Create parts needed for the R file Roxygen2 header
# dataname <- stringr::str_replace_all(stringr::str_sub(headr[1], 4), " ","-")
esuname <- ESU %>%
stringr::str_replace_all(",", "") %>%
stringr::str_replace_all("[(]", "-") %>%
stringr::str_replace_all("[)]", "") %>%
stringr::str_replace_all(" -", "-") %>%
stringr::str_squish() %>%
stringr::str_replace_all(" ", "-")
dataname <- paste0(esuname, series, collapse = "-")
dataname <- stringr::str_replace_all(dataname, "/", "-")
yearname <- ifelse(any(colnames(datar)=="brood_year"), "BROOD_YEAR", "YEAR")
## Header and footer for the Roxygen2 header
## Footer also creates a generic example
headr <- c(paste0("#' ", dataname, "\n#' \n"), headr)
footr <- paste0("#' @name ", dataname, "\n",
"#' @docType data\n",
"#' @examples\n",
"#' data('", dataname, "')\n",
"#' library(ggplot2)\n",
"#' out$NUMBER_OF_SPAWNERS[out$NUMBER_OF_SPAWNERS==-99] <- NA\n",
"#' ggplot(out, aes(x=", yearname, ", y=NUMBER_OF_SPAWNERS)) + \n",
"#' geom_point(na.rm = TRUE) +\n",
"#' ggtitle('", dataname, "') +\n",
"#' facet_wrap(~COMMON_POPULATION_NAME) +\n",
"#' theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)",
ifelse(dplyr::n_distinct(datar$common_population_name)>10, ", \n#' strip.text.x = element_text(size = 6))\n", ")\n"), "#' \n",
"#' out$FRACWILD[out$FRACWILD == -99] <- NA\n",
"#' ggplot(out, aes(x=", yearname, ", y=FRACWILD)) +\n",
"#' geom_point(na.rm = TRUE) +\n",
"#' ggtitle('Fraction Wild') +\n",
"#' ylim(0,1) +\n",
"#' facet_wrap(~COMMON_POPULATION_NAME) +\n",
"#' theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)",
ifelse(dplyr::n_distinct(datar$common_population_name)>10, ", \n#' strip.text.x = element_text(size = 6))\n", ")\n"),
"NULL\n"
)
## Record the compiler (if any) listed in the data file
compiler <- ifelse("compiler" %in% colnames(datar), datar$compiler[1], "")
## Create the citation section. If there is citation information,
## then a section with info for each population will be created.
citation <- "#' @references\n #' Ford, M.J., et al. 2022. Biological Viability Assessment Update for Pacific Salmon and Steelhead Listed Under the Endangered Species Act: Pacific Northwest. U.S. Department of Commerce, NOAA Technical Memorandum NMFS-NWFSC-171. https://doi.org/10.25923/kq2n-ke70\n#'\n#'\n#' Salmon Population Summaries (SPS) Database: \\url{https://www.fisheries.noaa.gov/resource/tool-app/salmon-population-summaries-sps}\n#' \n"
if("citation" %in% colnames(datar)){
citation <- paste0(citation, "#' Original source: ")
}
if(("citation" %in% colnames(datar)) && length(unique(datar$citation))==1){
citation <- paste0(citation, unique(datar$citation), ". ")
}
if(("citation" %in% colnames(datar)) && stringr::str_detect(datar$citation, "CAX")){
citation <- paste0(citation, "Citations with the format CAXMMDDYYYY indicate that the data are from the Coordinated Assessments Partnership (CAP) database and were downloaded on on MM-DD-YYYY. \\url{https://www.streamnet.org/home/data-maps/fish-hlis/}.")
}
## Similarly for the contributor section. Create an entry for each
## population if needed.
contributor <- NULL
if(("contributor" %in% colnames(datar)) && length(unique(datar$contributor))>1) contributor <- "#' \n#' @source\n"
if(("contributor" %in% colnames(datar)) && length(unique(datar$contributor))==1){
contributor <- paste0("#' \n#' @source ", unique(datar$contributor))
}
meth <- ifelse("method" %in% colnames(datar), "Method: ", "")
if(("method" %in% colnames(datar)) && length(unique(datar$method))==1){
meth <- paste0(meth, unique(datar$method))
}
for(i in unique(datar$population_name)){
if(("contributor" %in% colnames(datar)) && length(unique(datar$contributor))>1){
contributor <- c(contributor, paste0("#' * ",i,": ", unique(subset(datar, datar$population_name==i)$contributor)))
}
if(("citation" %in% colnames(datar)) && length(unique(datar$citation))>1){
citation <- c(citation, paste0("#' * ",i,": ", unique(subset(datar, datar$population_name==i)$citation)))
}
## Same for method
if(("method" %in% colnames(datar)) && length(unique(datar$method))>1){
meth <- c(meth, paste0("\n#' * ",i,": ", unique(subset(datar, datar$population_name==i)$method)))
}
}
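## Illustrative sketch of the contributor section assembled above when there
## are multiple contributors (population and source names are hypothetical):
##   #'
##   #' @source
##   #' * Population A: Agency X
##   #' * Population B: Agency Y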
## Very minimal description section
middlr <- paste0("#' @description Spawner and fraction wild data. Species: ",
unique(datar$species), ". ", meth, ". \\href{../data/", dataname, ".html}{View raw data} or \\href{../data/", dataname, ".csv}{Download raw data}")
# Assemble all the pieces into an R file
cat(headr,
describe_text(thecols, compiler=compiler),
middlr, "#' ", contributor, "#' ", citation, "#' ", footr,
sep="\n",
file=file.path("R", paste0(dataname, ".R")))
# Create the vignette
create_vignette(dataname, yearname)
# Make the data files
out <- utils::read.csv(filpath, skip=sum(isheader))
save(out, file=file.path("data", paste0(dataname, ".rda")))
# Save html of data
## First clean up the data by removing the AGE and CATCH columns
if(any(stringr::str_detect(colnames(out), "AGE_"))){
out <- out %>% dplyr::select(-dplyr::starts_with("AGE_"))
}
if("CATCH" %in% colnames(out)){
out <- out %>% dplyr::select(-.data$CATCH)
}
## Create an html table
x = out %>%
dplyr::mutate_all( ~paste0("<div>", ., "</div>")) %>%
kableExtra::kbl() %>%
kableExtra::row_spec(0, background = "yellow") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped"),
full_width = FALSE,
fixed_thead = TRUE) %>%
kableExtra::save_kable(
file=file.path("docs", "data", paste0(dataname, ".html")),
self_contained = FALSE,
extra_dependencies = '<link href="div.css" rel="stylesheet" />')
## This weird part is because I couldn't stop save_kable from
## trashing the < and >
a <- readLines(file.path("docs", "data", paste0(dataname, ".html")))
a <- stringr::str_replace_all(a, "<", "<")
a <- stringr::str_replace_all(a, ">", ">")
cat(a, file=file.path("docs", "data", paste0(dataname, ".html")), sep="\n")
# Write the data to a csv file for the user to download
## Stored in docs/data so it can be downloaded from the website.
utils::write.csv(out, row.names=FALSE, file.path("docs", "data", paste0(dataname, ".csv")))
}
}