#' Create complete data sheet from xls input
#'
#' This function reads in a hand-entered excel data sheet along with a station
#' summary csv file (created using `create_summary()`) and combines them to produce well-formatted csv and odv file
#' outputs.
#'
#' data_input is the only argument that the function *needs* to create an
#' output. The default values for summary_input and the csv and odv outputs are
#' set to work with the default directory configuration of the SEA cruise
#' project so shouldn't need to be set unless you are trying to produce
#' alternative outputs for specific custom cases or trials.
#'
#' data_type can be either a stand-alone value entered into the deployment
#' column of the station summary sheet. Or it can take one of these special
#' cases:
#'
#' * "CTD" for ctd datasheets (default)
#' * "neuston" for neuston datasheets
#' * "bottle" for bottle datasheets
#' * "meter" for meter net datasheets
#' * "<custom>" for custom datasheets you've created and given a unique code to in station summary
#'
#' @param data_input File path for the .xls file with hand-recorded data values. These are created by default in the "datasheets" folder and all have the suffix "_input"
#' @param summary_input File path for the csv summary datasheet produced with
#' create_summary() (see details below). This defaults to the output/csv folder in the standard SEA data project organization
#' @param data_type The data type code that will draw data from the summary
#' sheet (see details below)
#' @param csv_folder The directory path to output the csv file. Set to NULL for
#' no csv output.
#' @param csv_filename The csv filename to output the data
#' @param odv_folder The directory path to output the odv file. Set to NULL for
#' no odv output.
#' @param odv_filename The odv .txt filename to output the data
#' @param cruiseID Optional string specifying cruise ID (i.e. "S301")
#' @param add_cruise_ID If cruiseID is set, logical to specify whether cruiseID
#' should be appended to beginning of filenames for csv and odv output
#' @param add_deployment_type logical to tell function whether to add the default deployment type to the beginning of the filename that is being created.
#' @param add_deployment_fold logical to tell function whether to add a new
#' directory to the end of the odv_folder directory path to keep .txt files
#' separate. Will create the new directory name depending on data_type
#' @param ... option arguments to be sent to compile_bottle. Initially, this is
#' just ros_input
#'
#'
#' @return If assigned to an object, the function will return the formatted
#' tibble that was exported to csv
#'
#' @md
#'
#' @export
#'
create_datasheet <- function(data_input, summary_input = "output/csv/summary_datasheet.csv",
data_type = "CTD",
csv_folder = "output/csv", csv_filename = "datasheet.csv",
odv_folder = "output/odv", odv_filename = "datasheet.txt",
cruiseID = NULL, add_cruiseID = TRUE,
add_deployment_type = TRUE,
add_deployment_subfold = TRUE, ...) {
if(add_cruiseID == TRUE & !is.null(cruiseID)) {
if(summary_input == "output/csv/summary_datasheet.csv") {
summary_input <- add_file_cruiseID(summary_input, cruiseID)
}
}
odv_export <- FALSE
if(data_type == "bottle") {
if(add_deployment_type) {
csv_filename <- paste0("bottle_", csv_filename)
odv_filename <- paste0("bottle_", odv_filename)
}
odv_export <- TRUE
if(add_deployment_subfold) {
odv_folder <- file.path(odv_folder,"bottle")
}
data_type <- c("HC","B")
} else if (data_type == "neuston") {
if(add_deployment_type) {
csv_filename <- paste0("neuston_", csv_filename)
odv_filename <- paste0("neuston_", odv_filename)
}
odv_export <- TRUE
if(add_deployment_subfold) {
odv_folder <- file.path(odv_folder,"neuston")
}
data_type <- "NT"
} else if (data_type == "CTD") {
data_type <- c("CTD","HC")
if(add_deployment_type) {
csv_filename <- paste0("ctd_", csv_filename)
odv_filename <- paste0("ctd_", odv_filename)
}
} else if (data_type == "meter") {
if(add_deployment_type) {
csv_filename <- paste0("meter_", csv_filename)
odv_filename <- paste0("meter_", odv_filename)
}
odv_export <- TRUE
if(add_deployment_subfold) {
odv_folder <- file.path(odv_folder,"meter")
}
data_type <- c("MN")
} else {
if(add_deployment_type) {
csv_filename <- paste0(stringr::str_to_lower(data_type), "_", csv_filename)
odv_filename <- paste0(stringr::str_to_lower(data_type), "_", odv_filename)
}
}
# read in the data_input excel sheet datasheet
data <- readxl::read_excel(data_input)
#Set all header names to lower case
colnames(data) <- stringr::str_to_lower(colnames(data))
#replace spaces with _
colnames(data) <- stringr::str_replace_all(colnames(data), " ", "_")
#remove unit identifiers inside ()
colnames(data) <- stringr::str_remove_all(colnames(data), "_\\(.*\\)")
colnames(data) <- stringr::str_remove_all(colnames(data), "_%")
colnames(data) <- stringr::str_remove_all(colnames(data), "\\.")
# makes a neuston correction if not enough rows to make good on the excel read
if(sum(data_type %in% "NT")>0) {
# Make sure all non note/description/station columns are numeric
data <- dplyr::mutate(data,dplyr::across(!dplyr::matches("note|desc|stat"),as.numeric))
}
# read in station summary datasheet
# TODO: determine what formatting to apply when read in (beyond zd)
summary <- readr::read_csv(
summary_input,
col_types = readr::cols(
zd = readr::col_character()
)
)
# filter by data_type
summary <- dplyr::filter(summary, deployment %in% data_type)
# Bottle specific stuff for combining summary, otherwise right join
if(sum(data_type %in% c("HC", "B")) > 1) {
data <- dplyr::mutate(data, bottle = as.character(bottle))
# add a new column to aid the joining later
data <- dplyr::mutate(data, deployment = ifelse(is.na(as.numeric(bottle)) | as.numeric(bottle) > 12, "B", "HC"))
data <- dplyr::right_join(summary, data, by=c("station","deployment"))
data <- compile_bottle(data, ...)
} else {
data <- dplyr::right_join(summary, data, by=c("station"))
}
# Neuston specific stuff
if(sum(data_type %in% "NT")>0) {
data <- compile_neuston(data)
}
# Meter net specific stuff
if(sum(data_type %in% c("MN","2MN")) > 0) {
data <- compile_meter(data)
}
# Remove columns we don't need, e.g. deployment
data <- dplyr::select(data, -deployment)
# export to csv
if(!is.null(csv_filename) & !is.null(csv_folder)) {
csv_output <- file.path(csv_folder, csv_filename)
if(add_cruiseID == TRUE & !is.null(cruiseID)) {
csv_output <- add_file_cruiseID(csv_output, cruiseID)
}
readr::write_csv(format_csv_output(data),csv_output, na = "")
}
# export to odv
if(odv_export) {
if(!is.null(odv_filename) & !is.null(odv_folder)) {
odv_output <- file.path(odv_folder, odv_filename)
if(add_cruiseID == TRUE & !is.null(cruiseID)) {
odv_output <- add_file_cruiseID(odv_output, cruiseID)
}
format_odv(data, odv_output, data_type = data_type, cruiseID = cruiseID)
}
}
return(data)
}
compile_meter <- function(data) {
data <- dplyr::mutate(data, total_flow = ifelse(is.na(total_flow),
flow_out - flow_in,
total_flow),
.after = flow_in)
data <- dplyr::mutate(data, tow_length = ifelse(is.na(tow_length),
total_flow * flow_constant,
tow_length),
.after = flow_constant)
data <- dplyr::mutate(data, tow_volume = ifelse(is.na(tow_volume),
tow_length * net_area,
tow_volume),
.after = net_area)
data <- dplyr::mutate(data, biodens = as.numeric(zooplankton_biovol)/tow_volume, .after = zooplankton_biovol)
}
#' @export
#' @rdname compile_neuston
compile_neuston <- function(data) {
# calculate biodensity
if(length(which(is.na(data$station_distance)))>0) {
warning("One or more tow distances are not available - be sure that they exist in the summary data csv")
}
# calculate the biodensity
data <- dplyr::mutate(data, biodens = zooplankton_biovol/(station_distance/1000))
data <- dplyr::relocate(data, biodens, .after = zooplankton_biovol)
# sum the total 100 count animals
data <- dplyr::rowwise(data)
data <- dplyr::mutate(data, total_100count = sum(dplyr::c_across(medusa:other3)))
# data <- dplyr::mutate(data, shannon_wiener = sum(dplyr::c_across(medusa:other3)/total_100count * log(dplyr::c_across(medusa:other3)/total_100count)))
data <- dplyr::ungroup(data)
# Calculate moon data
moon_data <- oce::moonAngle(data$dttm,data$lon,data$lat)
# Add moon info to dataset and set the decimal to zero
data <- dplyr::mutate(data,
moon_phase = moon_data$illuminatedFraction * 100,
moon_risen = moon_data$altitude > 0)
nodec <- 0
data <- format_decimal(data, "moon_phase", nodec)
return(data)
}
#' Create bottle file sheet
#'
#' @param summary_csv
#' @param ros_input
#' @param datasheet_folder
#'
#' @return
#' @export
#'
#' @examples
compile_bottle <- function(data, ros_input) {
# get just the list of stations that are unique
stations <- unique(data$station)
# Go find appropriate bottle files from ctd folder and
# list all files with *.ros extension in ros_input
ros_files <- list.files(ros_input,pattern = "\\.ros")
# Loop through all ros files and
ros_output <- NULL
for (i in 1:length(stations)) {
# select the ros file name than matches the station name from the input data
ros_file <- ros_files[stringr::str_detect(ros_files,stations[i])]
if(length(ros_file) > 0) {
if(length(ros_file) > 1) {
ros_file <- ros_file[1]
warning(paste0("More than one ros file in folder matches the stations number ",
stations[i],
". Selecting the first file found: ",
ros_file[1]))
}
# Read in the ros file and combine with datasheet
ros <- read_ros(file.path(ros_input,ros_file))
ros <- dplyr::mutate(ros, station = stations[i])
} else {
warning(paste("No .ros file found for station",
stations[i]))
ros <- NULL
}
if(!is.null(ros)) {
if(is.null(ros_output)) {
ros_output <- ros
} else {
ros_output <- dplyr::bind_rows(ros_output, ros)
}
}
}
if(!is.null(ros_output)) {
ros_output$bottle <- as.character(ros_output$bottle)
}
# Now that we have our depths and metadata for each bottle in a hydrocast, add all the bucket samples to this
bottle_lines <- dplyr::filter(data, deployment == "B")
if(nrow(bottle_lines) > 0) {
if(!is.null(ros_output)) {
data_add <- purrr::quietly(tibble::as_tibble)(t(rep(NA_real_, ncol(ros_output))))$result
names(data_add) <- names(ros_output)
data_add <- dplyr::mutate(data_add, count = nrow(bottle_lines))
data_add <- tidyr::uncount(data_add, count)
data_add <- dplyr::mutate(data_add,
bottle = "B",
depth = 0,
temperature = bottle_lines$temp,
pressure = 0,
salinity = bottle_lines$sal,
theta = oce::swTheta(salinity = salinity,
temperature = temperature,
pressure = pressure),
sigma = oce::swSigma0(salinity = salinity,
temperature = temperature,
pressure = pressure),
station = bottle_lines$station)
# combine ros with bottle
all_output <- dplyr::bind_rows(ros_output, data_add)
} else {
data_add <- tibble::tibble(bottle = "SS",
depth = 0,
temperature = bottle_lines$temp,
pressure = 0,
salinity = bottle_lines$sal,
theta = oce::swTheta(salinity = salinity,
temperature = temperature,
pressure = pressure),
sigma = oce::swSigma0(salinity = salinity,
temperature = temperature,
pressure = pressure))
# set to be the total output as no ros data exists
all_output <- data_add
}
} else {
all_output <- ros_output
}
output <- dplyr::left_join(data,all_output, by = c("station","bottle"))
return(output)
}
##Part of function to take in a archive CSV and make it into a readable excel file
#data_input <- "~/GitHub/seaprocess/inst/extdata/Data Sheet CSVs/S285_NT.csv"
#data <- read.csv(data_input)
#colnames(data) <- stringr::str_to_title(colnames(data))
#colnames(data) <- stringr:: str_replace_all(colnames(data), "_", " ")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.