#' Analyze the results of a particle size test
#'
#' Accepts a variety of laboratory protocols, see [`psa_protocols()`]
#'
#' @param dir Folder containing the pertinent files
#' @param ... other arguments passed for individual protocols. These identify sets of lab equipment used to perform calculations. See details.
#' @param bouyoucos_cylinder_dims dimensions of sedimentation cylinders
#' @param tin_tares data frame of tin tares used for water content determination
#' @param beaker_tares data frame of beaker tares used for pipette methods
#' @param hydrometer_dims data frame of hydrometer dimensions
#'
#' @return List of length 6 containing:
#'
#' 1. Data frame of cumulative percent passing data (tidy/long format)
#' 2. Data frame with breakdown into traditional SSSA size classes (wide format)
#' 3. Data frame with breakdown into size sub classes, if measured
#' (wide format); if not possible due to limited number of particle diameters,
#' this element is `NULL`; various breakdowns are available based on the chosen
#' protocol (see [`psa_protocols()`])
#' 4. List of ggplot objects (one per specimen)
#' 5. Metadata about the test protocol
#' 6. If pre-treatment was performed, the loss on pre-treatment for each specimen;
#' otherwise `NULL`
#'
#' @export
#'
#' @details This function relies on the dimensions of lab equipment used during the tests.
#' It is recommended to set these as global options at the top of your script or in an `.Rprofile file`, but they can also be passed as individual named arguments. See the vignette on setting lab equipment to work with **soiltestr**. (link needed).
#' Choices include the following (tests which require each type of equipment are in parentheses):
#' - `bouyoucos_cylinder_dims` (hydrometer)
#' - `tin_tares` (all tests)
#' - `psa_beaker_tares` (pipette)
#' - (need to finish this list before publishing on CRAN)....
#'
psa <- function(dir, bouyoucos_cylinder_dims = NULL, tin_tares = NULL,
beaker_tares = NULL,
hydrometer_dims = NULL, ...){
# browser()
# error message if directory does not exist
if(!dir.exists(dir)){
stop("Directory ", dir, "does not exist. Did you supply the full path?")
}
# determine which protocol was used and assign to a local variable
protocol_ID <- find_protocol_ID()
# browser()
# if mastersizer protocol was used, need to convert the whole directory of
# .xlsx files to a single csv
mastersizer_csv_exists <- detect_mastersizer_csv()
if(protocol_ID %in% internal_data$fines_laser_diffraction_invoking_protocol_IDs & !mastersizer_csv_exists){
# browser()
tidy_mastersizer(dir = eval(dir, envir = rlang::current_env()), fmt = ".xlsx")
}
# read all raw data files and put into a list, then divide into common
# and method-specific lists and clean up any empty rows from each
test_date <- stringr::str_extract(string = dir, pattern = "\\d{4}-\\d{2}-\\d{2}")
# browser()
all_datafile_paths <- divide_psa_datafiles()
# read all the files in; this returns a list of length 2; each element
# is also a list containing the appropriate data frames (tibbles)
# browser()
datafiles <- purrr::map(all_datafile_paths, import_psa_datafile)
# assign the contents of the datafiles list to the current environment
list2env(datafiles, envir = rlang::current_env())
# for safety, remove the datafiles object since its contents are now dumped
# into the current environment
rm(datafiles, all_datafile_paths)
# compute hygroscopic water contents --------------------------------------
# experimenting with using options instead of tying directly
# to asi468 package
# first need to pair data with correct set of tin tares
#
#
# tin_tare_set <- as.character(unique(common_datafiles$hygroscopic_corrections$tin_tare_set))
#
# tin_tares <- asi468::tin_tares[[tin_tare_set]]
tin_tares <- tin_tares %||% getOption('soiltestr.tin_tares') %||% internal_data$equipment_instructions("tin_tares")
# browser()
# calculate air-dry water contents
# throw an error if the join fails to recognize
# the set of tin tares and the numbers supplied
if(!all(common_datafiles$hygroscopic_corrections$tin_number %in% tin_tares$tin_number)){
stop("One or more tin numbers supplied in data sheet were not found in the data frame supplied to `tin_tares`. Did you supply the correct set?")
}
if(!all(common_datafiles$hygroscopic_corrections$tin_tare_set %in% tin_tares$tin_tare_set)){
stop("One or more observations in data sheet has no match for `tin_tare_set` in the data frame supplied to `tin_tares`. Did you supply the correct set?")
}
# browser()
hygroscopic_water_contents <- common_datafiles$hygroscopic_corrections %>%
dplyr::left_join(tin_tares, by = c("tin_tare_set", "tin_number")) %>%
soiltestr::add_w() %>%
dplyr::rename(hygroscopic_water_content = .data$water_content) %>%
dplyr::select(.data$date:.data$batch_sample_number, .data$hygroscopic_water_content)
# compute oven-dry specimen masses used in actual PSA tests
# browser()
OD_specimen_masses <- common_datafiles$specimen_masses %>%
dplyr::left_join(hygroscopic_water_contents,
by = c("date", "experiment_name", "sample_name",
"replication", "batch_sample_number")) %>%
dplyr::mutate(OD_specimen_mass = .data$air_dry_specimen_mass_for_test / (1 + .data$hygroscopic_water_content)) %>%
dplyr::select(.data$date:.data$batch_sample_number, .data$OD_specimen_mass)
# determine whether pretreatment correction should be applied
# if it should, apply it; otherwise assign this variable a value of NULL
use_pretreatment_correction <- check_pretreatment_correction()
# if it should be, calculate the corrections and then apply them, altering the existing copy of the
# OD specimen masses data frame
if(use_pretreatment_correction){
pretreatment_loss <- compute_pretreatment_loss()
OD_specimen_masses <- OD_specimen_masses %>%
dplyr::left_join(
pretreatment_loss, by = c(
"date", "experiment_name", "sample_name", "replication",
"batch_sample_number")) %>%
dplyr::mutate(OD_specimen_mass = .data$OD_specimen_mass * (1 - .data$pretreatment_loss_pct))
} else
pretreatment_loss <- NULL
# now that the correct specimen mass is known, compute the coarse
# percent passing
# browser()
# compute the coarse particles % passing
coarse_percent_passing <- switch (
protocol_ID,
"1" = compute_sieves_percent_passing(),
"2" = compute_sieves_percent_passing(),
"3" = compute_sieves_percent_passing(),
"4" = compute_sieves_percent_passing(),
"5" = compute_sieves_percent_passing(),
"6" = compute_sieves_percent_passing(),
"7" = compute_sieves_percent_passing(),
"8" = compute_sieves_percent_passing(),
"9" = compute_sieves_percent_passing(),
"10" = compute_sieves_percent_passing(),
"11" = compute_sieves_percent_passing(),
"12" = compute_sieves_percent_passing(),
"13" = compute_sieves_percent_passing(),
"14" = compute_sieves_percent_passing(),
"15" = compute_sieves_percent_passing(),
"16" = compute_sieves_percent_passing(),
"17" = compute_sieves_percent_passing(),
"18" = compute_sieves_percent_passing(),
"19" = compute_sieves_percent_passing(),
"20" = compute_sieves_percent_passing(),
"21" = compute_sieves_percent_passing(),
"22" = compute_sieves_percent_passing(),
"23" = compute_sieves_percent_passing(),
"24" = compute_sieves_percent_passing(),
stop(
"Can't find the protocol - unable to compute % coarse particles for protocol_ID ",
protocol_ID,
call. = T
)
)
# next compute the fines % passing
# browser()
fines_percent_passing <- switch (protocol_ID,
"1" = compute_pipette_fines_pct_passing(...),
"2" = compute_152H_hydrometer_fines_pct_passing(...),
"3" = compute_pipette_fines_pct_passing(...),
"4" = compute_pipette_fines_pct_passing(...),
"5" = compute_152H_hydrometer_fines_pct_passing(...),
"6" = compute_pipette_fines_pct_passing(...),
"7" = compute_pipette_fines_pct_passing(...),
"8" = compute_152H_hydrometer_fines_pct_passing(...),
"9" = compute_152H_hydrometer_fines_pct_passing(...),
"10" = compute_pipette_fines_pct_passing(...),
"11" = compute_pipette_fines_pct_passing(...),
"12" = compute_pipette_fines_pct_passing(...),
"13" = compute_pipette_fines_pct_passing(...),
"14" = wash_through_fines_df( ...),
"15" = compute_pipette_fines_pct_passing(...),
"16" = compute_pipette_fines_pct_passing(...),
"17" = compute_pipette_fines_pct_passing(...),
"18" = compute_pipette_fines_pct_passing(...),
"19" = compute_hydrometer_plus_pipette_fines_pct_passing(),
"20" = compute_pipette_fines_pct_passing(),
"21" = compute_152H_hydrometer_fines_pct_passing(...),
"22" = compute_152H_hydrometer_fines_pct_passing(...),
"23" = compute_mastersizer_fines_pct_passing(...),
"24" = compute_152H_hydrometer_fines_pct_passing(...),
stop("Can't find the protocol... unable to compute % fines for protocol_ID ", protocol_ID, call. = T)
)
# bind coarse and fine data frames together
# browser()
cumulative_percent_passing <- dplyr::bind_rows(fines_percent_passing,
coarse_percent_passing) %>%
dplyr::arrange(.data$batch_sample_number,
dplyr::desc(.data$microns)) %>%
dplyr::filter(
!is.na(microns)
) %>%
tidyr::fill(dplyr::everything()) %>%
dplyr::select(
date, experiment_name, sample_name, replication, batch_sample_number, protocol_ID, dplyr::everything()
)
# in special case of wash-through protocols,
# exit early by using a special function which calculates the bin % for
# gravel, sand, and fines
# in the special case that the protocol is number 14, the only things
# computed should be gravel, sand ,and fines....call a different
# function that will exit early without trying to calculate any of the
# other things normally associated with a psa
# browser
if(protocol_ID %in% internal_data$wash_through_protocol_IDs){
# browser()
coarse_fine_split <- wash_through_coarse_grains()
return(coarse_fine_split)
}
####
# create data frame of standard bin sizes
# browser()
simple_bins <- simple_bins()
# browser()
# check if the protocol permits more complex bin sizes to be computed for
# the fines method
# if it does, create a new tibble with the fine-grained sub bins
if(check_for_fines_complex_bins()){
# shouldn't need this to ever be called because the logic doesn't allow
# the protocols to opt into this switch statement but putting in just
# in case; by assigning as a function I can reduce duplication
insufficient_fines_sampling <- function() {
stop("Only total clay can be computed for this protocol (not enough samples collected).",
call. = T)
}
fines_sub_bins <- switch (protocol_ID,
"1" = CSSC_fines_bins(),
"2" = insufficient_fines_sampling(),
"3" = CSSC_fines_bins(),
"4" = insufficient_fines_sampling(),
"5" = insufficient_fines_sampling(),
"6" = CSSC_fines_bins(),
"7" = insufficient_fines_sampling(),
"8" = insufficient_fines_sampling(),
"9" = insufficient_fines_sampling(),
"10" = insufficient_fines_sampling(),
"11" = insufficient_fines_sampling(),
"12" = insufficient_fines_sampling(),
"11" = insufficient_fines_sampling(),
"13" = fines_20_to_0.2_only(),
"14" = insufficient_fines_sampling(),
"15" = CSSC_fines_bins(),
"16" = CSSC_fines_bins(),
"17" = CSSC_fines_bins(),
"18" = CSSC_fines_bins(),
"19" = CSSC_fines_bins(),
"20" = non_standard_fines_bins_1(),
"21" = CSSC_fines_bins(),
"22" = non_standard_fines_bins_2(),
"23" = CSSC_fines_bins(),
"24" = non_standard_fines_bins_2(),
stop("Could not find any info for psa_protocol ID ", protocol_ID, ". Can't compute sub-bins.", call. = T)
)
}
# browser()
# check if the protocol permits more complex bin sizes to be computed for
# the coarse method
# if it does, create a new tibble with the coarse-grained sub bins
if(check_for_coarse_complex_bins()){
# as for the fines, this function should never be called due to
# logic above but putting here to help with debugging in case
# something was coded wrong
insufficient_coarse_sampling <- function() {
stop("Only total sand and gravel can be computed for this protocol (not enough samples collected)",
call. = T)
}
# browser()
coarse_sub_bins <- switch(
protocol_ID,
"1" = USGA_bins(),
"2" = USGA_bins(),
"3" = USGA_bins(),
"4" = USGA_bins(),
"5" = USGA_bins(),
"6" = USGA_bins(),
"7" = insufficient_coarse_sampling(),
"8" = insufficient_coarse_sampling(),
"9" = insufficient_coarse_sampling(),
"10" = insufficient_coarse_sampling(),
"11" = expanded_sieve_bins_1(),
"12" = insufficient_coarse_sampling(),
"13" = USGA_bins(),
"14" = insufficient_coarse_sampling(),
"15" = USGA_bins(),
"16" = USGA_bins(),
"17" = USGA_bins(),
"18" = USGA_bins(),
"19" = USGA_bins(),
"20" = USGA_bins(),
"21" = USGA_bins(),
"22" = insufficient_coarse_sampling(),
"23" = USGA_bins(),
"24" = USGA_bins_plus_gravel(),
stop("Could not find any info for psa_protocol ID ", protocol_ID, ". Can't compute any sub-bins for sand-size parcticles.", call. = T)
)
} else{
# if code makes it into this statement, just attach the gravel and sand bins to the sub-bins
coarse_sub_bins <- simple_bins %>%
dplyr::select(
.data$date,
.data$experiment_name,
.data$sample_name,
.data$replication,
.data$batch_sample_number,
.data$protocol_ID,
.data$gravel,
.data$sand
)
}
# browser()
# combine all of the sub-bins if they have been computed....
# otherwise this element gets NULL
all_sub_bins <- ls(pattern = "sub_bins")
if(length(all_sub_bins != 0)){
# browser()
sub_bins <- mget(all_sub_bins) %>%
purrr::reduce(
dplyr::left_join,
by = c("date", "experiment_name", "protocol_ID", "sample_name",
"replication", "batch_sample_number")) %>%
dplyr::arrange(.data$batch_sample_number,
.data$replication)
} else {
sub_bins <- NULL
}
# make the ggplots for each sample and replication ---------------------
# browser()
base_plots <- cumulative_percent_passing %>%
tidyr::fill(dplyr::everything()) %>%
dplyr::left_join(psa_protocols_summary, by = "protocol_ID") %>%
dplyr::group_by(.data$batch_sample_number) %>%
dplyr::arrange(.data$batch_sample_number) %>%
tidyr::nest() %>%
dplyr::mutate(plot = purrr::map(data, ggpsd)) %>%
purrr::pluck("plot")
# this is a hack to filter to a single row per replication
# not ideal but it works
# browser()
plots_extra_stuff <- cumulative_percent_passing %>%
tidyr::fill(dplyr::everything()) %>%
dplyr::group_by(dplyr::across(.cols = -c(microns, percent_passing))) %>%
dplyr::summarise() %>%
dplyr::left_join(psa_protocols_summary, by = "protocol_ID") %>%
dplyr::mutate(subtitle = paste(
" Sample name: ", .data$sample_name, "\n",
"Replication ", .data$replication, "\n") ) %>%
dplyr::arrange(.data$batch_sample_number)
# this was some extra stuff I was going to include right on the plot
# but I think it clutters it too much
# this is mostly for EDA/qc anwyay; for a publication or
# polished report I would do this in R Markdown
# "Fines sampling: ", .data$fines_method, "\n",
# g_sample, " g sample", "\n",
# "[See protocol ", protocol_ID, "for other method details.") ) %>%
plot_subtitles <- plots_extra_stuff$subtitle
plot_names <- paste0(plots_extra_stuff$sample_name, "_rep", plots_extra_stuff$replication)
add_subtitle <- function(plot, text) {
plot +
labs(subtitle = text)+
ggplot2::theme(plot.subtitle = ggplot2::element_text(size = 8))
}
# browser()
psd_plots <- purrr::map2(base_plots, plot_subtitles, add_subtitle) %>%
purrr::set_names(plot_names)
################################################################################
# browser()
method_metadata <-switch (protocol_ID,
"1" = psa_protocols[["1"]],
"2" = psa_protocols[["2"]],
"3" = psa_protocols[["3"]],
"4" = psa_protocols[["4"]],
"5" = psa_protocols[["5"]],
"6" = psa_protocols[["6"]],
"7" = psa_protocols[["7"]],
"8" = psa_protocols[["8"]],
"9" = psa_protocols[["9"]],
"10" = psa_protocols[["10"]],
"11" = psa_protocols[["11"]],
"12" = psa_protocols[["12"]],
"13" = psa_protocols[["13"]],
"14" = psa_protocols[["14"]],
"15" = psa_protocols[["15"]],
"16" = psa_protocols[["16"]],
"17" = psa_protocols[["17"]],
"18" = psa_protocols[["18"]],
"19" = psa_protocols[["19"]],
"20" = psa_protocols[["20"]],
"21" = psa_protocols[["21"]],
"22" = psa_protocols[["22"]],
"23" = psa_protocols[["23"]],
"24" = psa_protocols[["24"]],
stop("Could not find any metadata for psa_protocol number ", protocol_ID, call. = T))
# make another list that has the averages of each variable for the applicable
# data frames produced above
# this uses some helpers defined in psa-summarizing-helpers.R
# the predicates check that the element is not null and whether
# it has the relevant column names
# browser()
averages <- mget(x = c("cumulative_percent_passing", "simple_bins", "sub_bins", "pretreatment_loss"),
envir = rlang::current_env()) %>%
purrr::modify_if(
.p = ~ !is.null(.) & "microns" %in% names(.),
.f = pivot_cumulative_percent_passing_wider) %>%
purrr::modify_if(
.p = ~ !is.null(.),
.f = summarize_psa) %>%
#purrr::map(summarize_psa) %>%
purrr::modify_if(
.p = ~ !is.null(.) & any(stringr::str_detect(string = names(.), pattern = "^\\d")) ,
.f = pivot_cumulative_percent_passing_longer)
# construct list to return
psa_object <- structure(
.Data = mget(
c("cumulative_percent_passing",
"simple_bins",
"sub_bins",
"method_metadata",
"pretreatment_loss",
"averages",
"psd_plots")),
class = "soiltestr_psa"
)
return(psa_object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.