#' Check the input data
#'
#' This functions does some check-up on the data frame used for
#' the \code{tapas::pretreatment_data()} function,
#' which requires that the depth and age scales are continuous.
#' In other words, for every *i-th* row, it requires that \itemize{
#' \item{\code{CmBot[i] > CmTop[i]}}
#' \item{\code{AgeBot[i] > AgeTop[i]}}
#' \item{there shouldn't be duplicate values
#' in the \code{CmTop, CmBot, AgeTop}, and \code{AgeBot} columns}.
#' }
#'
#'
#' @details
#'
#' If any of the following is true: \itemize{
#' \item{any \code{CmBot[i] < CmTop[i]}}
#' \item{\code{AgeBot[i] < AgeTop[i]}}
#' \item{there are duplicate values in one of the columns \code{CmTop, CmBot}}
#' }
#' ...the function returns a fatal error and stops.
#'
#'
#' The function also fixes a few things:
#'
#' Add missing samples:
#' If any \code{CmBot[i] < CmTop[i+1]}, then
#' the function adds a new row \code{[j = i+1]}
#' to fill in the missing \code{CmTop} and \code{CmBot} values.
#' Thus, the added samples will have \code{CmTop[j] == CmBot[i]}
#' and \code{CmBot[j] == CmTop[i+1]}.
#' The new age scale values will be:
#' \code{AgeTop[j] == AgeBot[i]} and \code{AgeBot[j] == AgeTop[i+1]}.
#'
#' Check for overlapping sample depths:
#' If any \code{CmBot[i] > CmTop[i+1]}, the function flags a warning due to the
#' presence of overlapping sample depths. To get a data.frame of the
#' overlapping sample depths, use the \code{tapas::get_overlaps()} function.
#'
#' Check presence of slumps:
#' If after these checks any \code{AgeTop[i] == AgeBot[i]}, then
#' the function *removes the flagged rows*,
#' and *creates new* corrected \code{CmTop} and \code{CmBot} scales
#' that exclude slumps,
#' such that \code{CmBot[i] > CmTop[i]}
#' and \code{CmBot[i] == CmTop[i+1]}.
#'
#' @param series The input data frame. A matrix with the following columns:
#' \code{CmTop, CmBot, AgeTop, AgeBot, Volume},
#' and one or more columns with the data
#' which should be resampled (variables).
#'
#' @return A data frame.
#'
#' @seealso [get_overlap_depths]
#'
#' @export
#'
#' @author Walter Finsinger
#'
#' @examples
#' co <- tapas::co_char_data
#' co_check <- tapas::check_pretreat(co)
#'
check_pretreat <- function(series) {
## Gather data --------------------------------------------------------------
A <- series
cm <- A[ ,1]
cmB <- A[ ,2]
ybp <- A[ ,3]
ybpB <- A[ ,4]
n_var <- dim(A)[2] - 5
# Initial check ups ---------------------------------------------------------
### Check that all CmTop < CmBot --------------------------------------------
if (any((cmB < cm))) {
A[which(cmB < cm), ]
stop('Fatal error: CmTop > CmBot in some of the samples')
}
### Check that all AgeTop < AgeBot ------------------------------------------
if (any((ybpB < ybp))) {
A[which(ybpB < ybp), ]
stop('Fatal error: AgeTop > AgeBot in some of the samples')
}
### Check for duplicate sample depths ---------------------------------------
if (any(duplicated(cm) == T)) {
print('Fatal error: the following CmTop samples depths are duplicated')
return(A[duplicated(cm) == T, ])
}
if (any(duplicated(cmB) == T)) {
print('Fatal error: the following CmBot samples depths are duplicated')
return(A[duplicated(cmB) == T, ])
}
## Check if there are slump samples ----------------------------------------
## This could arise if there's a slump or a tephra, thus for sediment
## sections that were deposited in a very short time interval.
## The result will be that AgeBot[i] == AgeTop[i]
# Check for presence of samples having AgeBot[i] == AgeTop[i]
slumps <- ifelse(ybpB == ybp, yes = 1, no = 0)
slumps_index <- which(slumps == 1)
slumps_n <- sum(slumps)
#### If slumps were found ---------------------------------------------------
## Create a new depth scale based on the sample thicknesses of samples that
## are not identified as slump samples
if (!length(slumps_index) > 0) {
print('No slump samples detected.')
print('')
} else {
slumps_cm <- vector()
# Quantify total thickness of slumps
for (i in 1:length(slumps_index)) {
slumps_cm[i] <- cmB[ slumps_index[i] ] - cm[ slumps_index[i] ]
}
slumps_tot_cm <- sum(slumps_cm)
print(paste0('Warning: AgeTop = AgeBot for ',slumps_n,' samples, totaling ',slumps_tot_cm,'cm'))
print('')
# Remove samples identified as slumps (those with AgeBot[i] == AgeTop[i])
A <- A[-slumps_index, ]
row.names(A) <- NULL ## reset row.names
}
## Check if the depth scale is continuous (cmBot[i] = cmTop[i+1]) ----------
## Get difference between cmBot of sample[i] and cmTop of sample[i+1].
## The idea being that:
## - negative differences point to gaps, whereas
## - positive differences point to partially overlapping samples.
## First, redefine the depth and age scales as they may have changed in the
## previous loop:
cm <- A[ ,1]
cmB <- A[ ,2]
ybp <- A[ ,3]
ybpB <- A[ ,4]
gaps_index <- cmB[1:length(cmB) - 1] - cm[2:length(cm)]
# Get indices for samples above missing samples
gaps <- which(gaps_index < 0)
if (length(gaps) > 0) { # if gaps were found...
print('Warning: there are gaps in the depth scale,')
print('')
} else {
print('No gaps found in the depth scale')
print('')
}
# Get indices for overlapping samples
overlaps <- which(gaps_index > 0)
if (length(overlaps) > 0) { #if overlaps were found...
print('Warning: overlapping depths found,')
print('you may get them with the "tapas::get_overlap_depths() function')
print('')
} else {
print('No overlapping depths found')
print('')
}
## Check if the age scale is continuous (AgeBot[i] = AgeTop[i+1]) ----------
## Difference between cmBot of sample[i] - cmTop of sample[i+1]
gaps_age_index <- ybpB[1:length(ybpB) - 1] - ybp[2:length(ybp)]
if (any(gaps_age_index != 0) == T) {
print('Warning: the age scale is not continuous.')
} else {
print('The age scale is continuous')
print('')
}
# Indices for samples above missing samples
gaps_age <- which(gaps_age_index < 0)
if (length(gaps_age) > 0) { # if gaps were found...
print('Gaps found in the age scale. Missing samples will be added')
}
## If age scale is continuous & depth scale is not --------------------------
if (!length(gaps_age) > 0 && length(gaps) > 0) {
print('A new depth scale will be created')
# Build a new, corrected depth scale based on the sample thicknesses
smpl_thickn <- A[ ,2] - A[ ,1]
cm_corr <- cumsum(smpl_thickn) - smpl_thickn[1]
cmB_corr <- c(cm_corr[2:length(cm_corr)],
cm_corr[length(cm_corr)] + smpl_thickn[length(smpl_thickn)])
A[ ,1] <- cm_corr
A[ ,2] <- cmB_corr
}
## If both the age scale & the depth scale are not continuous ---------------
if (length(gaps_age) > 0 && length(gaps) > 0) {
## Add rows for missing samples
gap_cm <- vector()
# Get gaps
for (i in 1:length(gaps)) {
gap_cm[i] <- cm[ gaps[i] + 1] - cmB[ gaps[i] ]
}
tot_gap <- sum(gap_cm)
# print warning
print(paste0("Warning: added ",length(gaps)," missing samples, totaling ",
tot_gap," cm."))
# get params and series data for each of the detected gaps
A_gaps <- A[0, ]
for (i in 1:length(gaps)) {
A_gaps[i, 1] <- cmB[ gaps[i] ]
A_gaps[i, 2] <- cm[ gaps[i] + 1 ]
A_gaps[i, 3] <- ybpB[ gaps[i] ]
A_gaps[i, 4] <- ybp[ gaps[i] + 1 ]
A_gaps[i, 5] <- 0
A_gaps[i, c(6:(5 + n_var) )] <- NA
}
# add gaps to input data frame
A <- rbind(A, A_gaps)
A <- A[order(A[ ,1]), ]
row.names(A) <- NULL ## reset row.names
# If there are still any A_gaps$TopAge[i] == A_gaps$BotAge[i],
# slightly modify the sample ages
p <- which(A[, 3] == A[, 4])
A[p, 4] <- A[p, 4] + 0.0001
A[p + 1, 3] <- A[p, 4]
}
## Prepare output ####
output <- A
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.