################################################################################
#' TimeStamp format check
#'
#' Function to check if the format of the timestamp is the correct one
#'
#' Template timestamp is requiered to comply POSIXct format. This function
#' checks if the contributor followed the mandatory format.
#'
#' @family Quality Checks Functions
#'
#' @param data Data frame containing the data (sapflow or environmental). Also
#' a vector with the TIMESTAMP values. If data frame, \bold{it must contain} a
#' TIMESTAMP variable
#'
#' @param verbose Logical indicating if messages of success and warnings of
#' failures must be presented. Default is \code{TRUE}. In order to use
#' inside another function, is recommended to set \code{verbose = FALSE}.
#'
#' @return A message/warning indicating if TIMESTAMP is correct or not. Also
#' an invisible logical object is returned, indicating success (TRUE) or
#' failure (FALSE).
#'
#' @export
# START
# Function declaration
qc_is_timestamp <- function(data, verbose = TRUE,
parent_logger = 'test') {
# Using calling handlers to logging
withCallingHandlers({
# STEP 0
# Argument checking
# is data a data frame?
if(!is.data.frame(data) & !is.vector(data) & class(data)[1] != 'POSIXct') {
stop('Data provided is not a data frame or a vector')
}
# STEP 1
# Data frame
if (is.data.frame(data)) {
# have data a TIMESTAMP variable?
if(is.null(data$TIMESTAMP)) {
stop('TIMESTAMP variable is missing in the data provided')
}
# is all TIMESTAMP NAs?
if (all(is.na(data$TIMESTAMP))) {
if (verbose) {warning('WARNING: TIMESTAMP is all NAs')}
return(invisible(FALSE))
}
# Check TIMESTAMP format
if (lubridate::is.POSIXt(data$TIMESTAMP)) {
if (verbose) {message('TIMESTAMP is in the correct format')}
return(invisible(TRUE))
} else {
if (verbose) {warning('WARNING: TIMESTAMP is NOT in the correct format')}
return(invisible(FALSE))
}
} else {
# STEP 2
# Vector
# is all vector NA?
if (all(is.na(data))) {
if (verbose) {warning('WARNING: TIMESTAMP is all NAs')}
return(invisible(FALSE))
}
if (lubridate::is.POSIXt(data)) {
if (verbose) {message('TIMESTAMP is in the correct format')}
return(invisible(TRUE))
} else {
if (verbose) {warning('WARNING: TIMESTAMP is NOT in the correct format')}
return(invisible(FALSE))
}
}
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_is_timestamp', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_is_timestamp', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_is_timestamp', sep = '.'))})
}
################################################################################
#' Timezones dictionary
#'
#' Tranforms timezone ISO code to character vector compatible with lubridate and
#' POSIXct
#'
#' GMT time zones are used, as they are day saving light time (DST) agnostic,
#' and in that way the DST can setted if the metadata says so. GMT are sign
#' exchanged to be compatible with ISO.
#'
#' @family Quality Checks Functions
#'
#' @param tz Character vector with the ISO code of the timezone as provided in
#' \code{env_time_zone} variable in \code{environmental_md}
#'
#' @return A character vector with the timezone code compatible with lubridate
#' and as.POSIXct
#'
#' @export
# START
# Function declaration
qc_get_timezone <- function(tz, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Argument checking
if (is.na(tz) | is.null(tz)) {
stop('Timezone not provided in environmental metadata')
}
# STEP 1
# Create the list with the codes
timezones <- list(
"1UTC-12:00, Y" = "Etc/GMT+12",
"2UTC-11:00, X" = "Etc/GMT+11",
"3UTC-10:00, W" = "Etc/GMT+10",
"4UTC-09:30, V†" = "Pacific/Marquesas",
"5UTC-09:00, V" = "Etc/GMT+9",
"6UTC-08:00, U" = "Etc/GMT+8",
"7UTC-07:00, T" = "Etc/GMT+7",
"8UTC-06:00, S" = "Etc/GMT+6",
"9UTC-05:00, R" = "Etc/GMT+5",
"11UTC-04:00, Q" = "Etc/GMT+4",
"12UTC-03:30, P†" = "Canada/Newfoundland",
"13UTC-03:00, P" = "Etc/GMT+3",
"14UTC-02:00, O" = "Etc/GMT+2",
"15UTC-01:00, N" = "Etc/GMT+1",
"16UTC±00:00, Z" = "Etc/GMT+0",
"17UTC+01:00, A" = "Etc/GMT-1",
"18UTC+02:00, B" = "Etc/GMT-2",
"19UTC+03:00, C" = "Etc/GMT-3",
"20UTC+03:30, C†" = "Asia/Tehran",
"21UTC+04:00, D" = "Etc/GMT-4",
"22UTC+04:30, D†" = "Asia/Kabul",
"23UTC+05:00, E" = "Etc/GMT-5",
"24UTC+05:30, E†" = "Asia/Kolkata",
"25UTC+05:45, E*" = "Asia/Katmandu",
"26UTC+06:00, F" = "Etc/GMT-6",
"27UTC+06:30, F†" = "Indian/Cocos",
"28UTC+07:00, G" = "Etc/GMT-7",
"29UTC+08:00, H" = "Etc/GMT-8",
"30UTC+08:30, H†" = "Asia/Pyongyang",
"31UTC+08:45, H*" = "Australia/Eucla",
"32UTC+09:00, I" = "Etc/GMT-9",
"33UTC+09:30, I†" = "Australia/Adelaide",
"34UTC+10:00, K" = "Etc/GMT-10",
"35UTC+10:30, K†" = "Australia/Lord_Howe",
"36UTC+11:00, L" = "Etc/GMT-11",
"37UTC+12:00, M" = "Etc/GMT-12",
"38UTC+12:45, M*" = "Pacific/Chatham",
"39UTC+13:00, M†" = "Etc/GMT-13",
"40UTC+14:00, M†" = "Etc/GMT-14"
)
# STEP 2
# Return the timezone name compatible with lubridate
return(timezones[[as.character(tz)]])
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_get_timezone', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_get_timezone', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_get_timezone', sep = '.'))})
}
################################################################################
#' Set the timezone of the TIMESTAMP
#'
#' Brute force convert of timezone
#'
#' When reading data from xlsx or csv, TIMESTAMP is readed as POSIXct and by
#' default the timezone is UTC. With this function timezone can be changed
#' without change the TIMESTAMP. This is made with the \code{force_tz} function
#' of the lubridate package.
#'
#' @family Quality Checks Functions
#'
#' @param data Data frame with the TIMESTAMP variable to set, or a POSIXct
#' vector.
#'
#' @param tz Character vector with the compatible name of the timezone, as the
#' one provided by the \code{\link{qc_get_timezone}} function.
#'
#' @return A data frame as the \code{data} provided, with the TIMESTAMP variable
#' associated to the timezone specified
#'
#' @export
# START
# Function declaration
qc_set_timezone <- function(data, tz, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Argument checks
# is data a data frame?
if (!is.data.frame(data) & !is.vector(data) & class(data)[1] != 'POSIXct') {
stop('data is not a data frame or a POSIX vector')
}
# STEP 1
# Data frame
if (is.data.frame(data)) {
# 1.1 has data a TIMESTAMP variable
if (is.null(data$TIMESTAMP)) {
stop('data has not a TIMESTAMP variable')
}
# 1.2 Force the timezone
data$TIMESTAMP <- lubridate::force_tz(data$TIMESTAMP, tz)
# 1.3 Return the data
return(data)
} else {
# STEP 2
# Vector
# 2.1 force the timezone
data <- lubridate::force_tz(data, tz)
# 2.2 return the results
return(data)
}
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_set_timezone', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_set_timezone', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_set_timezone', sep = '.'))})
}
################################################################################
#' Convert known bad formats to correct TIMESTAMP format and set timezone
#'
#' Converting known bad TIMESTAMP formats to POSIXt and setting the correct
#' timezone
#'
#' When loading data from csv files, depending on the office version and
#' workflow to introduce TIMESTAMP and data, TIMESTAMP can result in the wrong
#' format (lost of seconds, for example). This function checks for known
#' formatting errors and try to fix them.
#'
#' @section Timezone:
#' This function also set the timezone attribute to the POSIXt TIMESTAMP.
#' It uses \code{\link{qc_get_timezone}} and \code{\link{qc_set_timezone}}
#' functions internally to get the timezone from the \code{env_time_zone}
#' variable, transforming it to a compatible timezone name and set it as a
#' POSIXt attribute.
#'
#' @family Data Loading Functions
#'
#' @param data Data frame containing TIMESTAMP variable. Also it can be a vector
#' with TIMESTAMP values
#'
#' @param env_md Data frame containing the environmental metadata, in order
#' to obtain the timezone information
#'
#' @return An object of the same type of input (data frame or vector) with the
#' fixed values of TIMESTAMP. If TIMESTAMP is already in format, a message
#' appears and none fix is made, returning data as entered.
#' If TIMESTAMP can not be fixed, an error is raised.
#'
#' @export
# START
# Function declaration
qc_as_timestamp <- function(data, env_md, parent_logger = 'test') {
# Using calling handlers to logging
withCallingHandlers({
# STEP 0
# Argument checking
# Data is a vector or a data frame
if (!is.data.frame(data) & !is.vector(data) & class(data)[1] != 'POSIXct') {
stop('Data is not a data frame or a vector')
}
# STEP 1
# Data frame
if (is.data.frame(data)) {
# Data contains a TIMESTAMP variable?
if (is.null(data$TIMESTAMP)) {
stop('Data have no TIMESTAMP variable')
}
timestamp <- data$TIMESTAMP
timezone <- qc_get_timezone(env_md$env_time_zone,
parent_logger = parent_logger)
# 1.1 if already in format, inform and return the data unaltered
if (qc_is_timestamp(data, verbose = FALSE,
parent_logger = parent_logger)) {
# 1.1.1 Set the timezone
res <- qc_set_timezone(data, timezone, parent_logger = parent_logger)
message(paste('TIMESTAMP already in format. Timezone set to ',
timezone, sep = ''))
return(res)
} else {
# 1.2 If not in format, try to fix it using the known bad formats
res <- lubridate::parse_date_time(
timestamp,
# orders, now we dont need to indicate no seconds, we can use truncate
c('dmY HMS', 'Ymd HMS'), truncated = 1,
tz = timezone
)
}
# 1.3 Check if the fix worked. If yes, message and return data
# with the new TIMESTAMP
if (qc_is_timestamp(res, verbose = FALSE,
parent_logger = parent_logger)) {
message('TIMESTAMP succesfully fixed. A sample: ', res[1])
data$TIMESTAMP <- res
return(data)
} else {
stop('Unable to format correctly the TIMESTAMP, please ',
'revise manually.')
}
} else {
# STEP 2
# Vector
# 2.1 If already in format, inform and return the data unaltered
timezone <- qc_get_timezone(env_md$env_time_zone,
parent_logger = parent_logger)
if (qc_is_timestamp(data, verbose = FALSE)) {
# 2.2 set the timezone
res <- qc_set_timezone(data, timezone, parent_logger = parent_logger)
message(paste('TIMESTAMP already in format. Timezone set to ',
timezone, sep = ''))
return(res)
} else {
# 2.3 If not in format, try to fix it using the known bad formats
res <- lubridate::parse_date_time(
data,
# orders, now we dont need to indicate no seconds, we can use truncate
c('dmY HMS', 'Ymd HMS'), truncated = 1,
tz = timezone
)
}
# 1.3 Check if the fix worked. If yes, message and return data
# with the new TIMESTAMP
if (qc_is_timestamp(res, verbose = FALSE, parent_logger = parent_logger)) {
message('TIMESTAMP succesfully fixed. A sample: ', res[1])
return(res)
} else {
stop('Unable to format correctly the TIMESTAMP, please ',
'revise manually.')
}
}
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger, 'qc_as_timestamp', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger, 'qc_as_timestamp', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger, 'qc_as_timestamp', sep = '.'))})
}
################################################################################
#' Fix TIMESTAMP formats
#'
#' Wrapper for \code{\link{qc_is_timestamp}} and \code{\link{qc_as_timestamp}}
#'
#' This function uses \code{\link{qc_is_timestamp}} and
#' \code{\link{qc_as_timestamp}} internally to check if the format is correct
#' and if not, try one of the known fixes to convert it in the correct
#' TIMESTAMP
#'
#' @family Quality Checks Functions
#'
#' @param data Data frame containing the TIMESTAMP variable. Also it can be a
#' vector with the TIMESTAMPS values.
#'
#' @param env_md Data frame containing the env_time_zone variable to retrieve
#' the timezone name
#'
#' @return An object of the same type of input (data frame or vector) with the
#' fixed values of TIMESTAMP. If TIMESTAMP is already in format, only timezone
#' is set, returning data as entered.
#' If TIMESTAMP can not be fixed, an error is raised.
#'
#' @export
# START
# Function declaration
qc_fix_timestamp <- function(data, env_md, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 1
# Check if TIMESTAMP is correct and set the timezone
res <- qc_as_timestamp(data, env_md, parent_logger = parent_logger)
# STEP 2
# Return the result
return(res)
# if (qc_is_timestamp(data)) {
#
# # 1.1 If correct, set the timezone
# return(qc_as_timestamp(data, env_md))
# } else {
#
# # STEP 2
# # If not correct, fix it
# res <- qc_as_timestamp(data, env_md)
#
# # 2.1 and return it
# return(res)
# }
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger, 'qc_fix_timestamp', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger, 'qc_fix_timestamp', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger, 'qc_fix_timestamp', sep = '.'))})
}
################################################################################
#' Checking for NAs in the TIMESTAMP
#'
#' Simple function to check for NAs in the TIMESTAMP.
#'
#' NAs in TIMESTAMP generates problems in the further steps of QC, a function
#' checking for NAs and info about the location is needed in order to be
#' able to fix it
#'
#' @family Quality Checks Functions
#'
#' @param data Data frame containing the TIMESTAMP variable (sapflow or
#' environmental data)
#'
#' @return A data frame with the NAs info
#'
#' @export
# START
# Function declaration
qc_timestamp_nas <- function(data, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Argument checks
if (!is.data.frame(data)) {
stop('Data provided is not a data frame')
}
if (is.null(data$TIMESTAMP)) {
stop('Data provided has not a TIMESTAMP variable')
}
# STEP 1
# Retrieving info about NAs in TIMESTAMP
if (!any(is.na(data$TIMESTAMP))) {
# 1.1 If no NAs, return TRUE
return(invisible(TRUE))
} else {
# 1.2 If NAs, return the NAs
res_df <- data %>%
dplyr::mutate(row_number = row.names(data)) %>%
dplyr::filter(is.na(TIMESTAMP))
# STEP 2
# Return the res_df
return(res_df)
}
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_timestamp_nas', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_timestamp_nas', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_timestamp_nas', sep = '.'))})
}
################################################################################
#' Helper function to get metadata timestep (environmental and sapflow).
#'
#' Get timestep value
#'
#' \code{\link{qc_timestamp_errors}} needs a value of timestep to be able to
#' identify deviations from the expected interval of time. This function can
#' extract the timestep for environmental or sapflow data.
#'
#' @family Quality Checks Functions
#'
#' @param metadata Data frame containing the timestep variable (pl_sens_timestep
#' or env_timestep)
#'
#' @return The time step value as numeric
#'
#' @export
# START
# Function declaration
qc_get_timestep <- function(metadata, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# Helper function to check if all elements of a vector are equal, borrowed
# from H. Wickham in
# http://stackoverflow.com/questions/4752275/test-for-equality-among-all-elements-of-a-single-vector
zero_range <- function(x, tol = .Machine$double.eps ^ 0.5) {
# If length of the vector is 1, results is TRUE
if (length(x) == 1) {
return(TRUE)
}
# If not,
x <- range(x) / mean(x)
isTRUE(all.equal(x[1], x[2], tolerance = tol))
}
# STEP 0
# Argument checks
# is metadata a data frame?
if (!is.data.frame(metadata)) {
stop('metadata provided is not a data frame')
}
# we need pl_sens_timestemp or env_timestep variables
if (!any(c('pl_sens_timestep', 'env_timestep') %in% names(metadata))) {
# if (is.null(metadata$pl_sens_timestep) & is.null(metadata$env_timestep)) {
stop('Not timestep variables found in metadata provided')
}
# STEP 1
# Guess which variable is needed
# 1.1 env_timestep
if ('env_timestep' %in% names(metadata)) {
# if (is.null(metadata$pl_sens_timestep) & !is.null(metadata$env_timestep)) {
# check if all timestep values (in the case of pl_sens_timestep) are the same
timestep <- metadata$env_timestep
if (!zero_range(timestep)) {
stop('There are diferent timesteps in the metadata, please check manually')
} else {
return(timestep[[1]])
}
} else {
# check if all timestep values (in the case of pl_sens_timestep) are the same
timestep <- metadata$pl_sens_timestep
if (!zero_range(timestep)) {
stop('There are diferent timesteps in the metadata, please check manually')
} else {
return(timestep[[1]])
}
}
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger, 'qc_get_timestep', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger, 'qc_get_timestep', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger, 'qc_get_timestep', sep = '.'))})
}
################################################################################
#' TimeStamp errors localization
#'
#' Function to pinpoint errors in the timestamp
#'
#' \code{TIMESTAMP} variable can present continuity, date or another kind of
#' errors. This function checks for them. For that, from the timestep declared
#' in the metadata, a summary of intervals differing from it is
#' presented. Only intervals differing more than 59 seconds from the declared
#' timestep are reported.
#'
#' @family Quality Checks Functions
#'
#' @param data Data frame containing the data (sapflow or environmental). It
#' must have a \code{TIMESTAMP} variable.
#'
#' @param timestep Integer indicating the measures timestep (in minutes).
#'
#' @return A data frame summarizing the errors found, indicating the interval
#' and its duration (in seconds).
#'
#' @export
# START
# Function declaration
qc_timestamp_errors <- function(data, timestep = 15,
parent_logger = 'test') {
# Using calling handlers to logging
withCallingHandlers({
# STEP 0
# Arguments checking
# is data a data frame?
if(!is.data.frame(data)) {
stop('Data provided is not a data frame')
}
# have data a TIMESTAMP variable?
if(is.null(data$TIMESTAMP)) {
stop('TIMESTAMP variable is missing in the data provided')
}
# is timestep numeric?
if(!is.numeric(timestep)) {
stop('Provided timestep is not numeric')
}
# STEP 1
# Initiate required values
# 1.1 length in seconds of the expected interval
length_seconds <- timestep * 60
# STEP 2
# Create the results object
res <- dplyr::data_frame(Interval = lubridate::int_diff(data$TIMESTAMP),
Int_length = lubridate::int_length(Interval)) %>%
# step neede to maintain the interval format
dplyr::mutate(Interval = as.character(Interval)) %>%
# drop the length values equal to the timestep plus/minus 59 seconds
dplyr::filter(Int_length > (length_seconds + 59) | Int_length < (length_seconds - 59))
# STEP 3
# Return the results
return(res)
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger, 'qc_timestamp_errors', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger, 'qc_timestamp_errors', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger, 'qc_timestamp_errors', sep = '.'))})
}
################################################################################
#' Getting the \eqn{t_0} and \eqn{t_f} for trees or environmental data
#'
#' Summary of \eqn{t_0} and \eqn{t_f} for each tree or for each environmental
#' variable
#'
#' @family Quality Checks Functions
#'
#' @param data Data frame containing the sapflow values for each tree or the
#' environmental variables values. A \code{TIMESTAMP} variable must be present
#' in the dataset
#'
#' @return A data frame summarising the time intervals for each object(trees or
#' environmental variables)
#'
#' @export
# START
# Function declaration
qc_time_interval <- function(data, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Argument checks
# Is data a data frame?
if (!is.data.frame(data)) {
stop('data provided is not a data frame')
}
# Is there a TIMESTAMP variable in data?
if (is.null(data$TIMESTAMP)) {
stop('data has not a TIMESTAMP variable')
}
# STEP 1
# Initialise the empty results object
res <- data.frame(
Object = 'Total',
t0 = data$TIMESTAMP[1],
tf = data$TIMESTAMP[length(data$TIMESTAMP)],
stringsAsFactors = FALSE
)
# STEP 2
# For loop to iterate each object and obtain the t0 and the tf
for (var in names(data)[-1]) {
# 2.0 create a standard eval object to allow quoted vars in the filter
# step
dots <- paste('!is.na(', var, ')', sep = '')
res <- dplyr::bind_rows(
res,
{data %>%
dplyr::select_('TIMESTAMP', var) %>%
# 2.1 Filter to avoid NAs
dplyr::filter_(.dots = dots) %>%
# 2.2 Summarise to obtain the first and last value od TIMESTAMP
dplyr::summarise(t0 = first(TIMESTAMP),
tf = last(TIMESTAMP)) %>%
# 2.3 Add the object name
dplyr::mutate(Object = var) %>%
# 2.4 Reorder variables
dplyr::select(Object, t0, tf)
}
)
}
# STEP 3
# Return the res object
return(res)
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger, 'qc_time_interval', sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger, 'qc_time_interval', sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger, 'qc_time_interval', sep = '.'))})
}
################################################################################
#' Check concordance between sapflow TIMESTAMP and env TIMESTAMP
#'
#' Are the sapflow and environmental TIMESTAMPS in concordance?
#'
#' This function uses \code{\link{qc_time_interval}} internally to perform
#' operations with time intervals. There is two ways of using this function:
#' \enumerate{
#' \item Directly from sapflow and environmental data, providing both datasets
#' \item From the results obtained from \code{\link{qc_time_interval}},
#' providing both data frames with the results.
#' }
#'
#' @family Quality Checks Functions
#'
#' @param sapf_data Data frame containing the sapflow data and its TIMESTAMP
#'
#' @param env_data Data frame containing the environmental data and its
#' TIMESTAMP
#'
#' @param sapf_intervals Data frame obtained from \code{\link{qc_time_interval}},
#' optional. It can be used if no sapf and env data is provided
#'
#' @param env_intervals Data frame obtained from \code{\link{qc_time_interval}},
#' optional. It can be used if no sapf and env data is provided
#'
#' @param plot Logical indicating if the result is presented in graphical mode
#' (ggplot2 object). Desfault to TRUE.
#'
#' @return A data frame summarising the results
#'
#' @export
# START
# Function declaration
qc_timestamp_concordance <- function(sapf_data = NULL, env_data = NULL,
sapf_intervals = NULL, env_intervals = NULL,
plot = FALSE, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Arguments check
# All NULL??
if (all(is.null(sapf_data), is.null(env_data),
is.null(sapf_intervals), is.null(env_intervals))) {
stop('No data provided')
}
# STEP 1
# Raw data or intervals data?
# 1.1 Raw data
if (all(!is.null(sapf_data), !is.null(env_data))) {
# 1.1.1 generate the intervals
sapf_intervals <- qc_time_interval(sapf_data, parent_logger = parent_logger)
env_intervals <- qc_time_interval(env_data, parent_logger = parent_logger)
}
# 1.2 Intervals data (now all are intervals)
sapf_intervals$Object[1] <- 'Total_sapf'
env_intervals$Object[1] <- 'Total_env'
intervals_data <- dplyr::bind_rows(sapf_intervals, env_intervals)
# STEP 2
# Plot?
if (plot) {
intervals_plot <- intervals_data %>%
tidyr::gather(Time_point, Value, -Object) %>%
dplyr::mutate(Object = factor(Object, levels = rev(unique(Object)))) %>%
dplyr::group_by(Object, Time_point) %>%
ggplot(aes(x = Value, y = Object, colour = Object)) +
geom_line(size = 2) +
scale_colour_manual(values = c(rep('darkgreen',
length(env_intervals$Object)),
rep('steelblue',
length(sapf_intervals$Object)))) +
scale_x_datetime(date_breaks = '1 month') +
theme(legend.position = 'none')
# 2.1 return the plot
return(intervals_plot)
# 2.2 No plot
} else {
# 2.2.1 return the info data frame
return(intervals_data)
}
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_timestamp_concordance',
sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_timestamp_concordance',
sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_timestamp_concordance',
sep = '.'))})
}
################################################################################
#' Check for correct values of SWC
#'
#' This function informs about SWC values correctness
#'
#' SWC values sometimes come in percentage instead of cm3/cm3, so this function
#' checks three assumptions:
#'
#' \enumerate{
#' \item{All values between 0-2. This situation is ok (it can contain out of range
#' values, but in the same order of magnitude)}
#' \item{All values between 2-100. This situation occurs usually when values
#' are presented in percentage. they can be safely transformed dividing by 100}
#' \item{None of the former. This is odd, so big red lights and check manually.}
#' }
#'
#' @family Quality Checks Functions
#'
#' @param swc_values vector with the swc data
#'
#' @return A character indicating the assumption detected in the data
#'
#' @export
# START FUNCTION
# Function declaration
qc_swc_check <- function(swc_values, parent_logger = 'test') {
# Using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Argument checks
if (is.null(swc_values)) {
return('PASS')
}
# STEP 1
# 1.1 remove NAs
swc_values <- swc_values[!is.na(swc_values)]
# 1.2 Assumptions
if (all(swc_values >= 0 & swc_values < 2)) {
return('PASS')
} else {
if (all(swc_values >= 2 & swc_values <= 100)) {
return('WARNING')
} else {
return('ERROR')
}
}
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_swc_check',
sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_swc_check',
sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_swc_check',
sep = '.'))})
}
################################################################################
#' Fix the swc values (if possible)
#'
#' This function divides by 100 in the case the swc provided by the contributor
#' is in percentage
#'
#' To detect if the conversion is needed and/or possible,
#' \code{\link{qc_swc_check}} is used internally
#'
#' @family Quality Checks Functions
#'
#' @param env_data A dataframe with the environmental data
#'
#' @return A dataframe with the environmental data transformed if needed/possible.
#' If not, the same input dataframe.
#'
#' @export
# START FUNCTION
# Function declaration
qc_swc_fix <- function(env_data, parent_logger = 'test') {
# using calling handlers to manage errors
withCallingHandlers({
# STEP 0
# Check arguments
if (!is.data.frame(env_data)) {
stop('env_data provided is not a data.frame')
}
# STEP 1
# 1.1 Check shallow
if (qc_swc_check(env_data[['swc_shallow']]) == 'WARNING') {
env_data[['swc_shallow']] <- env_data[['swc_shallow']] / 100
}
# 1.2 Check deep
if (qc_swc_check(env_data[['swc_deep']]) == 'WARNING') {
env_data[['swc_deep']] <- env_data[['swc_deep']] / 100
}
# STEP 2
# Return the res
return(env_data)
# END FUNCTION
},
# handlers
warning = function(w){logging::logwarn(w$message,
logger = paste(parent_logger,
'qc_swc_fix',
sep = '.'))},
error = function(e){logging::logerror(e$message,
logger = paste(parent_logger,
'qc_swc_fix',
sep = '.'))},
message = function(m){logging::loginfo(m$message,
logger = paste(parent_logger,
'qc_swc_fix',
sep = '.'))})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.