#' Testing and preparing input data
#'
#' @param data A \code{data.frame} in either timestamp format or doy format.
#' @param tz Character string, indicates the time zone in which the measurements have been recorded.
#' @param tz.force Logical; if \code{TRUE}, the time zone is forced to "UTC" or re-labelled yet not shifted in time. "UTC" is required for internal processing (default = \code{FALSE}).
#' @param time.format Character string, indicates the format of the timestamp.
#' @param solar.time Logical; if \code{TRUE}, time is converted to solar time,
#' depending upon the location where the measurements have been taken.
#' If \code{FALSE}, the output is provided in "UTC" (default = \code{FALSE}).
#' @param long.deg Numeric, longitude in decimal degrees East to perform the solar time
#' conversion. Only required when \code{solar.time=TRUE}.
#' @param ref.add Logical; if \code{TRUE}, additional probes provided within data
#' are considered. The \eqn{\Delta T} values are then corrected by subtracting
#' the \eqn{\Delta T} measured between the reference probes from the \eqn{\Delta T}
#' measured between the heated and unheated probe (default = \code{FALSE}).
#' @param df Logical; if \code{TRUE}, output is provided in a \code{data.frame} format with
#' a timestamp and a value (\eqn{\Delta T} or \eqn{\Delta V}) column.
#' If \code{FALSE}, output is provided as a \code{zoo} object (default = \code{FALSE}).
#'
#'
#' @description Tests if the structure of the input matches the requirements of \code{TREX} functions
#' and specifies the time zone. The input has to be presented in one of two different \code{data.frame} formats.
#' i) Timestamp format: including a 1) timestamp of the measurements column (\code{character}), and
#' 2) value of \eqn{\Delta V} (or \eqn{\Delta T}; [\code{as.numeric}]). ii) DOY format: including a 1) year of measurements column \code{as.integer},
#' 2) day of the year (DOY) of measurement (\code{as.integer}), 3) hour of the measurement (\code{character}), and
#' 4) value of \eqn{\Delta V} (or \eqn{\Delta T}; [\code{as.numeric}]). TREX functions are applied on time series obtained from a set of
#' thermal dissipation probes. This includes the option where the thermal dissipation method (TDM) is
#' used with only a reference and heating probe, or when including addition reference probes (see \code{ref.add}).
#' These reference probe measurements can be added to the DOY or timestamp format in \eqn{\Delta V} (or \eqn{\Delta T}) (\code{as.numeric})
#' labelled \code{ref1}, \code{ref2}, etc. (depending on the number of reference probes). For this function the following
#' column names have to be present within the \code{data.frame}: "timestamp" or "year" & "doy" & "hour" = indicators
#' of time and "value" = TDM measurements (option "ref1", "ref2, ..., refn = reference probes).
#' After specifying the time zone (\code{tz}), one can select whether to standardize the temporal series to solar
#' time (see \code{solar.time}) by providing the longitude in decimal degrees at which the measurements were
#' obtained (in \code{long.deg}). All timestamps within the function are rounded to minute resolution and output can
#' be either provided in a \code{zoo} format (df = \code{FALSE}) or \code{data.frame} (\code{df = TRUE}; default is \code{FALSE}).
#' \strong{Note, that the output time series is always given in \code{UTC} time zone.}
#'
#' @usage is.trex(data, tz = 'UTC', tz.force = FALSE, time.format = '\%m/\%d/\%y \%H:\%M:\%S',
#' solar.time = TRUE, long.deg = 7.7459,
#' ref.add = FALSE, df = FALSE)
#'
#'
#'
#' @details To prevent errors occurring in subsequent \code{TREX} functions, it is advised to run this function
#' for checking the data structure and preparing it for further analyses. For the specific time zone see
#' \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones} or for formatting see \code{\link{OlsonNames}()}.
#' The format of the timestamp has to be provided according to \url{https://www.stat.berkeley.edu/~s133/dates.html}.
#' For the method behind the solar time conversion, see the solar package (\url{https://CRAN.R-project.org/package=solaR}).
#' The longitude has to be provided in positive decimal degrees for study sites East from the Greenwich meridian and negative for sites to the West.
#'
#' @return A zoo object or data.frame in the appropriate format for other functionalities.
#' @export
#'
#' @examples
#' \dontrun{
#' #validating and structuring example data
#' raw <- example.data(type="doy")
#' input <- is.trex(raw,tz="GMT",time.format="%H:%M",
#' solar.time=TRUE,long.deg=7.7459,
#' ref.add=FALSE,df=FALSE)
#' head(raw)
#' str(input)
#' head(input)
#' plot(input)
#' }
is.trex <-
function(data,
tz = "UTC",
tz.force=FALSE,
time.format = "%m/%d/%y %H:%M:%S",
solar.time = TRUE,
long.deg = 7.7459,
ref.add = FALSE,
df = FALSE) {
#t
#data<-readRDS("D:/Documents/GU - POSTDOC/02_communication/Issues - Chris/mV.rds")
#time.format<-"%Y-%m-%d %H:%M:%S"
#solar.time=F
#df=F
#tz="EST"
#tz.force=F
# helpers
left <- function(string, char){
substr(string, 1,char)}
right <- function (string, char){
substr(string,nchar(string)-(char-1),nchar(string))
}
#data<-input
#tz="Etc/GMT-1"
#time.format="%Y-%m-%d %H:%M:%S"
#ref.add=FALSE
#df=FALSE
#solar.time=F
#ref.add=F
#type="timestamp"
#t= test
#data= example.data(type="timestamp")
#tz= "GMT"
#time.format="%H:%M"
#solar.time=TRUE
#ref.add=FALSE
#long.deg=7.7459
#df= FALSE
#d= default conditions
if (missing(tz)) {
tz = "GMT"
warning("No timezone specified : Using default setting (= GMT/UTC)")
}
if (missing(tz.force)) {
tz.force =F
}
if (missing(ref.add)) {
ref.add = F
}
if (missing(time.format)) {
time.format = "%d-%m-%Y %H:%M"
warning("No time.format specified : Using default setting (= %d-%m-%Y %H:%M)")
}
if (missing(solar.time)) {
solar.time = F
}
if (missing(df)) {
df = F
}
#e= errors
if (length(which(tz %in% base::OlsonNames())) == 0)
stop("Unused argument, please use a valid time zone.")
if (tz.force != T &
tz.force != F)
stop("Unused argument, tz.force needs to be TRUE|FALSE.")
if (tz.force == T &
solar.time == T)
stop("Unused argument, tz.force and solar.time cannot both be TRUE.")
if (solar.time != T &
solar.time != F)
stop("Unused argument, solar.time needs to be TRUE|FALSE.")
if (ref.add != T &
ref.add != F)
stop("Unused argument, ref.add needs to be TRUE|FALSE.")
if (df != T &
df != F)
stop("Unused argument, df needs to be TRUE|FALSE.")
if (length(which(c("timestamp", "doy") %in% colnames(data))) == 0)
stop("Incorrect data format, no doy|timestamp column present.")
type = NA
if (length(which("timestamp" %in% colnames(data))) == 0) {
type = "doy"
if (length(which("year" %in% colnames(data))) == 0)
stop("Incorrect data format, no year column present.")
if (length(which("hour" %in% colnames(data))) == 0)
stop("Incorrect data format, no hour column present.")
if (length(which("value" %in% colnames(data))) == 0)
stop("Incorrect data format, no value column present.")
} else{
type = "timestamp"
if (length(which("value" %in% colnames(data))) == 0)
stop("Incorrect data format, no value column present.")
if (length(which(nchar(as.character(data$timestamp)) > 21)) != 0) {
print(which(nchar(as.character(
data$timestamp
)) > 21))
stop("Incorrect data format, timestamp has too many characters in the above given lines.")
}
}
if (ref.add == T) {
if (length(which("ref" %in% substr(colnames(data), 1, 3))) == 0)
stop("Incorrect input data, no additional reference probes present.")
}
if (is.na(base::suppressWarnings(base::mean(c(data$value), na.rm = TRUE))) ==
T)
stop("Incorrect data format, value is not numeric.")
if (solar.time == T &
missing(long.deg) == T)
stop("Missing argument, solar.time needs long.deg in decimal degrees E.")
if (solar.time == T) {
if (is.numeric(long.deg) == FALSE |
long.deg > 180 |
long.deg < -180)
stop("Unused argument, long.deg needs to be numeric and between -180:180.")
}
#c= conversions
time.format.orig <- NA
if (type == "timestamp") {
data$timestamp <- as.character(data$timestamp)
time.format.orig <- time.format
}
if (type == "doy") {
#e
if (is.integer(data$doy) == F)
stop("Incorrect data format, doy is not integer.")
if (is.integer(data$year) == F)
stop("Incorrect data format, year is not integer.")
data$hour <- as.character(data$hour)
if ((min(nchar(data$hour)) > 3 &
max(nchar(data$hour)) < 6) == F) {
"Incorrect data format, invalid hour column."
}
if (min(nchar(data$hour)) == 4) {
data[which(nchar(data$hour) == 4), "hour"] <-
base::paste0("0", data[which(nchar(data$hour) == 4), "hour"])
}
timestamp <-
paste(base::as.Date(data$doy - 1, origin = paste0(data$year, "-01-01")),
data$hour,
sep = " ")
time.format.orig <- paste0("%Y-%m-%d ", time.format)
data$timestamp <- timestamp
data <- data[, -which(colnames(data) %in% c("year", "doy", "hour"))]
}
#w= warnings
if (length(which(base::duplicated(data$timestamp) == T)) != 0) {
warning("Double timestamp present, daylight saving could be present within the timestamp.")
}
#p= process
if (solar.time == T) {
timestamp <-
suppressWarnings(solaR::local2Solar(
base::as.POSIXct(
as.character(data$timestamp),
format = time.format.orig,
tz = tz
),
lon = long.deg
))
} else{
timestamp <-
base::as.POSIXct(
as.character(data$timestamp),
format = time.format.orig,
tz = tz
)
timestamp_orig<-timestamp
timestamp_test<-format(timestamp_orig,tz="UTC",usetz=FALSE)
timestamp_test2<-format(timestamp_orig,tz=tz,usetz=FALSE)
timestamp<-format(timestamp_orig,tz="UTC",usetz=TRUE)
if(tz.force==T){
hours_shift<-as.numeric(difftime(timestamp_test2[1],timestamp_test[1]))
timestamp<-base::as.POSIXct(timestamp)+(hours_shift*60*60)
}
}
#e
if (as.character(timestamp[1]) == "(NA NA)" |
is.na(timestamp[1]) == T)
stop("No timestamp present, time.format is likely incorrect.")
if (length(which(base::duplicated(timestamp) == T)) != 0) {
print(data[which(base::duplicated(timestamp) == T), ])
stop(
"Double timestamp present, either due to errors in the timestamp or issues with daylight saving (change tz)."
)
}
#p
if (ref.add == T) {
if (length(which(left(colnames(data), 3) == "ref")) == 1) {
value <- data$value - (data[, which(left(colnames(data), 3) == "ref")])
}
if (length(which(left(colnames(data), 3) == "ref")) > 1) {
value <-
data$value - (base::rowMeans(data[, which(left(colnames(data), 3) == "ref")]))
}
if (length(which(left(colnames(data), 3) == "ref")) == 0) {
stop("No reference probe measurements, missing ref1, ref2, refn columns.")
}
data <- data.frame(timestamp = data$timestamp, value = value)
}
#p
if (length(unique(timestamp)) == length(unique(left(timestamp,16)))) {
output.data <-
zoo::zoo(data$value, order.by = base::as.POSIXct(paste0(left(timestamp,16),":00"),tz="UTC"))
} else{
agg <-
stats::aggregate(output.data, by = base::as.POSIXct(paste0(left(timestamp,16),":00"),tz="UTC"), mean)
output.data <- zoo::zoo(agg, order.by = base::as.POSIXct(zoo::index(agg),tz="UTC"))
}
#o= output
if (df == T) {
output.data <-
data.frame(timestamp = as.character(zoo::index(output.data)),
value = as.numeric(as.character(output.data)))
output.data$timestamp <- as.character(output.data$timestamp)
output.data$value <- as.numeric(output.data$value)
}
return(output.data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.