## =============================================================================
#' Set Up Folder Structure
#'
#' Folder structure recommended for eddy covariance data processing and
#' archivation.
#'
#' The purpose is to standardize the locations for metadata and post-processing
#' inputs required to run the proposed workflow as well as to store data and
#' metadata in levels corresponding to processing stage. The folder structure is
#' not required to succesfully apply the workflow but simplifies its use.
#' Adapted from: openeddy - structure_dirs
#'
#' Data processing stages:
#' \itemize{
#' \item Level 0: Raw files with measured high frequency eddy covariance data
#' and relevant metadata or instrument setup files.
#' \item Level 1: Processing software setup and output files and a logbook.
#' \item Level 2: Quality checking results and documentation, definition of
#' ecosystem boundary, storage flux processing and files used as inputs for
#' Level 3 data.
#' \item Level 3: gap-filling output and its documentation, summary of the
#' computed fluxes and meteorological data including their aggregation.
#' }
#'
#' @param root A character string defining the root of created folder structure.
#' @param create_dirs A logical value. Indicates whether directories should be
#' created.
#' @param ... Further arguments to be passed to \code{dir.create} function.
#'
#' @return A named list with paths to folder structure directories.
#' Corresponding directories are created as a function side effect if
#' \code{create_dirs = TRUE} (default).
#'
#' @export
structure_dirs <- function(root = ".", create_dirs = FALSE, ...) {
# With dir.create(recursive = TRUE, ...) all paths not needed to create dirs
# but needed in order to make the dir accessible with path in the list
l <- list(
logbook = "/logbook/log.txt",
level0 = "/level_0/",
eddypro_setup = "/level_0/eddypro_setup/",
site_setup = "/level_0/site_setup/",
raw_data = "/level_0/raw_data/",
level1 = "/level_1/",
post_proc = "/level_1/post_processing/",
eddypro_output = "/level_1/post_processing/eddypro_output_data/",
combined_output = "/level_1/post_processing/combined_output_data/",
prelim_data = "/level_1/post_processing/prelim_data/",
prelim_reports = "/level_1/prelim_reports/",
qc_input = "/level_1/qc_input_data/",
footprint = "/level_1/footprint/",
fp_halfhour = "/level_1/footprint/halfhourly/",
fp_grid = "/level_1/footprint/grid/",
fp_average = "/level_1/footprint/average/",
level2 = "/level_2/",
quality_control = "/level_2/quality_control/",
precheck = "/level_2/quality_control/precheck/",
qc_output = "/level_2/quality_control/qc_output_data/",
qc_flags = "/level_2/quality_control/qc_flags/",
qc_reports = "/level_2/quality_control/qc_reports/",
fetch = "/level_2/fetch_filter/",
storage = "/level_2/storage_flux/",
gf_input = "/level_2/gf_input_data/",
level3 = "/level_3/",
gap_fill = "/level_3/gap_fill/",
gf_output = "/level_3/gap_fill/gf_output_data",
gf_plots = "/level_3/gap_fill/plots/",
gf_reports = "/level_3/gap_fill/gf_reports/",
ustar_filter = "/level_3/ustar_filter/",
summary = "/level_3/summary/",
summary_plots = "/level_3/summary/plots/")
l <- lapply(l, function(x) paste0(root, x))
if (create_dirs) invisible(lapply(l, dir.create, recursive = TRUE, ...))
l
}
## =============================================================================
#' Initialize processing logbook
#'
init_log <- function(path, site, year) {
file_path <- paste0(path, "/", site, "_", year, "_processing_log.txt")
if (file.exists(file_path)) {
stop(paste(
"A logbook already exists in this location.",
"To create a new logbook, the current logbook must be manually deleted."
), call. = FALSE)
}
init <- c(
paste("Logbook for", site, year, "dataset."),
paste("Created:", Sys.time()),
""
)
writeLines(init, file_name, sep = "\n")
}
## =============================================================================
#' Write to processing logbook
#'
#' @param script
#' @param output
#' @param docu
#' @param notes
#' @param path
write_log <- function(script, output, docu, notes, path = paths$logbook) {
entry <- c()
if (!missing(script)) {
entry <- c(
entry, paste0("Ran script: ", basename(script)),
"Full script path:", script
)
}
if (!missing(output)) {
# Add numbers to strings
for (i in 1:length(output)) {
output[i] <- paste0("[", i, "] ", output[i])
}
entry <- c(entry, "Output files created:", output)
}
if (!missing(docu)) {
# Add numbers to strings
for (i in 1:length(docu)) {
docu[i] <- paste0("[", i, "] ", docu[i])
}
entry <- c(entry, "With documentation:", docu)
}
if (!missing(notes)) {
entry <- c(entry, "Notes:", notes)
}
# Get logbook
log <- readLines(path)
# Amend logbook with new entry, save back in original location
writeLines(c(log, "", as.character(Sys.time()), entry), path, sep = "\n")
cat("Logged entry for ", as.character(Sys.time()), ".", sep = "")
}
## =============================================================================
#' (Internal) Find Rows or Columns with Only NAs Over Array Margins
#'
#' \code{all_na} returns a logical vector or array or list indicating whether
#' there are only \code{NA} values in selected margins and therefore e.g.
#' statistics like \code{max} or \code{min} do not produce useful results.
#'
#' @param x An array, including a matrix.
#' @param margin A vector giving the subscripts which the function will be
#' applied over. E.g., for a matrix \code{1} indicates rows, \code{2}
#' indicates columns, \code{c(1, 2)} indicates rows and columns. Where
#' \code{x} has named dimnames, it can be a character vector selecting
#' dimension names.
#'
#' @family NA handlers
#'
#' @seealso \code{\link{NA}} for general information about NAs and
#' \code{\link{apply}} for \code{apply} description.
#'
#' @examples
#' xx <- matrix(1:20, nrow = 4)
#' xx[2, ] <- NA
#' all_na(xx, 2) # All columns have at least one non-missing value
#' all_na(xx, 1) # Second row has all values missing
#' ## Skip the all_na row in apply()
#' apply(xx[!all_na(xx, 1), ], 1, max, na.rm = TRUE)
all_na <- function(x, margin) {
apply(x, margin, function(x) all(is.na(x)))
}
## =============================================================================
#' Object Attributes Varnames and Units
#'
#' \code{varnames} and \code{units} are useful attributes that can store
#' original variable names (\code{varnames}) and units of measurement
#' (\code{units}) of each column in a data frame or of an atomic type. These
#' attributes can be extracted or assigned by following functions.
#'
#' Functions check whether the extracted or assigned attributes contain elements
#' with \code{NULL}, \code{NA}, \code{""} values or if length of each element is
#' higher than \code{1}. In these cases, such elements are substituted with
#' \code{"-"}.
#'
#' @return
#' For \code{varnames} and \code{units}, a character vector.
#' For \code{varnames<-} and \code{units<-}, the updated object \code{x}.
#'
#' @param x A data frame or an atomic type.
#' @param names A logical value. Applies only in case of data frames. If
#' \code{TRUE}, attributes are extracted with corresponding column names.
#' @param value An atomic type that represents \code{varnames} or \code{units}.
#' The length must be \code{1} if \code{x} is an atomic type or equal to
#' number of columns in \code{x} if \code{x} is a data frame.
#'
varnames <- function(x, names = FALSE) {
if (is.data.frame(x)) {
varnames <- lapply(x, attr, "varnames")
varnames <- lapply(varnames, function(x) if (
is.null(x) || x %in% c("", NA) || (length(x) != 1))
"-" else as.character(x))
varnames <- unlist(varnames, use.names = names)
return(varnames)
} else if (is.atomic(x)) {
varnames <- attr(x, "varnames")
varnames <- dplyr::if_else(
is.null(varnames) || varnames %in% c("", NA) || (length(varnames != 1)),
"-",
as.character(varnames)
)
return(varnames)
} else stop("'x' must be a data frame or an atomic type.")
}
#' @rdname varnames
`varnames<-` <- function(x, value) {
check_input(value, "atomic")
if (is.data.frame(x)) {
len <- ncol(x)
if (len != length(value)) {
stop("Length of 'value' not equal to number of columns in 'x'.")
}
value <- lapply(value, function(x) if (
is.null(x) || x %in% c("", NA) || (length(x) != 1))
"-" else as.character(x))
value <- unlist(value, use.names = FALSE)
for (i in seq_len(len)) {
attr(x[, i], "varnames") <- value[i]
}
return(x)
} else if (is.atomic(x)) {
if (length(value) != 1) {
stop("length of 'value' must be 1 for atomic type 'x'")
}
value <- dplyr::if_else(
is.null(value) || value %in% c("", NA),
"-",
as.character(value)
)
attr(x, "varnames") <- value
return(x)
} else stop("'x' must be a data frame or an atomic type.")
}
#' @rdname varnames
units <- function(x, names = FALSE) {
if (is.data.frame(x)) {
units <- lapply(x, attr, "units")
units <- lapply(units, function(x) if (
is.null(x) || x %in% c("", NA) || (length(x) != 1))
"-" else as.character(x)
)
units <- unlist(units, use.names = names)
return(units)
} else if (is.atomic(x)) {
units <- attr(x, "units")
units <- dplyr::if_else(
is.null(units) || units %in% c("", NA) || (length(units) != 1),
"-",
as.character(units)
)
return(units)
} else stop("'x' must be a data frame or an atomic type.")
}
#' @rdname varnames
`units<-` <- function(x, value) {
check_input(value, "atomic")
if (is(x, "difftime")) {
base::units(x) <- value
return(x)
}
if (is.data.frame(x)) {
len <- ncol(x)
if (len != length(value)) {
stop("Length of 'value' not equal to number of columns in 'x'")
}
value <- lapply(value, function(x) if (
is.null(x) || x %in% c("", NA) || (length(x) != 1))
"-" else as.character(x))
value <- unlist(value, use.names = FALSE)
for (i in seq_len(len)) {
attr(x[, i], "units") <- value[i]
}
return(x)
} else if (is.atomic(x)) {
if (length(value) != 1) {
stop("Length of 'value' must be 1 for atomic type 'x'.")
}
value <- dplyr::if_else(
is.null(value) || value %in% c("", NA),
"-",
as.character(value)
)
attr(x, "units") <- value
return(x)
} else stop("'x' must be a data frame or an atomic type.")
}
## =============================================================================
#' Import Data with Units
#'
#' Reads tabular data from a file and represents them as data frame. Attributes
#' \code{varnames} (representing variable names) and \code{units} (representing
#' units of measurement or space efficient metadata) are assigned to each
#' column.
#'
#' \code{read_data} extends the possibilities of \code{\link{read.table}} so it
#' can also read units of measurement. However, it uses default arguments of
#' \code{\link{read.csv}} to accomodate loading of data for the most common
#' input type. \code{read_data} also sets useful defaults common for eddy
#' covariance (\emph{eddy}) data. Missing values are often reported as
#' \code{"-9999.0"} or \code{"-9999"} by post-processing software, therefore
#' \code{na.strings = c("NA", "-9999.0", "-9999")} is used as default.
#'
#' Attribute \code{varnames} contains original variable name without automated
#' corrections/simplifications. This provides control over conversion of
#' original column names and keeps variable names of vectors when they are
#' separated from the original data frame.
#'
#' Units are expected to be one line below the header in the input file. Instead
#' of units of measurement, it is possible to include any space efficient
#' metadata that is relevant to the respective variables. E.g. format of
#' timestamp or structure of coded variable. One line below units and further in
#' the input file is the region with data. Any missing values or blank fields
#' (converted to empty strings) in the line interpreted as units will be
#' substituted by \code{units_fill} string instead.
#'
#' The automated check for \code{"-10000"} values in the data region is provided
#' by \code{check_input = TRUE} (default) and produces error message if the
#' value is found. The \code{"-10000"} values can be introduced to the dataset
#' by rounding \code{"-9999"} values due to the incorrect file conversion or
#' data manipulation. Using \code{check_input = FALSE} will skip the check
#' (this could improve performance for large input files).
#'
#' Adapted from: openeddy - read_eddy
#'
#' @return A data frame is produced with additional attributes \code{varnames}
#' and \code{units} assigned to each respective column.
#'
#' @param file The file name with input data to be read. It can be a file name
#' inside the current working directory, \emph{relative} or \emph{absolute}
#' path or \code{\link{connection}}. See \code{\link{read.table}} for more
#' detailed description. Connections to anonymous file or clipboard are not
#' allowed. To read from clipboard use \code{"clipboard"} string instead of
#' connection.
#' @param header A logical value indicating whether the names of variables are
#' included as the first line of the input file. If \code{FALSE}, column names
#' and variable names of attribute \code{varnames} will be automatically
#' generated.
#' @param units A logical value indicating whether the units for respective
#' variables are included one line above the data region in the input file. If
#' \code{FALSE}, the \code{units} attribute of each column will be set to
#' \code{units_fill} string representing missing values.
#' @param sep A character that separates the fields of input. Default separator
#' for CSV files is \code{","}. See \code{\link{read.table}} for other
#' options.
#' @param dec
#' @param quote A character string that contains the quoting characters.
#' @param units_fill A character string that represents missing value of
#' \code{units} attribute.
#' @param nrows An integer specifying the maximum number of rows to read in.
#' Negative and other invalid values are ignored.
#' @param skip An integer. The number of lines to skip in the input file before
#' reading data.
#' @param na.strings A character vector of strings representing \code{NA} values
#' in the input file. Blank fields are also considered to be missing values in
#' logical, integer, numeric and complex fields.
#' @param fill
#' @param check_input A logical value that determines if values in the input
#' will be checked for erroneous \code{"-10000"} value. If \code{TRUE}
#' (default), any encountered \code{"-10000"} value in the data will trigger
#' an error message.
#' @param correct A logical value that determines if units and varnames should
#' undergo standard formatting corrections. Defaults to \code{TRUE} and prints
#' a message if names have been properly corrected.
#' @param comment.char A character that is interpreted as comment or empty
#' string to turn off this behaviour.
#' @param col_classes
#' @param tz
#' @param ... Further arguments passed to \code{read.table}
#'
#' @export
read_data <- function(file, header = TRUE, units = TRUE, sep = ",", dec = ".",
quote = "\"", units_fill = "-", nrows = -1, skip = 0,
na.strings = c("NA", "-9999.0", "-9999"), fill = TRUE,
check_input = TRUE, correct = TRUE, comment.char = "",
col_classes = NA, tz = metadata$tz_name, ...) {
if (!missing(file) && inherits(file, "connection")) {
if (summary(file)$description == "") {
stop("Connection to anonymous file is not allowed.")
}
if (summary(file)$description == "clipboard") {
stop("Connection to clipboard is not allowed; use 'clipboard' string.")
}
}
read_header <- header
units_fill <- as.character(units_fill)
check_input(units_fill, "length_1")
var_units <- read.table(
file,
header = read_header, sep = sep,
quote = quote, dec = dec, na.strings = na.strings,
colClasses = "character", nrows = 1, skip = skip,
fill = fill, comment.char = comment.char, ...
)
if (header) {
orig_varnames <- read.table(
file,
header = FALSE, sep = sep, quote = quote, dec = dec,
na.strings = na.strings, colClasses = "character", nrows = 1,
skip = skip, fill = fill, comment.char = comment.char, ...
)
}
if (correct) {
var_units <- var_units %>% correct("units")
unit_names <- orig_varnames %>% unlist(use.names = FALSE) %>% correct()
names(var_units) <- unit_names %>% make.unique(sep = "_")
message(
"Units and varnames re-formatted using default corrections. ",
"See documentation of 'correct()' for details."
)
}
if (header && units) {
read_header <- FALSE
skip <- skip + 2
} else if (header | (!header && !units)) {
var_units[] <- units_fill
} else {
skip <- 1 + skip
}
data <- read.table(
file,
header = read_header, sep = sep, quote = quote,
dec = dec, na.strings = na.strings,
colClasses = col_classes, nrows = nrows, skip = skip,
fill = fill, comment.char = comment.char, ...
)
if (header && units) colnames(data) <- names(var_units)
# Create timestamp from date and time columns if none exists
orders <- c("Ymd HM", "mdy HM", "Ymd HMS", "mdy HMS")
if (!"timestamp" %in% names(data)) {
data <- data %>%
dplyr::mutate(
timestamp = lubridate::parse_date_time(paste(date, time), orders, tz)
) %>%
dplyr::select(timestamp, dplyr::everything())
orig_varnames <- c("timestamp", orig_varnames)
var_units <- c("-", var_units)
} else {
data <- data %>%
dplyr::mutate(
timestamp = lubridate::parse_date_time(timestamp, orders, tz)
) %>%
dplyr::select(timestamp, dplyr::everything())
}
if (units) var_units[var_units %in% c("", NA)] <- units_fill
#browser()
for (i in seq_len(ncol(data))) {
#varnames(data[, i]) <- if (header) orig_varnames[[i]] else colnames(data)[i]
#units(data[, i]) <- var_units[i]
varnames(data[i]) <- if (header) orig_varnames[[i]] else colnames(data)[i]
units(data[i]) <- var_units[[i]]
}
# Recode dates and times from timestamp
data <- data %>%
dplyr::mutate(
date = lubridate::date(timestamp),
time = format(timestamp, "%H:%M")
)
if (check_input) {
check <- as.vector(data == -10000)
if (any(check[!is.na(check)])) {
message("Missing data incorrectly replaced by -10000 in the input.")
}
}
if (!is(data$timestamp, "POSIXct") | anyNA(data$timestamp)) {
warning(
"Timestamp could not parse to POSIXct. ",
"Check the format and convert without re-reading using ",
"'strptime_eddy()'."
)
}
return(data)
}
## =============================================================================
#' Combine Multiple EddyPro Output Files into One Dataset
#'
#' Takes files created by different EddyPro processing sessions and merges into
#' one dataset without dropping any columns.
#'
#' @return A data frame with columns bound by name each with the attributes
#' 'units' and 'varnames'.
#'
#' @param ... Zero or more objects of class data.frame that have been read using
#' \code{\link{read_data}}.
#'
#' @export
combine_data <- function(..., fill = "-") {
inputs <- rlang::list2(...)
# Store full units and varnames attribute since they will be dropped
units <- list()
varnames <- list()
for (i in seq_along(inputs)) {
units[[i]] <- tidyflux::units(inputs[[i]], names = TRUE)
varnames[[i]] <- varnames(inputs[[i]], names = TRUE)
}
names(units) <- rlang::ensyms(...)
units_full <- units[[1]]
names(varnames) <- rlang::ensyms(...)
varnames_full <- varnames[[1]]
for (i in 2:length(units)) {
units_full <- suppressWarnings(dplyr::bind_rows(units_full, units[[i]]))
varnames_full <- suppressWarnings(
dplyr::bind_rows(varnames_full, varnames[[i]])
)
}
# Replace NA units/varnames with "-"
units_full <- units_full %>% replace(is.na(.), fill)
varnames_full <- varnames_full %>% replace(is.na(.), fill)
# Test for equality among input data frames in units/varnames of each variable
for (i in seq_along(units_full)) {
if (!length(unique(units_full[, i])) == 1) {
warning(
"Units of variable ", names(units_full[i]),
" are not identical across all input data frames."
)
}
if (!length(unique(varnames_full[, i])) == 1) {
warning(
"Varnames of variable ", names(varnames_full[i]),
" are not identical across all input data frames."
)
}
}
units_full <- units_full %>% dplyr::slice(1) %>% unlist()
varnames_full <- varnames_full %>% dplyr::slice(1) %>% unlist()
#browser()
# Bind all data frames together
out <- suppressWarnings(dplyr::bind_rows(...))
# Define helper function for getting a count of duplicate timestamps
dupes <- function(data) {
data %>%
dplyr::add_count(timestamp) %>%
dplyr::ungroup() %>%
dplyr::summarize(n = sum(n > 1, na.rm = TRUE)) %>%
dplyr::pull(n)
}
# If there are duplicate timestamps, choose which ones to keep
if (dupes(out) > 0) {
# First remove records that explicitly do not contain data
if ("file_records" %in% names(out)) {
out <- out %>%
dplyr::filter(!is.na(file_records))
} else if ("vin_1" %in% names(out)) {
out <- out %>%
dplyr::filter(!is.na(vin_1))
}
# For any remaining duplicates, keep the one with the most non-NA data
if (dupes(out) > 0) {
out <- out %>%
dplyr::mutate(n_na = rowSums(is.na(.))) %>%
dplyr::arrange(n_na) %>%
dplyr::group_by(timestamp) %>%
dplyr::slice(1) %>%
dplyr::select(-n_na) %>%
dplyr::ungroup()
if (dupes(out) > 0) {
# Ensure no duplicates are left
dupes_left <- out %>%
dplyr::add_count(timestamp) %>%
dplyr::filter(n > 1) %>%
dplyr::distinct(timestamp) %>%
dplyr::pull(timestamp) %>%
dplyr::ungroup()
out <- out %>%
dplyr::distinct(timestamp, .keep_all = TRUE)
# Produce warning since this arbitrarily chooses which records to keep
warning(
"Duplicate records with timestamp ", dupes_left,
"could not be removed using logical methods. ",
"Kept records were chosen arbitrarily."
)
}
}
}
# Assign units and varnames back to the combined data frame
for (i in seq_len(ncol(out))) {
varnames(out[[i]]) <- varnames_full[[i]]
units(out[[i]]) <- units_full[[i]]
}
#tidyflux::units(full) <- units_full
#varnames(full) <- varnames_full
# Check if timestamp forms a regular sequence
#check_times(full$timestamp)
out
}
## =============================================================================
#' Export Eddy Covariance Data
#'
#' Facilitates printing object \code{x} also with its units of measurement (or
#' space efficient metadata) to a file or \code{\link{connection}}.
#'
#' \code{write_data} extends the possibilities of \code{write.table} so the
#' units of measurement can also be written. However, it uses default arguments
#' of \code{write.csv} to provide flexibility for the user and to accomodate for
#' the most common case. The character string \code{"-9999"} is typically used
#' to represent missing values in eddy covariance data.
#'
#' Storing \code{varnames} and \code{units} attributes is practical mostly
#' within data frames and vectors. Attribute \code{varnames} extracted from each
#' data frame column represents names of respective variables and its main
#' purpose is to keep variable names of isolated vectors. Attribute \code{units}
#' extracted from each column represents units of measurement (or space
#' efficient metadata) of respective variables that are written one line above
#' data region. If the \code{varnames} or \code{units} attribute of given column
#' is \code{NULL}, of length not equal to 1, or contains missing value or empty
#' string, it is not considered meaningful. In that case the default column name
#' produced by \code{\link{as.data.frame}} is used instead (considered only if
#' \code{x} is supplied as vector) and unit of measurement is substituted with
#' \code{units_fill} string. \code{units_fill} can be an empty string.
#'
#' Units of measurement are considered to be part of the output header and
#' therefore \code{col.names} and \code{quote} arguments have the effect on the
#' way they are written.
#'
#' Adapted from: openeddy - write_eddy
#'
#' @param x A data frame to be written with optional attributes \code{units} and
#' \code{varnames} assigned to each column. If not a data frame it is
#' converted by \code{\link{as.data.frame}}.
#' @param file A character string naming a file to write or a
#' \code{\link{connection}} that is open for writing. \code{""} results in
#' writing to the console.
#' @param append A logical value, used only if \code{file} is not a
#' \code{connection}. If \code{TRUE}, output is written below the content of
#' the file. If \code{FALSE}, the content of the file is overwritten.
#' @param quote A logical value (\code{TRUE} or \code{FALSE}) or a numeric
#' vector. If \code{TRUE}, columns of class character or factor will be
#' surrounded by double quotes. If a numeric vector, its elements should mark
#' the indices of character or factor columns to quote. In both cases, row and
#' column names and units are quoted if present. If \code{FALSE}, no quoting
#' is performed.
#' @param sep A character used as the field separator of each row.
#' @param units_fill A character string representing missing values of
#' \code{units} attribute in the output.
#' @param na A character string representing missing data values in the output.
#' @param col.names Either a logical value (\code{TRUE}, \code{FALSE} or
#' \code{NA}) or a character vector. If \code{TRUE}, column names of \code{x}
#' will be included in the output. If a character vector, its elements will be
#' used as column names. If \code{x} is supplied as vector, an attempt is made
#' to extract meaningful variable name from its attribute \code{varnames}.
#' @param ... Further arguments passed to \code{\link{write.table}}.
#'
#' @export
write_data <- function(x, file = "", append = FALSE, quote = TRUE, sep = ",",
include_units = TRUE, fill = "-", na = "-9999",
col.names = TRUE, use_varnames = FALSE, ...) {
check_input(x, "data_frame")
if (isTRUE(col.names) | is.na(col.names) | is.character(col.names)) {
if (include_units) {
fill <- as.character(fill)
check_input(fill, "length_1")
units <- lapply(x, attr, "units")
units <- lapply(units, function(x) if (
is.null(x) || x %in% c("", NA) || (length(x) != 1)) {
fill
} else {
as.character(x)
})
header <- as.data.frame(units, stringsAsFactors = FALSE)
} else header <- x
if (use_varnames) {
if (any(varnames(x) %in% c("-", "--"))) {
nonames <- which(varnames(x) %in% c("-", "--"))
varnames(x)[nonames] <- names(x)[nonames]
}
names(header) <- varnames(x)
}
q_header <- if_else(isTRUE(quote) | is.numeric(quote), TRUE, quote)
if (append) warning("Appending column names to file.")
write.table(
header, file, append = append, quote = q_header, sep = sep, na = na,
row.names = FALSE, col.names = col.names, qmethod = "double", ...
)
append <- TRUE
}
if (include_units) {
write.table(
x, file, append = append, quote = quote, sep = sep, na = na,
row.names = FALSE, col.names = FALSE, qmethod = "double", ...
)
}
}
## =============================================================================
#' Apply Storage Flux Correction
#'
#' Correction of matter or energy flux (\code{flux}) with storage computed using
#' discrete (one point) approach (\code{st}) or profile measurement of CO2
#' concentration (\code{stp}).
#'
#' If both storage estimates are available, \code{stp} takes priority. If both
#' \code{st} and \code{stp} estimates are \code{NA}, original flux value is
#' kept. \code{flux}, \code{st} and \code{stp} (if not NULL) must have the same
#' length.
#'
#' Adapted from: openeddy - add_st
#'
#' @return A vector with attributes \code{varnames} and \code{units} is
#' produced. \code{varnames} value is set by \code{name_out} argument.
#' \code{units} value is extracted from \code{flux} vector by
#' \code{\link{units}} or set to default \code{"-"}.
#'
#' @param flux A numeric vector with flux values.
#' @param st A numeric vector with storage computed using discrete (one point)
#' approach.
#' @param stp A numeric vector with storage computed using profile measurement
#' of gas concentration.
#'
#' @export
add_storage <- function(x, st, stp = NULL) {
check_input(x, "numeric")
check_input(st, "numeric")
if (length(x) != length(st)) {
stop("'x' and 'st' must be of the same length.")
}
if (!is.null(stp)) {
if (length(x) != length(stp)) {
stop("'stp' must be of the same length as 'x'.", call. = FALSE)
}
check_input(stp, "numeric")
}
out <- x
out[!is.na(st)] <- x[!is.na(st)] + st[!is.na(st)]
if (!is.null(stp)) {
out[!is.na(stp)] <- x[!is.na(stp)] + stp[!is.na(stp)]
}
varnames(out) <- paste0(
"add_storage(", attr(x, "varnames"), ", ", attr(st, "varnames"), ")"
)
tidyflux::units(out) <- tidyflux::units(x)
return(out)
}
## =============================================================================
#' Expand Vector to Full Year
#'
#' Generates a vector with half-hourly time steps of full year, stamped in the
#' middle of time unit.
#' Adapted from: REddyProc - fExpandToFullYear
#'
#' @param x A data vector to be expanded.
#' @param timestamp The data timestamp in POSIX time format.
#' @param year Year (e.g. to plot).
#' @param DTS The number of daily time steps (i.e. 24 or 48).
#' @param tz Time zone used, advised to keep default.
#'
#' @details
#' Function to expand vectors to full year, e.g. to plot in correct time format.
#'
#' @return
#' The expanded time and data vector as a data frame.
#'
expand_year <- function(x, timestamp, year, dts, tz = get_tz(timestamp)) {
year_exp <- as.numeric(format(timestamp, "%Y"))
# Check if year within time span of data set
if (sum(year == year_exp) == 0) {
stop("Year ", year, " not within time span of this dataset.")
}
full_year <- create_timesteps(year, dts, tz = tz)
time_year <- timestamp[(year == year_exp)]
data_year <- x[(year == year_exp)]
if (sum(!is.na(data_year)) == 0) {
out <- data.frame(dplyr::bind_cols(
timestamp = full_year,
data = rep(NA, length(full_year))
))
warning(
"Variable '", attr(x, "varnames"), "' contains no data for year ",
year, "."
)
} else if (length(time_year != length(full_year))) {
out <- dplyr::inner_join(
dplyr::bind_cols(timestamp = full_year),
dplyr::bind_cols(timestamp = time_year, data = data_year),
by = "timestamp", all = TRUE, sort = TRUE
)
} else {
out <- data.frame(dplyr::bind_cols(timestamp = time_year, data = x))
}
out$timestamp <- .POSIXct(out$timestamp, tz = tz)
attr(out$data, "varnames") <- attr(x, "varnames")
attr(out$data, "units") <- attr(x, "units")
return(out)
}
## =============================================================================
#' Generate Year-long Half-hourly Time Step Vector
#'
#' Generate vector with half-hourly time steps of full year, stamped in the
#' center of time unit.
#' Adapted from: REddyProc - fFullYearTimeSteps
#'
#' @param data A data frame to be converted.
#' @param DTS The number of daily time steps (24 or 48).
#' @param tz The time zone used to store data (advised to keep GMT to avoid
#' daytime shifting issues).
#'
#' @return
#' A vector with time steps of full year in POSIX format.
#'
create_timesteps <- function(year, dts = 48, tz = "GMT", shift_by = 720 / dts) {
if (!dts %in% c(24, 48)) {
stop("Only implemented for 24 or 48 daily time steps, not ", dts, ".")
}
format <- "%Y-%m-%d-%H-%M"
start <- paste(year, 1, 1, 0, shift_by, sep = "-")
end <- paste(year + 1, 1, 1, 0, 30 - shift_by, sep = "-")
# timestamp vector with half-hourly timestamps
out <- seq(
strptime(start, format, tz),
strptime(end, format, tz),
(24 / dts * 60 * 60)
)
return(out)
}
## =============================================================================
#' Set File Name with Path
#'
#' Set file name with path and check if directory and/or file exists.
#' Adapted from: REddyProc - fSetFile
#'
#' @param file_name File name as a string.
#' @param dir Directory as a string.
#' @param IO Input / output flag, TRUE for input, FALSE for output.
#'
#' @return
#' The name of the file with its complete path.
#'
set_file <- function(file_name, dir, IO) {
str <- check_value(dir, "str")
# Check if directory exists
if (IO && str && (file.access(dir, mode = 4) != 0)) {
stop("Directory does not exist: ", dir)
}
# Make directory if mode is output
if (!IO && str && (file.access(dir, mode = 0) != 0)) {
dir.create(dir)
message("Directory created: ", dir)
if (file.access(dir, mode = 2) != 0) {
stop("Directory could not be created: ", dir)
}
}
# Set file name accordingly
file <- if (str) file.path(dir, file_name) else file_name
# If input file, check if file exists
if (IO && (file.access(file, mode = 4) != 0)) {
stop("File does not exist or has no read permission: ", file)
}
return(file)
}
## =============================================================================
#' Create dataset with daily means/sums
#'
#' @param data
#' @param timestamp
#' @param mean_vars
#' @param sum_vars
#' @param omit_vars
#'
#' @return
#' @export
#'
#' @examples
summarize_daily <- function(data, timestamp = timestamp, mean_vars = NULL,
sum_vars = NULL, omit_vars = NULL) {
timestamp <- rlang::enquo(timestamp)
if (!missing(mean_vars)) mean_vars <- rlang::enquo(mean_vars)
if (!missing(sum_vars)) sum_vars <- rlang::enquo(sum_vars)
if (!missing(omit_vars)) omit_vars <- rlang::enquo(omit_vars)
all_units <- tidyflux::units(data, "names")
data <- data %>%
dplyr::mutate(
!!rlang::as_name(timestamp) := !!timestamp - 900,
date = lubridate::date(!!timestamp)
) %>%
dplyr::select(-!!timestamp) %>%
{if (!is.null(omit_vars)) {
dplyr::select(., -!!omit_vars)
} else .}
means <- data %>%
{if (!is.null(mean_vars)) {
dplyr::select(., date, !!mean_vars)
} else .} %>%
{if (!is.null(sum_vars)) {
dplyr::select(., -!!sum_vars)
} else .} %>%
dplyr::group_by(date) %>%
dplyr::summarize_all(list(mean), na.rm = TRUE)
sums <- data %>%
{if (!is.null(sum_vars)) {
dplyr::select(., date, !!sum_vars)
} else .} %>%
dplyr::group_by(date) %>%
dplyr::summarize_all(list(sum), na.rm = TRUE)
#tidyflux::units(sums) <- sums_units
#varnames(sums) <- paste0("sum(", names(sums), ")")
dplyr::full_join(means, sums)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.