#' @title Flux data import function. Also has options to pre-process data inputs.
#'
#' @description
#'
#' Imports flux data from multiple files, processes according to optional input
#' parameters, and ouputs an R object according to additional optional
#' parameters.
#'
#' See 'details' for detailed notes on parameter use, supported file and
#' datatypes, and function operation.
#'
#' @param files Character vector of filenames.
#' @param time_format Format of timestamp. See 'details'.
#' @param file_extensions Format of imported data file. See 'details'.
#' @param data_structures Data format. See 'details'.
#' @param minimum_interval Minimum file length, in minutes, to import.
#' @param verbose Prints extra debugging info.
#'
#' @details
#'
#' \code{files}:
#'
#' A list of filenames, in the current working directory. Timestamp vector
#' must be present in each file, and must be the only non-numeric
#' column.
#'
#' The default, 'all', will search the current working directory
#' for all files that have supported file extensions
#' (.txt, .csv, and .dat) and import all appropriate files. While useful
#' for bulk file input, it's recommended to list the files out
#' individually if there are not too many to help sanitize data inputs.
#'
#' \code{datatypes}:
#'
#' Type of sapflux data, one per file. If length 1, is recycled
#' for each file in the input list. Otherwise, its length must
#' match the 'files' length exactly, and it is assumed that datatypes
#' are ordered to match the input file list as it would appear to
#' list.files().
#'
#' \code{time_format}:
#' Same as 'datatypes', but for timestamp format to be fed to
#' 'strptime()' and 'as.POSIXct()'.
#'
#' \code{file_extensions}:
#' Works the same as time_format, but for imported file extensions.
#'
#' \code{dat_structures}:
#' Same as time_format and datatypes, but for more specific datatypes. Not
#' all 'dat_structures' values work for all 'file_extensions', if the order
#' is off or you've provided a bad input it will return an error. Value
#' doesn't matter for .csv files.
#'
#' Currently accepted data structures: "campbell" and "tab_delimited".
#'
#' Additional notes:
#'
#' Campbell CR1000 files have additional metadata associated with them
#' along with the data of interest. Usually, this metadata isn't
#' especially relevant. FluxImport will discard additional
#' metadata and return only a matrix of data values and associated
#' timestamps.
#'
#' @export
#' @family preprocess
#'
#' @examples
#' # If logger applied a multiplier for you, use 'datatype = 'temperatures''
#' ImportRawFlux(files = c(myfil1.dat, myfile2.dat),
#' dat_structures = "campbell", datatypes = "temperatures")
ImportRawFlux <- function(
files = "all",
datatypes = "voltages",
time_formats = "%Y-%m-%d %H:%M:%S",
time_columns = 1,
data_structures = "CampbellSci",
minimum_interval = 120,
verbose = FALSE
) {
# Variable aliases:
dt <- datatypes
tf <- time_formats
tc <- time_columns
ds <- data_structures
mi <- minimum_interval
vb <- verbose
# Input checks
stopifnot(
class(files) == 'character' && is.vector(files),
class(tf) == 'character' && length(tf) == 1,
class(ds) == 'character',
class(vb) == 'logical' && length(vb) == 1
)
# Find files to work over:
message('Checking for file list in current working directory...')
if (length(files) == 1 && files == 'all') {
message('Importing all files in current working directory...')
# The regular expression keeps breaking. One of these definitions for
# aft is working, depending on the day.
#aft <- "\\.txt$ | \\.dat$ | \\.csv$"
aft <- list('.txt', '.csv', '.dat')
files <- list.files()[unlist(lapply(aft, function(x) grep(x, list.files())))]
}
if (length(files) < 1) stop('Could not find files.')
# Replicate input variable length if == 1, then check for correct length.
for (i in c('dt', 'tf', 'tc', 'ds')) { # Vars are shorthand, check aliases
if (length(get(i)) == 1) {
assign(i, rep(get(i), length(files)))
stopifnot(length(get(i)) == length(files))
}
}
# Import files:
raw_data <- list()
for (j in 1:length(files)) {
# Read in data:
fl <- files[[j]]
message(paste('Read:', fl))
argums <- c(list(file = fl), SetFileInfo(ds[j]))
header <- SetHeaderInfo(fl, ds[j])(fl)
jdat <- do.call(read.table, argums)
if (vb) {
cat('j_data head:\n')
print(head(jdat))
}
# Check for time column:
# 'read.table' can't handle full columns of NaN, will import as character.
# This
if (class(jdat[, tc[j]]) != 'character') stop('Cant find time column.')
if (table(sapply(jdat, class))['character'] > 1) {
jdat[, -tc[j]] <- lapply(jdat[, -tc[j]], as.numeric)
jdat[, -tc[j]] <- lapply(jdat[, -tc[j]], function(x) ifelse(is.nan(x), NA, x))
# Sometimes this breaks/doesn't work not sure why, stop() here is QC.
if (table(sapply(jdat, class))['character'] > 1) {
stop(cat('Too many character columns in', fl))
}
}
# Create a timestamp vector. ####
# tryCatch() is used here to halt on warnings thrown by
# striptime(), avoiding cryptic or silent data import errors.
jt <- .POSIXct(list())
tryCatch(expr = {
jt <- as.POSIXct(strptime(jdat[, tc[j]], format = tf[j]))
}, warning = function(cond) {
fail <- cond
invisible()
}, error = function(cond) {
fail <- cond
invisible()
}, finally = {
if (exists('fail')) {
print(fail)
stop('Failed to format time correctly, see warning')
} else {
# Check time interval
ji <- abs(as.numeric(difftime(
time1 = jt[1], time2 = jt[length(jt)], units = 'mins'
)))
if (vb) cat('time interval:\n', ji, jt[1], jt[length(jt)], '\n')
if (ji < mi) {
cat(fl, 'is insufficient length, skipped')
next
}
jdat[, tc[j]] <- jt
}
})
# Move time column to column 1:
jdat <- data.frame(jdat[, tc[j]], jdat[, -tc[j]])
colnames(jdat)[1] <- 'time'
jdat <- jdat[order(jdat[, 1]), ]
raw_data <- c(raw_data, list(jdat))
}
# Wrap-up and return. ####
flux <- new(Class = 'flux')
flux@source_files <- files
flux@datatype <- dt
flux@raw_data <- raw_data
flux@time_id <- tc
message('Done importing!')
log_message <- paste('Imported', length(files), 'files.')
flux@log <- c(flux@log, log_message)
return(flux)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.