#' @title Binding function for post-import raw flux data
#'
#' @description
#'
#' Sapflow data needs much pre-processing before primary analysis
#' and scaling - this function binds raw data files together.
#'
#' @param flux Input data of class "flux"
#' @param drop_tolerance Tolerance, as %, for dropping data that
#' has no associated time vector.
#'
#' @return Returns the modified 'flux' object back.
#'
#' @export
#' @family preprocess
#' @examples
#' flux <- FluxProcess(flux = mydata)
BindRawFlux <- function(flux, drop_tolerance = 1) {
# Input validation & defaults ####
if (length(flux@metadata) < 1) stop('Need metadata for binding.')
validObject(flux)
params <- LoadDefaults(flux = flux)
sf <- flux@source_files
meta <- flux@metadata
meta <- meta[which(meta$INCLUDE == 'TRUE'), ]
if (length(flux@data) > 0) message('Overwriting data slot.')
# Creating alises
tl <- drop_tolerance / 100
dt <- flux@datatype
# Check datatypes - must all be the same BEFORE merging!
if (length(unique(dt)) != 1) stop('Datatypes must be identical.')
dt <- dt[1]
dout <- data.frame()
tags <- unique(meta$BIND)
cf <- 0
for (h in 1:length(tags)) {
# Find the filename(s) associated with the data that will be bound.
if (sum(!is.na(tags)) < 1) {
fl <- sf
} else {
fl <- meta$FILE[which(meta$BIND == tags[h])]
#fl <- sf[grep(pattern = pat, x = sf)]
}
fl <- sf[which(sf %in% fl)]
if (length(fl) < 1) {
warning(cat('No files included for bind tag ', tags[h], '\n'))
next
}
# Advance the file list length counter.
cf <- cf + length(fl)
# Find the raw data associated with the current file name.
hdat <- flux@raw_data[which(sf == fl)]
# Create the time merge vector, in POSIXct:
tmat <- matrix(data = NA, ncol = 1, nrow = 0)
tmat <- data.frame(tmat, stringsAsFactors = F)
# Find common time vector for all raw files in the bind pool:
for (i in hdat) {
ti <- i[, 1]
tmat <- merge(x = tmat, y = ti, by = 1, all = T, sort = T)
}
# Clean up the common time vector and convert to POSIXct:
tmat <- na.omit(unlist(tmat, use.names = F))
tmat <- data.frame(as.POSIXct(tmat, origin = '1970-01-01 00:00:00'))
# Preallocate binding variables:
message(paste('Binding data... ', tags[h]))
hout <- data.frame()
# Set up NA counter:
np <- numeric()
tp <- numeric()
# Bind data / tally NA counters
# Will this loop work with columns of different names?
for (i in hdat) {
np <- append(np, sum(is.na(i)) - length(i))
tp <- append(tp, length(i))
browser()
hout <- plyr::rbind.fill(hout, i)
}
# Sum NA tallys
np <- sum(np, na.rm = T)
tp <- sum(tp, na.rm = T)
# Duplicated rows wreck things later. This isn't an ideal solution:
dup <- hout$time[which(duplicated(hout$time) == T)]
if (length(dup) > 0) hout <- hout[-which(hout$time %in% dup), ]
if (sum(is.na(hout$time)) > 0) stop('NAs in time vec, needs debug')
#
tmat <- merge(x = tmat, y = hout, by = 1, all = T, sort = T)
colnames(tmat)[1] <- 'time'
# Fix column names. Won't work if the filenames from the fl vector
# have column names that aren't identical.
cn <- meta[which(meta['FILE'] == fl[1]), 'COLUMN_NAME']
for (i in 2:length(fl)) {
cn_t <- meta['COLUMN_NAME'][which(meta['FILE'] == fl[i])]
if (cn_t != cn) stop('Column names on tagged files need to be identical')
}
### USE METADATA TO CHECK COLUMN NAMES AGAINST TREATMENTS!
### This will remove the need for the 'signals'
browser()
colnames(tmat) <- cn
if (ncol(dout) < 1) {
dout <- data.frame(tmat[, 1])
dout <- merge(x = dout, y = tmat, by = 1, all = T, sort = T)
colnames(dout) <- cn
} else {
dout <- merge(x = dout, y = tmat, by = 1, all = T, sort = T)
}
}
if (length(cf) < 1) stop('Failed to bind any files.')
message('Wrapup...')
fd <- abs(cf - length(sf))
if (fd > 0) warning(cat(fd, 'files counted twice during merging.\n'))
if (fd < 0) warning(cat(fd, 'files excluded by given tags.\n'))
# Pull the NAs out from time vector and drop associated data
tk <- nrow(dout)
dr <- which(is.na(dout[, 1]))
if (length(dr) > 0) dout <- dout[-which(is.na(dout[, 1])), ]
if (nrow(dout) < (1 - tl) * tk) stop('Lost >1% of data from NAs in time vec.')
time <- .POSIXct(integer())
time <- dout[, 1]
dout <- as.matrix(dout[, -1])
data_tags <- as.list(colnames(dout))
# Logging, clean-up and return ####
no <- length(dout) - sum(is.na(dout))
nc <- round(100 - (no / np * 100), 2)
tc <- round(100 - (nrow(dout) / tp * 100), 2)
flux@time <- time
flux@data <- dout
flux@data_tags <- data_tags
if (nc > 0) {
log_message <- paste(
'BindRawFlux cut', np - no, 'points, or', nc, '% of total.')
flux@log <- c(flux@log, log_message)
}
return(flux)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.