#' Prints summary data and plots small segments of sapflux or VWC data.
#'
#' @description
#' This function rapidly reads files and provides information that assists
#' in sensor troubleshooting. Data that is out-of-range for what is reasonable
#' for the measurement is removed, and tallies are made of all columns in the
#' data that have no viable data at all. Plots are printed for further visual
#' examination - some non-viable data will be left within the range of normal
#' data, even if sensors are totally broken.
#'
#' @param file Filename, as a character.
#' @param filetype Type of file imported. Can be "csv, "tab-delim",
#' or "campbell".
#' @param datatype Sapflux or VWC data?
#' @param time.format Format of the timestamp column (column 1!)
#' @param timeframe How much of the most recent data to assess? In days.
#'
#' @details
#'
#' Ask B. McNellis for further details on this.
#'
#' @export
#' @examples
#' test <- CheckSensors(file = "PPINE_03_PSI_650.dat", datatype = "VWC")
CheckSensors <- function(file, filetype = "campbell", datatype = "sapflux",
time.format = "%Y-%m-%d %H:%M:%S",
timeframe = 14) {
# Input validity checks
cat("\n")
cat("Input file:", file, "\n")
cat("Data type:", datatype, "\n")
stopifnot(
length(file) == 1,
class(file) == "character",
filetype %in% c("campbell", "csv", "tab.delim"),
class(time.format) == "character",
length(time.format) == 1,
datatype %in% c("sapflux", "VWC"),
class(timeframe) == "numeric",
length(timeframe) == 1
)
# Read in file, based on filetype. Copy/pasted from ImportRawFlux.R
i <- 1
file <- list(file)
if (filetype[i] == "campbell") {
row.skip <- 1
file.sep <- ","
file.header <- FALSE
file.row.names <- 2
header <- read.table(
file = file[[i]], header = FALSE, sep = ",",
nrow = 4, fill = TRUE, quote = "", colClasses = "character"
)
header <- apply(header, 2, function(x) {
gsub(pattern = '"', replacement = "", x = x)
})
}
if (filetype[i] == "csv") {
row.skip <- 0
file.sep <- ","
file.header <- TRUE
file.row.names <- NULL
}
if (filetype[i] == "tab.delim") {
row.skip <- 0
file.sep <- "\t"
file.header <- TRUE
file.row.names <- FALSE
}
data.check <- read.table(
file[[i]], sep = file.sep,
header = file.header, skip = row.skip,
#row.names = file.row.names,
na.strings = c(NA, NaN, "", " ", " ", " "),
stringsAsFactors = FALSE
)
# Modified the row skipping for the campbell files in order to grab the
# column names from the header. It's 4 for the sapflux function, 1 here...
if (filetype == "campbell") {
cols <- data.check[1, ]
data.check <- data.check[-c(1:3), -2]
cols <- cols[, 3:length(cols)]
stopifnot(length(cols) == (ncol(data.check) - 1))
}
if (class(data.check[, 1]) != "character") {
stop("First column needs to be timestamp")
}
if (table(sapply(data.check, class))["character"] > 1) {
sub <- lapply(data.check[, 2:ncol(data.check)], as.numeric)
sub <- lapply(sub, function(x) {
ifelse(is.nan(x), NA, x)
})
data.check <- data.frame(data.check[, 1], sub, stringsAsFactors = FALSE)
# Keep the stop in case the above process breaks for some reason.
#stop(cat("Too many character columns in", file))
}
# Change time vector class
time <- strptime(data.check[, 1], format = time.format)
time <- as.POSIXct(time)
if (length(time) < 1000) {
indx <- length(time) - 1
} else {
indx <- 1000
}
intv.range <- vector(mode = "numeric", length = indx)
for (i in 1:indx) {
intv.i <- difftime(time[i + 1], time[i], units = "secs")
intv.i <- as.numeric(intv.i)
intv.range[i] <- intv.i
}
intv <- min(intv.range)
total.intv <- difftime(time[length(time)], time[1])
file.length <- round(as.numeric(total.intv), digits = 2)
cat("Total file length:", file.length, units(total.intv), "\n")
if (file.length < 10) {
return(message("Not enough data to report on!"))
}
nrec <- (86400 * timeframe) / intv
if (nrec > length(time)) {
nrec <- length(time) - 1
}
time <- time[(length(time) - nrec):length(time)]
data.check <- data.check[, 2:ncol(data.check)]
data.check <- data.check[(nrow(data.check) - nrec):nrow(data.check), ]
data.check <- abs(as.matrix(data.check))
# Kill all the out-of-bounds data, depending on datatype
if (datatype == "sapflux") {
message("Data bounds on sapflux assumed to be in volts, not degrees C.")
upper <- 2
lower <- 0
ylab <- "Voltages (V)"
}
if (datatype == "VWC") {
message("VWC checking requires 'VWC' in data column names.")
vwcs <- grep("VWC", cols)
cat("Dropped", ncol(data.check) - length(vwcs), "columns.\n")
data.check <- data.check[, vwcs]
upper <- 1
lower <- 0
ylab <- "VWC, l H20/l soil"
}
data.check <- ifelse(data.check > upper, NA, data.check)
data.check <- ifelse(data.check < lower, NA, data.check)
empty <- apply(data.check, 2, function(x) {
table(is.na(x))["TRUE"] == nrow(data.check)
})
empty <- which(empty == "TRUE")
viabl <- ncol(data.check) - length(empty)
cat("Viable columns:", viabl, "\n")
if (viabl < 1) {
return(message("No data to report on for this file!"))
}
cat("Columns", empty, "have no viable data.\n")
not.empty <- apply(data.check, 2, function(x) {
round(table(is.na(x))["FALSE"] / length(x), digits = 2)
})
cat("Fraction of each column with data:\n", as.numeric(not.empty), "\n")
for (i in 1:ncol(data.check)) {
empty <- table(is.na(data.check[, i]))
if (!is.na(empty["TRUE"])) {
if (as.numeric(empty["TRUE"]) == nrow(data.check)) {
next
}
}
plot(time, data.check[, i],
ylab = ylab, xlab = "Time",
main = paste(file, "\nColumn:", i))
}
return(data.frame(time, data.check))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.