#' @title Flux metadata import/integrity check function
#'
#' 6/6/2018 CHANGES:
#'
#' I'm generalizing the tags.
#' Tree/plant ID is now 'SAMPLE'.
#' Sub-tree, i.e. 'stem' is now 'SUB_SAMPLE'.
#' Sub-subsample, i.e. 'probe' is now 'SUB_SAMPLE_REPLICATE'
#' 'Port' is also kind of confusing, replacing with 'COLUMN_NAME'
#' Getting rid of 'wire', that's just going to cause confusion.
#' 'Species', 'plot', and 'site' are being converted to 'TREATMENT_X',
#' where X is 1-5.
#'
#' dbh.large is now DBH_1, and dbh.small is DBH_2.
#'
#' date.removed is 'DATE_UNINSTALL'
#'
#' Next step: write a template generation function that pastes file names and column names,
#' or whatver, and spits out a relevant CSV.
#'
#'
#' @description
#'
#' Sapflux metadata is required to interpret the signals returned by the
#' datalogger during data collection, and the 'flux' class objects
#' require metadata to be formatted a particular way - this function serves
#' to check the integrity of the metadata relative to what's required by
#' the class, and also guides import/formatting from a filename or
#' dataframe present in the current working environment.
#'
#' @param metadata Filename of metadata OR data frame
#'
#' @param check_only Skip import and just perform a metadata integrity check.
#' Defaults to FALSE.
#'
#' @details
#'
#' The function checks for the following columns:
#'
#' \code{date.install} : Date the port began data collection.
#'
#' \code{date.removed} : Date the port stopped collecting data.
#'
#' \code{port.tag} : The datalogger port tag. Must be a unique value,
#' and values must be matched with a unique value
#' in the imported data.
#'
#' \code{stem.tag} : The ID for the stem associated with the probe.
#' Not required to be unique.
#'
#' \code{plant.tag} : The ID for the plant associated with the probe.
#' Not required to be unique.
#'
#' \code{diam.at.probe} : Stem diameter at probe placement.
#'
#' @family preprocess
#' @examples
#' MetaDataImport(metadata = foo.csv)
#' # in console, if successful: Finished metadata!
MetaDataImport <- function(metadata, check.only = FALSE,
time.format = "%m/%d/%Y",
diam = "circle", diam.fill = FALSE,
rewrite = FALSE) {
return.env <- new.env()
# Input integrity checks ####
stopifnot(
class(check.only) == "logical" & length(check.only) == 1,
class(metadata) %in% c("character", "data.frame")
)
# Resolve metadataname/data information ####
if (class(metadata) == "character") {
stopifnot(length(metadata) == 1)
if (metadata %in% list.files() == FALSE) {
stop("Can't find the file in the working directory")
}
file.extension <- unlist(strsplit(metadata, "\\."))
file.extension <- file.extension[length(file.extension)]
if (file.extension == "csv") {
metadata.import <- read.csv(metadata, stringsAsFactors = FALSE,
na.strings = c("NA", "", " ", "NaN"))
}
if (file.extension == "txt") {
metadata.import <- read.table(file = metadata, header = TRUE,
sep = "\t", stringsAsFactors = FALSE,
row.names = NULL)
}
} else {
stop("Metadata input must be a filename - dataframes to be
supported later")
}
stopifnot(
exists("metadata.import"),
class(metadata.import) == "data.frame"
)
orig.metadata <- metadata.import
assign(x = "metadata.export", value = metadata.import, envir = return.env)
# Check metadata integrity ####
catch.fail <- FALSE
metadata.import <- get(x = "metadata.export", envir = return.env)
# Special check for diameters:
if (!("diameter" %in% colnames(metadata.import))) {
if (diam == "circle") {
answ <- "no"
} else if (diam == "ellipse") {
answ <- "yes"
} else {
answ <- readline(prompt = "Using max/min diameter? ")
}
if (tolower(answ) %in% c("yes", "y")) {
stopifnot(
"dbh.large" %in% colnames(metadata.import),
"dbh.small" %in% colnames(metadata.import)
)
diam <- c("dbh.large", "dbh.small")
} else {
diam <- "diameter"
}
}
# Class checks
check.cols <- c("date.install", "date.removed", diam)
if (any(check.cols %in% colnames(metadata.import)) == FALSE) {
stop("You broke the metadata in a predictable way that isn't fixed yet.")
}
if (!("diameter" %in% check.cols)) {
check.cols <- c(check.cols, "diameter")
}
for (i in check.cols) {
if (!(i %in% colnames(metadata.import))) {
next
}
i.col <- metadata.import[[i]]
# Special case for diameter checks:
if (i %in% c("diameter", "dbh.large", "dbh.small")) {
NAcount <- sum(is.na(i.col))
i.col <- suppressWarnings(as.numeric(i.col))
if (sum(is.na(i.col)) > NAcount) {
stop("Numeric conversion dropped diameters - check inputs")
catch.fail <- TRUE
}
if (i %in% c("dbh.large", "dbh.small")) {
if ("diameter" %in% colnames(metadata.import)) {
stop("Multiple diameters - check inputs")
}
if (i == "dbh.large") {
diameter <- EllipseToCircle(
large = i.col, small = metadata.import[["dbh.small"]])
} else if (i == "dbh.small") {
diameter <- EllipseToCircle(
large = metadata.import[["dbh.large"]], small = i.col)
}
metadata.import <- cbind(metadata.import, diameter)
metadata.import <- metadata.import[, -which(
colnames(metadata.import) %in% c("dbh.large", "dbh.small")
)]
next
}
if (NAcount > 0) {
if (diam.fill == TRUE) {
answ <- "yes"
} else if (diam.fill == FALSE) {
answ <- "no"
} else {
answ <- readline("Missing diams - fill in with averages? ")
}
if (tolower(answ) %in% c("yes", "y")) {
NAdiam.count <- sum(is.na(metadata.import[["diameter"]]))
mean.diam <- mean(i.col, na.rm = TRUE)
metadata.import[["diameter"]] <- ifelse(
is.na(i.col), mean.diam, i.col
)
diam <- "diameter"
} else {
stop("Diameters incomplete - check inputs")
}
}
if (max(metadata.import$diameter, na.rm = TRUE) > 200) {
head(metadata.import$diameter)
answ <- readline(
prompt = "Diameters too large, should be in centimeters -
are they in millimeters? "
)
if (tolower(answ) %in% c("yes, y")) {
metadata.import$diameter <- metadata.import$diameter / 10
} else {
stop("Import failed - diameters too large")
}
}
next
} # end diameter special case
if (class(i.col) != "character") {
stop(paste("Column", i, "failed to import as either",
"POSIXt or character - correct column defined?"))
}
while(!inherits(i.col, "POSIXct")) {
i.col <- strptime(x = i.col, format = time.format)
i.col <- as.POSIXct(i.col)
}
metadata.import[[i]] <- i.col
}
assign("metadata.export", metadata.import, envir = return.env)
tryCatch(expr = {
stopifnot(
# Duplicated column check:
anyDuplicated(x = colnames(metadata.import)) == FALSE,
# Column name checks:
"port.tag" %in% colnames(metadata.import),
"plant.tag" %in% colnames(metadata.import),
"plot.tag" %in% colnames(metadata.import),
"stem.tag" %in% colnames(metadata.import),
"species.tag" %in% colnames(metadata.import),
"site.tag" %in% colnames(metadata.import),
"date.install" %in% colnames(metadata.import),
"date.removed" %in% colnames(metadata.import),
"diameter" %in% colnames(metadata.import),
# Column class checks:
inherits(x = metadata.import$date.install, what = "POSIXct"),
inherits(x = metadata.import$date.removed, what = "POSIXct"),
class(metadata.import$diameter) == "numeric"
)
}, error = function(cond) {
if (check.only == TRUE) {
stop(cond)
catch.fail <- TRUE
} else {
# Parameter checks...
metadata.import <- get(x = "metadata.export", envir = return.env)
message("Metadata verificaton failed.")
fix.prompt <- readline(prompt =
"Do you want to attempt to fix it from console? ")
if (tolower(fix.prompt) %in% c("yes", "y") == FALSE) {
warning("Exiting error handler")
stop(cond)
catch.fail <- TRUE
} else {
# Metadata checks:
col.tags <- c(
"port.tag",
"plant.tag",
"plot.tag",
"stem.tag",
"site.tag",
"species.tag",
"date.install",
"date.removed",
# Diam includes 'diameter', 'dbh.large', 'dbh.small'
diam
)
for (i in col.tags) {
while (i %in% colnames(metadata.import) == FALSE) {
cnm <- paste(
1:length(colnames(metadata.import)), "=",
colnames(metadata.import), ","
)
mdh <- head(metadata.import)
colnames(mdh) <- cnm
print(mdh)
rename.col <- readline(
prompt = paste("Which column represents", i,
"in your metadata? ", sep = " ")
)
rename.col <- as.numeric(rename.col)
if (any(length(rename.col) != 1,
!is.numeric(rename.col))) {
stop("Bad readline input, input must be length-1 integer")
catch.fail <- TRUE
}
colnames(metadata.import)[rename.col] <- i
}
assign("metadata.export", metadata.import, envir = return.env)
}
metadata.import <- get(x = "metadata.export", envir = return.env)
while (anyDuplicated(x = colnames(metadata.import)) == TRUE) {
print(head(
metadata.import)[, duplicated(x = colnames(metadata.import))]
)
dup.col.drop <- readline(
prompt = "Remove which duplicated column? (# in import data)"
)
dup.col.drop <- as.numeric(dup.col.drop)
if (length(dup.col.drop != 1) | is.numeric(dup.col.drop) == FALSE) {
stop("Bad readline input - must be length-1 numeric")
catch.fail <- TRUE
}
metadata.import <- metadata.import[, -dup.col.drop]
}
}
assign(x = "metadata.export", value = metadata.import, envir = return.env)
invisible(catch.fail)
}
}, finally = {
if (catch.fail == TRUE) {
stop("Failed to verify metadata input")
}
}) # end tryCatch
# Last modifications and return ####
metadata.import <- get(x = "metadata.export", envir = return.env)
metadata.import$port.tag <- as.character(metadata.import$port.tag)
metadata.import$plot.tag <- as.character(metadata.import$plot.tag)
metadata.import$species.tag <- as.character(metadata.import$species.tag)
if (check.only == FALSE) {
# 'save' option execution'
save <- FALSE
if (any(
!identical(x = metadata.import, y = orig.metadata),
!identical(x = colnames(metadata.import), y = colnames(orig.metadata))
)) {
if (rewrite == FALSE) {
save <- FALSE
} else if (rewrite == TRUE) {
save <- TRUE
} else {
save <- readline(
prompt = "Metadata changed - overwrite old file? TRUE or FALSE: "
)
}
}
if (save) {
if (class(metadata) == "data.frame") {
assign(x = metadata, value = metadata.import, envir = .GlobalEnv)
}
if (class(metadata) == "character") {
write.csv(
x = metadata.import, file = paste("new_", metadata, sep = ""),
row.names = FALSE
)
cat("Writing", metadata)
}
}
message("Finished metadata!")
return(metadata.import)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.