R/oldMetaDataImport.R

#' @title Flux metadata import/integrity check function
#'
#' 6/6/2018 CHANGES:
#'
#' I'm generalizing the tags.
#' Tree/plant ID is now 'SAMPLE'.
#' Sub-tree, i.e. 'stem' is now 'SUB_SAMPLE'.
#' Sub-subsample, i.e. 'probe' is now 'SUB_SAMPLE_REPLICATE'
#' 'Port' is also kind of confusing, replacing with 'COLUMN_NAME'
#' Getting rid of 'wire', that's just going to cause confusion.
#' 'Species', 'plot', and 'site' are being converted to 'TREATMENT_X',
#' where X is 1-5.
#'
#' dbh.large is now DBH_1, and dbh.small is DBH_2.
#'
#' date.removed is 'DATE_UNINSTALL'
#'
#' Next step: write a template generation function that pastes file names and column names,
#' or whatver, and spits out a relevant CSV.
#'
#'
#' @description
#'
#' Sapflux metadata is required to interpret the signals returned by the
#' datalogger during data collection, and the 'flux' class objects
#' require metadata to be formatted a particular way - this function serves
#' to check the integrity of the metadata relative to what's required by
#' the class, and also guides import/formatting from a filename or
#' dataframe present in the current working environment.
#'
#' @param metadata       Filename of metadata OR data frame
#'
#' @param check_only     Skip import and just perform a metadata integrity check.
#'                       Defaults to FALSE.
#'
#' @details
#'
#' The function checks for the following columns:
#'
#' \code{date.install}  : Date the port began data collection.
#'
#' \code{date.removed}  : Date the port stopped collecting data.
#'
#' \code{port.tag}      : The datalogger port tag. Must be a unique value,
#'                        and values must be matched with a unique value
#'                        in the imported data.
#'
#' \code{stem.tag}      : The ID for the stem associated with the probe.
#'                        Not required to be unique.
#'
#' \code{plant.tag}     : The ID for the plant associated with the probe.
#'                        Not required to be unique.
#'
#' \code{diam.at.probe} : Stem diameter at probe placement.
#'
#' @family preprocess
#' @examples
#' MetaDataImport(metadata = foo.csv)
#' # in console, if successful: Finished metadata!
MetaDataImport <- function(metadata, check.only = FALSE,
                           time.format = "%m/%d/%Y",
                           diam = "circle", diam.fill = FALSE,
                           rewrite = FALSE) {
  return.env <- new.env()
  # Input integrity checks ####
  stopifnot(
    class(check.only) == "logical" & length(check.only) == 1,
    class(metadata) %in% c("character", "data.frame")
  )
  # Resolve metadataname/data information ####
  if (class(metadata) == "character") {
    stopifnot(length(metadata) == 1)
    if (metadata %in% list.files() == FALSE) {
      stop("Can't find the file in the working directory")
    }
    file.extension <- unlist(strsplit(metadata, "\\."))
    file.extension <- file.extension[length(file.extension)]
    if (file.extension == "csv") {
      metadata.import <- read.csv(metadata, stringsAsFactors = FALSE,
                         na.strings = c("NA", "", " ", "NaN"))
    }
    if (file.extension == "txt") {
      metadata.import <- read.table(file = metadata, header = TRUE,
                           sep = "\t", stringsAsFactors = FALSE,
                           row.names = NULL)
    }
  } else {
    stop("Metadata input must be a filename - dataframes to be
         supported later")
  }
  stopifnot(
    exists("metadata.import"),
    class(metadata.import) == "data.frame"
    )
  orig.metadata <- metadata.import
  assign(x = "metadata.export", value = metadata.import, envir = return.env)
  # Check metadata integrity ####
  catch.fail <- FALSE
  metadata.import <- get(x = "metadata.export", envir = return.env)
  # Special check for diameters:
  if (!("diameter" %in% colnames(metadata.import))) {
    if (diam == "circle") {
      answ <- "no"
    } else if (diam == "ellipse") {
      answ <- "yes"
    } else {
      answ <- readline(prompt = "Using max/min diameter? ")
    }
    if (tolower(answ) %in% c("yes", "y")) {
      stopifnot(
        "dbh.large" %in% colnames(metadata.import),
        "dbh.small" %in% colnames(metadata.import)
      )
      diam <- c("dbh.large", "dbh.small")
    } else {
      diam <- "diameter"
    }
  }
  # Class checks
  check.cols <- c("date.install", "date.removed", diam)
  if (any(check.cols %in% colnames(metadata.import)) == FALSE) {
    stop("You broke the metadata in a predictable way that isn't fixed yet.")
  }
  if (!("diameter" %in% check.cols)) {
    check.cols <- c(check.cols, "diameter")
  }
  for (i in check.cols) {
    if (!(i %in% colnames(metadata.import))) {
      next
    }
    i.col <- metadata.import[[i]]
    # Special case for diameter checks:
    if (i %in% c("diameter", "dbh.large", "dbh.small")) {
      NAcount <- sum(is.na(i.col))
      i.col <- suppressWarnings(as.numeric(i.col))
      if (sum(is.na(i.col)) > NAcount) {
        stop("Numeric conversion dropped diameters - check inputs")
        catch.fail <- TRUE
      }
      if (i %in% c("dbh.large", "dbh.small")) {
        if ("diameter" %in% colnames(metadata.import)) {
          stop("Multiple diameters - check inputs")
        }
        if (i == "dbh.large") {
          diameter <- EllipseToCircle(
            large = i.col, small = metadata.import[["dbh.small"]])
        } else if (i == "dbh.small") {
          diameter <- EllipseToCircle(
            large = metadata.import[["dbh.large"]], small = i.col)
        }
        metadata.import <- cbind(metadata.import, diameter)
        metadata.import <- metadata.import[, -which(
          colnames(metadata.import) %in% c("dbh.large", "dbh.small")
        )]
        next
      }
      if (NAcount > 0) {
        if (diam.fill == TRUE) {
          answ <- "yes"
        } else if (diam.fill == FALSE) {
          answ <- "no"
        } else {
          answ <- readline("Missing diams - fill in with averages? ")
        }
        if (tolower(answ) %in% c("yes", "y")) {
          NAdiam.count <- sum(is.na(metadata.import[["diameter"]]))
          mean.diam <- mean(i.col, na.rm = TRUE)
          metadata.import[["diameter"]] <- ifelse(
            is.na(i.col), mean.diam, i.col
          )
          diam <- "diameter"
        } else {
          stop("Diameters incomplete - check inputs")
        }
      }
      if (max(metadata.import$diameter, na.rm = TRUE) > 200) {
        head(metadata.import$diameter)
        answ <- readline(
          prompt = "Diameters too large, should be in centimeters -
          are they in millimeters? "
        )
        if (tolower(answ) %in% c("yes, y")) {
          metadata.import$diameter <- metadata.import$diameter / 10
        } else {
          stop("Import failed - diameters too large")
        }
      }
      next
  } # end diameter special case
    if (class(i.col) != "character") {
      stop(paste("Column", i, "failed to import as either",
                 "POSIXt or character - correct column defined?"))
    }
    while(!inherits(i.col, "POSIXct")) {
      i.col <- strptime(x = i.col, format = time.format)
      i.col <- as.POSIXct(i.col)
    }
    metadata.import[[i]] <- i.col
  }
  assign("metadata.export", metadata.import, envir = return.env)
  tryCatch(expr = {
    stopifnot(
      # Duplicated column check:
      anyDuplicated(x = colnames(metadata.import)) == FALSE,
      # Column name checks:
      "port.tag" %in% colnames(metadata.import),
      "plant.tag" %in% colnames(metadata.import),
      "plot.tag" %in% colnames(metadata.import),
      "stem.tag" %in% colnames(metadata.import),
      "species.tag" %in% colnames(metadata.import),
      "site.tag" %in% colnames(metadata.import),
      "date.install" %in% colnames(metadata.import),
      "date.removed" %in% colnames(metadata.import),
      "diameter" %in% colnames(metadata.import),
      # Column class checks:
      inherits(x = metadata.import$date.install, what = "POSIXct"),
      inherits(x = metadata.import$date.removed, what = "POSIXct"),
      class(metadata.import$diameter) == "numeric"
    )
  }, error = function(cond) {
    if (check.only == TRUE) {
      stop(cond)
      catch.fail <- TRUE
    } else {
      # Parameter checks...
      metadata.import <- get(x = "metadata.export", envir = return.env)
      message("Metadata verificaton failed.")
      fix.prompt <- readline(prompt =
               "Do you want to attempt to fix it from console? ")
      if (tolower(fix.prompt)  %in% c("yes", "y") == FALSE) {
        warning("Exiting error handler")
        stop(cond)
        catch.fail <- TRUE
      } else {
        # Metadata checks:
        col.tags <- c(
          "port.tag",
          "plant.tag",
          "plot.tag",
          "stem.tag",
          "site.tag",
          "species.tag",
          "date.install",
          "date.removed",
          # Diam includes 'diameter', 'dbh.large', 'dbh.small'
          diam
          )
        for (i in col.tags) {
          while (i %in% colnames(metadata.import) == FALSE) {
            cnm <- paste(
              1:length(colnames(metadata.import)), "=",
              colnames(metadata.import), ","
            )
            mdh <- head(metadata.import)
            colnames(mdh) <- cnm
            print(mdh)
            rename.col <- readline(
              prompt = paste("Which column represents", i,
                             "in your metadata? ", sep = " ")
            )
            rename.col <- as.numeric(rename.col)
            if (any(length(rename.col) != 1,
                    !is.numeric(rename.col))) {
              stop("Bad readline input, input must be length-1 integer")
              catch.fail <- TRUE
            }
            colnames(metadata.import)[rename.col] <- i
          }
          assign("metadata.export", metadata.import, envir = return.env)
        }
        metadata.import <- get(x = "metadata.export", envir = return.env)
        while (anyDuplicated(x = colnames(metadata.import)) == TRUE) {
          print(head(
            metadata.import)[, duplicated(x = colnames(metadata.import))]
          )
          dup.col.drop <- readline(
            prompt = "Remove which duplicated column? (# in import data)"
          )
          dup.col.drop <- as.numeric(dup.col.drop)
          if (length(dup.col.drop != 1) | is.numeric(dup.col.drop) == FALSE) {
            stop("Bad readline input - must be length-1 numeric")
            catch.fail <- TRUE
          }
          metadata.import <- metadata.import[, -dup.col.drop]
        }
      }
      assign(x = "metadata.export", value = metadata.import, envir = return.env)
      invisible(catch.fail)
    }
  }, finally = {
    if (catch.fail == TRUE) {
      stop("Failed to verify metadata input")
    }
  }) # end tryCatch
  # Last modifications and return ####
  metadata.import <- get(x = "metadata.export", envir = return.env)
  metadata.import$port.tag <- as.character(metadata.import$port.tag)
  metadata.import$plot.tag <- as.character(metadata.import$plot.tag)
  metadata.import$species.tag <- as.character(metadata.import$species.tag)
  if (check.only == FALSE) {
    # 'save' option execution'
    save <- FALSE
    if (any(
      !identical(x = metadata.import, y = orig.metadata),
      !identical(x = colnames(metadata.import), y = colnames(orig.metadata))
      )) {
      if (rewrite == FALSE) {
        save <- FALSE
      } else if (rewrite == TRUE) {
        save <- TRUE
      } else {
        save <- readline(
          prompt = "Metadata changed - overwrite old file? TRUE or FALSE: "
        )
      }
    }
    if (save) {
      if (class(metadata) == "data.frame") {
        assign(x = metadata, value = metadata.import, envir = .GlobalEnv)
      }
      if (class(metadata) == "character") {
        write.csv(
          x = metadata.import, file = paste("new_", metadata, sep = ""),
          row.names = FALSE
          )
        cat("Writing", metadata)
      }
    }
    message("Finished metadata!")
    return(metadata.import)
  }
}
bmcnellis/sapflux documentation built on May 12, 2019, 10:27 p.m.