R/oldBindRawFlux.R

#' @title Binding function for post-import raw flux data
#'
#' @description
#'
#' Deprecated June 5th. Won't work for analysis using versions > 0.2.3.
#'
#' Sapflow data needs much pre-processing before primary analysis
#' and scaling - this function binds raw data files together,
#' assuming that all files are from the same datalogger.
#'
#' @param flux  Input data of class "flux"
#'
#' @return Returns the modified 'flux' object back.
#'
#' @family preprocess
#' @examples
#' flux <- FluxProcess(flux = mydata)
oldBindRawFlux <- function(flux) {
  # Input validation & defaults ####
  validObject(flux)
  params <- LoadDefaults(flux = flux)
  # Data binding ####
  if (length(slot(object = flux, name = "data")) > 0) {
    overwrite <- readline(prompt = "Data exists! Overwrite?")
    if (!(tolower(overwrite) %in% c("y", "yes"))) {
      stop("Won't overwrite!")
    }
  }
  metadata <- slot(object = flux, name = "metadata")
  data.list <- slot(object = flux, name = "raw.data")
  data.tags <- slot(object = flux, name = "data.tags")
  # Create the time merge vector, in POSIXct: ####
  full.time <- matrix(data = NA, ncol = 1, nrow = 0)
  full.time <- as.data.frame(data.frame(full.time))
  for (i in data.list) {
    i.time <- i[, 1]
    full.time <- merge(x = full.time, y = i.time,
                       by = 1, all = TRUE, sort = TRUE)
  }
  full.time <- na.omit(unlist(full.time, use.names = FALSE))
  full.time <- as.POSIXct(full.time, origin = "1970-01-01 00:00:00")
  # Merge the data tags vector ####
  # If 'Reduce' breaks then maybe just do an lapply(), I only used it
  # because it's a new and exciting function...
  same.data.tags <- Reduce(function(x, y) {
    x && identical(y, data.tags[[1]])
    }, init = TRUE, data.tags)
  if (same.data.tags) {
    tag.name <- names(data.tags)[1]
    data.tags <- list(data.tags[[1]])
    names(data.tags) <- tag.name
  } else {
    stop("BindRawFlux can't handle complex data.tag inputs yet -
         they need to be identical for each data file")
  }
  # Bind the data ####
  message("Binding data...")
  data <- data.frame(full.time)
  data.binder <- data.frame(NULL)
  ndata.prev <- vector(mode = "numeric", length = 0)
  total.prev <- vector(mode = "numeric", length = 0)
  for (i in data.list) {
    ndata.prev <- append(ndata.prev, table(is.na(i))[["FALSE"]])
    total.prev <- append(total.prev, nrow(i))
    data.binder <- plyr::rbind.fill(data.binder, i)
  }
  ndata.prev <- sum(ndata.prev, na.rm = TRUE)
  total.prev <- sum(total.prev, na.rm = TRUE)
  # Duplicated rows wreck things later. This isn't an ideal solution:
  dup_times <- data.binder$time[which(duplicated(data.binder$time) == TRUE)]
  if (length(dup_times) > 0) {
    data.binder <- data.binder[-which(data.binder$time %in% dup_times), ]
  }
  if (!is.na(table(is.na(data.binder$time))["TRUE"])) {
    stop("NA's popped up in BindRawFlux time vector, needs debugging.")
  }
  data <- merge(x = data, y = data.binder,
                     by = 1, all = TRUE, sort = TRUE)
  # Pull the NAs out from time vector and drop associated data
  time.check <- nrow(data)
  data.drops <- which(is.na(data[, 1]))
  if (length(data.drops) > 0) {
    data <- data[-which(is.na(data[, 1])), ]
    if (nrow(data) < 0.99 * time.check) {
      stop("Lost >1% of data from NA omission on time vector")
    }
  }
  time <- .POSIXct(integer())
  time <- data[, 1]
  data <- as.matrix(data[, -1])
  data.tags <- lapply(data.tags, function(x) {
    x[-1]
  })
  if (slot(object = flux, name = "datatype") == "voltages") {
    data <- apply(X = data, MARGIN = c(1,2), FUN = abs)
  }
  # Logging, clean-up and return ####
  ndata.post <- (table(is.na(data)))["FALSE"]
  ndata.chng <- round(100 - ((ndata.post / ndata.prev) * 100), 2)
  total.post <- nrow(data)
  total.chng <- round(100 - ((total.post / total.prev) * 100), 2)
  slot(object = flux, name = "time") <- time
  slot(object = flux, name = "data") <- data
  slot(object = flux, name = "data.tags") <- data.tags
  if (ndata.chng > 0) {
    log.message <- paste(
      "BindRawFlux removed", ndata.prev - ndata.post,
      "data points, corresponding to", ndata.chng,
      "% of total."
      )
    slot(flux, "log") <- c(slot(flux, "log"), log.message)
  }
  if (total.chng > 0) {
    log.message <- paste(
      "Additionally,", total.prev - total.post,
      "rows were dropped as duplicate - a",
      total.chng, "% change."
    )
    slot(flux, "log") <- c(slot(flux, "log"), log.message)
  }
  return(flux)
}
bmcnellis/sapflux documentation built on May 12, 2019, 10:27 p.m.