R/oldImportRawFlux.R

#' @title Flux data import function, with options to pre-process data inputs.
#'
#' @description
#'
#' Deprecated June 5th. Will not work for analyses using package versions > 0.2.3.
#'
#' Imports flux data from multiple files, processes it according to optional
#' input parameters, and outputs an R object shaped by additional optional
#' parameters.
#'
#' See 'details' for detailed notes on parameter use, supported file and
#' datatypes, and function operation.
#'
#' @param metadata.file     A .csv of metadata. See \link{MetaDataImport}
#'                          for details.
#'
#' @param file              List of single-length characters OR matrix.
#'                          See 'details'.
#'
#' @param filetype          Single-length character.
#'                          Type of file to be imported. Defaults to "csv".
#'                          See 'details'.
#'
#' @param time.format       Time format for conversion to POSIXlt.
#'
#' @param datatype          Single-length Character.
#'                          Type of data being imported. Defaults to "voltages"
#'
#' @param minimum.interval  Numeric, in minutes. Smallest time interval allowed
#'                          in an import file, used to drop junk downloads.
#'                          Defaults to 120 (2 hours).
#'
#' @param diam              Passed to \link{MetaDataImport}. Defaults to
#'                          "circle".
#'
#' @param diam.fill         Passed to \link{MetaDataImport}. Defaults to FALSE.
#'
#' @param rewrite           Passed to \link{MetaDataImport}. Defaults to FALSE.
#'
#' @details
#' \code{file}:
#'
#'      A list of filenames located in the current working directory.
#'      Alternatively, a data frame present in the current global environment.
#'      A timestamp vector must be present in each file and must be the only
#'      non-numeric column. If it is not the first column, the user is
#'      prompted interactively to identify it.
#'
#'      The default, \code{"all"}, will search the current working directory
#'      for all files with supported extensions (.txt, .csv, and .dat) and
#'      import every appropriate file. While useful for bulk input, listing
#'      the files individually is recommended when practical, since it helps
#'      sanitize the data inputs.
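#'
#'      As a rough sketch, the default search behaves approximately like the
#'      following (not the exact internal call):
#'      \preformatted{
#'      # Collect every supported file in the working directory:
#'      file <- as.list(list.files(pattern = "[.](txt|dat|csv)$",
#'                                 ignore.case = TRUE))
#'      }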
#'
#' \code{filetype}: One of "campbell", "csv", or "tab.delim". Defaults to
#'      "csv".
#'
#'      Campbell's .dat and .csv files often have a header or leading rows
#'      that make import difficult using R's normal import functions, and
#'      this function should deal with those idiosyncrasies appropriately.
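#'
#'      For \code{filetype = "campbell"}, the read step is roughly equivalent
#'      to the sketch below (the file name here is hypothetical):
#'      \preformatted{
#'      # Skip the four Campbell header rows and read comma-separated values:
#'      read.table("CR1000_output.dat", sep = ",", skip = 4, header = FALSE,
#'                 stringsAsFactors = FALSE)
#'      }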
#'
#' \code{time.format}:
#'
#'      "%Y-%m-%d %H:%M:%S" is the default timestamp format.
#'      e.g., "2006-05-12 16:27:00" for May 12th, 2016, 4:27 PM.
#'
#'      May be either a length-one character or character vector
#'      specifying the time format for each file in order of
#'      importation.
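#'
#'      As a quick sanity check, a timestamp in the default format parses
#'      cleanly in base R, e.g.:
#'      \preformatted{
#'      as.POSIXct("2016-05-12 16:27:00", tz = "UTC")
#'      }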
#'
#' \code{datatype}: Defaults to "voltages". Use "temperatures" if the
#'      datalogger has already applied a multiplier (see 'examples').
#'
#' Additional notes:
#'
#'      Campbell CR1000 files have additional metadata associated with them
#'      along with the data of interest. Usually, this metadata isn't
#'      especially relevant, so \code{oldImportRawFlux} discards it and
#'      returns only a matrix of data values and associated timestamps.
#'
#'
#' @family preprocess
#'
#' @examples
#' # If your datalogger has already applied a multiplier for you, use
#' # datatype = 'temperatures':
#' oldImportRawFlux(metadata.file = "what.csv",
#'                  file = list("myfile1.csv", "myfile2.csv"),
#'                  filetype = "campbell", datatype = "temperatures")
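#' # Import every supported file in the working directory; assumes a metadata
#' # file named "metadata.csv" (hypothetical) is present there:
#' oldImportRawFlux(metadata.file = "metadata.csv", file = "all")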
oldImportRawFlux <- function(metadata.file = NULL, file = "all",
                       filetype = "csv", time.format = "%Y-%m-%d %H:%M:%S",
                       datatype = "voltages", minimum.interval = 120,
                       diam = "circle", diam.fill = FALSE, rewrite = FALSE
                       ) {
  # Pertinent warnings:
  cat("Version note: for sapflux version",
      as.character(packageVersion("sapflux")),", data tags are assumed to be",
      "integer sequences of column numbers for each data file.\n")
  # Check Input Validity ####
  stopifnot(
    is.character(time.format),
    is.character(datatype)
    )
  message("Checking for file list in current working directory...")
  if (length(
    list.files(pattern = as.character(paste("^", file, "$", sep = "")))
    ) < 1) {
    message("No files found, checking global environment...")
    if (!is.matrix(get(file))) {
      stop("No valid data found for import.")
    }
  }
  # Special check case for the default 'file' value
  if (length(file) == 1 && identical(file[[1]], "all")) {
    message("Importing all files in current working directory...")
    file <- as.list(
      list.files(pattern = "\\.(txt|dat|csv)$", ignore.case = TRUE)
      )
  } else {
    # Check case for all other 'file' values
    stopifnot(is.list(file))
  }
  # Initialize the file reading ####
  flux.return <- new(Class = "flux")
  slot(object = flux.return, name = "datatype") <- datatype
  params <- LoadDefaults(flux.return)
  data.tags <- vector(mode = "list", length = length(file))
  if (is.list(file) && !is.data.frame(file)) {
    slot(object = flux.return, name = "source.files") <-
      unlist(file)
  } else {
    slot(object = flux.return, name = "source.files") <-
      deparse(substitute(file))
  }
  # Import metadata ####
  if (length(metadata.file) > 0) {
    message("Reading metadata...")
    # diam = "circle", diam.fill = FALSE, rewrite = FALSE
    slot(object = flux.return, name = "metadata") <- MetaDataImport(
      metadata = metadata.file, check.only = FALSE, diam = diam,
      diam.fill = diam.fill, rewrite = rewrite)
  } else {
    message("No metadata declared! Can't scale flux data")
    slot(object = flux.return, name = "metadata") <- data.frame(NULL)
  }
  metadata <- slot(object = flux.return, name = "metadata")
  # Import data: ####
  if (is.list(file) && !is.data.frame(file)) {
    data <- list()
    if (length(time.format) == 1) {
      rep.time.format <- rep(x = time.format, times = length(file))
      time.format <- rep.time.format
    } else {
      stopifnot(length(time.format) == length(file))
    }
    ndata <- vector(mode = "numeric", length = 0)
    for (i in seq_along(file)) {
      message(paste("Read:", file[[i]]))
      if (length(filetype) == 1) {
        filetype <- rep(x = filetype, times = length(file))
      } else {
        stopifnot(length(filetype) == length(file))
      }
      if (filetype[i] == "campbell") {
        row.skip <- 4
        file.sep <- ","
        file.header <- FALSE
        file.row.names <- 2
        header <- read.table(
          file = file[[i]], header = FALSE, sep = ",",
          nrow = 4, fill = TRUE, quote = "", colClasses = "character"
        )
        header <- apply(header, 2, function(x) {
          gsub(pattern = '"', replacement = "", x = x)
        })
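        # NOTE: 'header' is parsed here (quotes stripped) but is not
        # referenced again within this function.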
      }
      if (filetype[i] == "csv") {
        row.skip <- 0
        file.sep <- ","
        file.header <- TRUE
        file.row.names <- NULL
      }
      if (filetype[i] == "tab.delim") {
        row.skip <- 0
        file.sep <- "\t"
        file.header <- TRUE
        file.row.names <- NULL
      }
      # Read in data ####
      i.data <- read.table(
        file[[i]], sep = file.sep,
        header = file.header, skip = row.skip,
        row.names = file.row.names,
        na.strings = c(NA, NaN, "", " ", "  ", "   "),
        stringsAsFactors = FALSE
      )
      # 'read.table' can't handle full columns of NaN; they import as character.
      ndata <- append(ndata, sum(!is.na(i.data)))
      # More than one character column means numeric data was read in as
      # character (the timestamp should be the only character column).
      if (sum(sapply(i.data, is.character)) > 1) {
        i.data.sub <- lapply(i.data[, 2:ncol(i.data)], as.numeric)
        i.data.sub <- lapply(i.data.sub, function(x) {
          ifelse(is.nan(x), NA, x)
        })
        i.data <- data.frame(i.data[, 1], i.data.sub, stringsAsFactors = FALSE)
        # Keep the stop in case the above process breaks for some reason.
        #stop(cat("Too many character columns in", file))
      }
      while (!is.character(i.data[, 1])) {
        print(head(i.data))
        new.time <- suppressWarnings(as.numeric(readline(
          prompt = paste("Timestamp is not the first column - enter the",
                         "column number of the timestamp vector in this file: ")
        )))
        if (is.na(new.time)) {
          stop("Bad prompt input.")
        }
        i.data <- cbind(i.data[, new.time], i.data[, -new.time])
      }
      # Create a timestamp vector. ####
      # tryCatch() is used here to halt on warnings or errors thrown during
      # strptime()/as.POSIXct() conversion, avoiding cryptic or silent data
      # import errors.
      time.failure <- NULL
      tryCatch(expr = {
        i.time <- as.POSIXct(strptime(i.data[, 1], format = time.format[i]))
      },
      warning = function(cond) {
        # '<<-' so the condition is visible outside the handler environment
        time.failure <<- cond
      }, error = function(cond) {
        time.failure <<- cond
      })
      if (!is.null(time.failure)) {
        print(time.failure)
        stop("Failed to format time correctly, see warning")
      }
      # strptime() returns NA without a warning when the format does not
      # match, so guard against a completely unparsed timestamp column.
      if (all(is.na(i.time))) {
        stop(paste("No timestamps in", file[[i]], "match", time.format[i]))
      }
      # Check time interval
      i.time.interval <- abs(as.numeric(difftime(
        time1 = i.time[1], time2 = i.time[length(i.time)], units = "mins"
      )))
      if (i.time.interval < minimum.interval) {
        cat(file[[i]], "is too short to import, skipping.\n")
        next
      }
      i.data[, 1] <- i.time
      colnames(i.data)[1] <- "time"
      # Add the new data iteration to the master data list
      i.data <- i.data[order(i.data[, 1]), ]
      data <- c(data, list(i.data))
    } # End 'i' loop
  } # End data import for list
  if (is.data.frame(file)) {
    data <- list(file)
    # Count non-NA data values so the import log stays accurate.
    ndata <- sum(!is.na(file))
  }
  # Metadata-associated processes ####
  if (length(metadata.file) > 0) {
    for (i in 1:length(data)) {
      # Filling data tags:
      if (FALSE) {
        # TODO: Not quite sure how to implement this yet
        # Check input validity:
        while (!all(colnames(data[[i]]) %in% metadata$port.tag)) {
          missing.tags <- which(
            !(colnames(data[[i]]) %in% metadata$port.tag))
          cat("Column names for data from file:\n", file[[i]], "\n")
          print(head(data[[i]]))
          message("Port tags in metadata:")
          print(metadata$port.tag)
          new.column.tag <- readline(
            prompt = paste("Column", missing.tags[1], "is missing from",
                           "metadata - what is the actual port tag?", sep = " ")
          )
          colnames(data[[i]])[missing.tags[1]] <- new.column.tag
        }
      } else {
        colnames(data[[i]])[1] <- "time"
        colnames(data[[i]])[2:ncol(data[[i]])] <- 1:(ncol(data[[i]]) - 1)
        data.tags[[i]] <- colnames(data[[i]])
        names(data.tags)[i] <- "port.tag"
      }
    }
  } else {
    message("No metadata - skipping some preprocessing")
  }
  # Wrap-up and return! ####
  slot(object = flux.return, name = "raw.data") <- data
  slot(object = flux.return, name = "data.tags") <- data.tags
  message("Done importing!")
  # ndata holds the count of non-NA data values from the import section above
  log.message <- paste("Imported", length(file), "files for a total of",
                       sum(ndata, na.rm = TRUE), "raw data points")
  slot(flux.return, "log") <- c(slot(flux.return, "log"), log.message)
  return(flux.return)
}