R/getspec.R

Defines functions getspec

Documented in getspec

#' Import spectra files
#'
#' Finds and imports spectra files from a folder. Currently works
#' for reflectance files generated in Ocean Optics SpectraSuite (USB2000,
#' USB4000 and Jaz spectrometers), CRAIC software (after exporting) and
#' Avantes (after exporting).
#'
#' @param where (required) folder in which files are located.
#' @param ext file extension to be searched for, without the "."
#' (defaults to "txt").
#' @param lim a vector with two numbers determining the wavelength limits to be
#' considered (defaults to 300 and 700).
#' @param decimal character to be used to identify decimal plates
#' (defaults to ".").
#' @param sep column delimiting characters to be considered in addition to the
#' default (which are: tab, space, and ";")
#' @param subdir should subdirectories within the \code{where} folder be
#' included in the search? (defaults to \code{FALSE}).
#' @param subdir.names should subdirectory path be included in the name of the
#' spectra? (defaults to \code{FALSE}).
#' @param cores Number of cores to be used. If greater than 1, import will use
#'  parallel processing (not available in Windows).
#' @param fast deprecated argument. use \code{cores} for parallel processing
#'  instead.
#' @return A data frame, of class \code{rspec}, containing individual imported
#' spectral files as columns.
#' Reflectance values are interpolated to the nearest wavelength integer.
#'
#' @export
#'
#' @importFrom pbmcapply pbmclapply
#'
#' @examples \dontrun{
#' getspec('examplespec/', lim = c(400, 900))
#' getspec('examplespec/', ext = 'ttt')}
#'
#' @author Rafael Maia \email{[email protected]@zips.uakron.edu}
#' @author Hugo Gruson \email{[email protected]@normalesup.org}
#'
#' @references Montgomerie R (2006) Analyzing colors. In: Hill G, McGraw K (eds)
#' Bird coloration. Harvard University Press, Cambridge, pp 90-147.


#clumsy: if subdir=T, column name includes subdir name (desired?)

getspec <- function(where = getwd(), ext = 'txt', lim = c(300, 700), decimal = ".",
           sep=NULL, subdir = FALSE, subdir.names = FALSE,
           cores=getOption("mc.cores", 2L), fast){

  # check deprecated arguments
  if(!missing(fast))
    stop('argument "fast" is deprecated. see ?getspec for more information.', call.=FALSE)

  # allow multiple extensions
  extension <- paste0("\\.", ext, "$", collapse = "|")

  # get file names
  file_names <- list.files(where, pattern = extension, recursive = subdir, include.dirs = subdir)
  files <- paste(where, '/', file_names, sep = '')

  if(subdir.names){
  	file_names <- gsub(extension, '', file_names)}else{
  	file_names <- gsub(extension, '', basename(file_names))
  	 }

  if(length(file_names) == 0){
  	stop('No files found. Try a different extension value for argument "ext"')
  	}

  # Wavelength range
  range <- seq(lim[1],lim[2])

  # Build shell of final output
  final <- matrix(nrow = length(range), ncol = length(file_names) + 1)
  final[, 1] <- range

  # vector of corrupt files
  corrupt <- rep(FALSE, length(files))

  # define separators
  seps <- paste0(c("\\\t|\\;| ",sep), collapse="|\\")


  # Setting a progress bar
  #progbar <- txtProgressBar(min = 0, max = length(files), style = 2)

  # On Windows, set cores to be 1
    if (cores > 1 && .Platform$OS.type == "windows") {
      cores <- 1
      message('Parallel processing not available in Windows; "cores" set to 1.\n')
    }

    if(length(files) <= cores)
      cores <- 1

  # if ProcSpec, check if xml2 is installed and loaded
  if(any(grepl("\\.ProcSpec$", files))){
    if (!requireNamespace("xml2", quietly = TRUE))
      stop('"xml2" package needed for to import .ProcSpec files. Please install it.',
      call. = FALSE)

    if(!isNamespaceLoaded("xml2"))
      requireNamespace("xml2")
  }

  # message with number of spectra files being imported
  message(length(files), ' files found; importing spectra:')

  gsp <- function(ff){
    if(grepl("\\.ProcSpec$", ff)){
      # ProcSpec files differ too much from the other formats and need their own
      # function.

      tempframe <- parse_procspec(ff)

    }else{

      # read in raw file
      raw <- scan(file = ff, what = '', quiet = TRUE, 
                  dec = decimal, sep = '\n', skipNul = TRUE)

      # rough fix for 'JazIrrad' files that have a stram of calibration data at the end
      if(length(grep('Begin Calibration Data', raw)) > 0)
         raw <- raw[1:grep('Begin Calibration Data', raw) - 1]

      #ToDo we can actually use this raw string to import metadata if we want

      # TEMPORARY PLACEHOLDER TO DEAL WITH NaN & Inf VALUES IN SPEC
      # remove NaN & inf
      #    if(length(grep('\\tnan', raw)) + length(grep('\\tinf', raw)) > 0){
      #      raw <- gsub('\\tnan', '\t0.0', raw)
      #      raw <- gsub('\\tinf', '\t0.0', raw)
      #      corrupt <- TRUE
      #    }

      # substitute separators for a single value to be used in split
      raw <- gsub(seps, ";", raw)

      # remove multiply occuring split character
      raw <- gsub(paste0("(;)\\1+"), "\\1", raw)

      # remove split character from first or last occurence
      raw <- gsub('^;|;$', '', raw)

      # convert decimal value to point
      raw <- gsub(decimal, '.', raw, fixed = TRUE)

      # exclude lines that have text
      #raw <- raw[!grepl('[A-Da-dF-Zf-z]', raw)]

      # exclude any line that doesn't start with a number
      scinum <- "-?[[:digit:]]+\\.?[[:digit:]]*((E|e)(-|+)?[[:digit:]]+)?"
      raw <- raw[grepl(paste0("^", scinum, ";"), raw)]

      # split on separators
      rawsplit <- strsplit(raw, ";")

      rawsplit <- do.call(rbind, rawsplit)

      if(dim(rawsplit)[2] < 2)
        stop('could not separate columns, choose a different value for "sep" argument', call.=FALSE)

      # convert to numeric, check for NA
      suppressWarnings(class(rawsplit) <- 'numeric')

      # remove columns where all values are NAs (due to poor tabulation)
      rawsplit <- rawsplit[, !apply(rawsplit, 2, function(x) all(is.na(x)))]

      # use only first and last column
      tempframe <- rawsplit[, c(1, dim(rawsplit)[2])]
    }

    # interpolate
    interp <- do.call(cbind, approx(tempframe[, 1], tempframe[, 2], xout = range))

    # check if there are any NA left, assign as corrupt if so
    #if(any(is.na(interp)))
    #  corrupt[i] <- TRUE

    # add to final table
    #final[, i+1] <- interp[, 2]

    interp[ ,2]

    #setTxtProgressBar(progbar, i)
  }


  tmp <- pbmclapply(files, function(x)
    tryCatch(gsp(x),
    error = function(e) NULL,
    warning = function(e) NULL
    ), mc.cores=cores)

  specnames <- gsub(extension, '', file_names)

  if(any(unlist(lapply(tmp, is.null)))){
  	whichfailed <- which(unlist(lapply(tmp, is.null)))
  	# stop if all files are corrupt
  	if(!length(whichfailed) < length(files))
  	  stop('Could not import spectra, check input files and function arguments', call.=FALSE)

    # if not, import the ones remaining
    warning('Could not import one or more files:\n',
      paste0(files[whichfailed], '\n'),
      call.=FALSE)

    specnames <- specnames[-whichfailed]
  }


  tmp <- do.call(cbind, tmp)

  final <- cbind(range, tmp)

  colnames(final) <- c('wl', specnames)
  final <- as.data.frame(final)
  class(final) <- c('rspec', 'data.frame')

if(any(corrupt)){
  	cat('\n')
  	warning('the following files contain character elements within wavelength and/or reflectance values: - check for corrupt or otherwise poorly exported files. Verify values returned.')
}

  # Negative value check
  if(length(final[final < 0]) > 0){
    warning(paste("\nThe spectral data contain ", length(final[final < 0]), " negative value(s), which may produce unexpected results if used in models. Consider using procspec() to correct them."), call.=FALSE)
  }

  final
  }

Try the pavo package in your browser

Any scripts or data that you put into this service are public.

pavo documentation built on July 16, 2018, 9:03 a.m.