grainsize: Grain-size data analysis functions

Documented in read_CamSizerXT

#' Import CamSizerXT measurement files.
#' 
#' This function imports ASCII-output of the Retsch CamSizerXT device.
#' 
#' @param file \code{Character} value, file name to be imported.
#' 
#' @param output \code{Character} value, primary output parameter used 
#' for plotting. One out of \code{p3}, \code{Q3}, \code{xFe3}, 
#' \code{xMa3}, \code{xc3}, \code{xFe_min3}, \code{xMa_min3}, 
#' \code{xc_min3}, \code{xFe_max3}, \code{xMa_max3}, \code{xc_max3}. 
#' The specified parameter must be present in the data set. Only the
#' data for the first measured data set is used. Default is \code{p3}.
#' 
#' @return \code{List} object with imported data. Elements \code{x} and 
#' \code{y} are grain size class limits and measured content, 
#' respectively. The object \code{data} contains a list with all 
#' measured parameters. Object \code{meta} contains meta data.
#' 
#' The CamSizerXT software allows exporting measurement data
#' as ACII-files. Therefore use Menu > Evaluation > Daily Report, 
#' select the desired measurement file, toggle all measured 
#' parameters of interest and use menu File > Export (as *.xle). 
#' This creates an Excel-readable file, which however cannot be 
#' imported to R, yet. So it needs to be opened in Excel and 
#' exported as *.txt-file. In summery, please do not change the 
#' files. it is the ASCII-file directly generated by Excel that
#' can be imported to R.
#' The output is a list object with four elements. \code{x} and 
#' \code{y} can be directly used to plot the object and will 
#' contain the primary size definition as set in the input parameters,
#' by default \code{p3}, and the respective amount of the parameter. The 
#' element \code{data} is a list object. The CamSizer XT supports more 
#' than one output file per sample. And thus, several definitions of grain 
#' size are possible (see \code{x$meta$size.definition}). For each of the 
#' size definitions as series of grain size and shape parameters are 
#' measured. The measured parameters are stored in 
#' \code{x$meta$parameters} and as column names in the data object. Finally,
#' a meta data object is returned.
#' 
#' @author Michael Dietze
#' @keywords grainsize
#' @examples
#' 
#' ## uncomment to use or use example data set just below
#' ## load example data
#' # x <- read.CamSizerXT(file = "CamSizerXT.txt")
#'
#' ## read example data set
#' data(CamSizerXT, envir = environment())
#' 
#' ## plot default data set parmeters (p3 vs. class limits)
#' plot(CamSizerXT, type = "l", log = "x")
#' 
#' ## show some data
#' CamSizerXT$meta$ID
#' CamSizerXT$meta$parameters
#' 
#' ## plot grain size distributions of the four size definitions
#' plot(NA, 
#'      xlim = range(CamSizerXT$x), 
#'      ylim = range(CamSizerXT$y), 
#'      main = "Dune sand, p3-distribution",
#'      xlab = "Grain size", 
#'      ylab = "Amount",
#'      log = "x")
#' 
#' for(i in 1:length(CamSizerXT$meta$size.definition)) {
#'   lines(x = CamSizerXT$x, 
#'         y = CamSizerXT$data[[i]][,1], 
#'         col = i)
#' }
#' 
#' legend(x = "topleft", 
#'        legend = CamSizerXT$meta$size.definition, 
#'        col = 1:4, 
#'        lty = 1)
#' 
#' ## plot grain size distribution and symmetry
#' plot(CamSizerXT, 
#'      main = "Dune sand",
#'      xlab = "Grain size (micrometres)", 
#'      ylab = "Amount (%), Symmetry index * 3",
#'      xlim = c(100, 1000),
#'      log = "x",
#'      type = "l")
#' 
#' lines(x = CamSizerXT$x, 
#'       y = CamSizerXT$data[[1]][,14] * 3, 
#'       col = "grey")
#' 
#' @export read_CamSizerXT
read_CamSizerXT <- function(
  file,
  output = "p3"
){
  
  ## check input data
  if(file.exists(file) == FALSE) {
    stop("File does not exist!")
  }
  
  ## read and prepare header part
  header.raw <- readLines(con = file, n = 10, warn = FALSE)[-1]
  header.raw <- gsub(x = header.raw, pattern = "\t\t", replacement = "")
  header.raw <- gsub(x = header.raw, pattern = "\t", replacement = ";")
  header.raw <- strsplit(x = header.raw, split = ";")

  ## check if file is of XT-export-format
  if(grepl(x = header.raw[1], pattern = ".rdf") == FALSE) {
    stop("File appears to be no CamSizerXT output file!")
  }
  
  ## get number of contained measurement files
  n.files <- 0
  while(length(header.raw[[n.files + 1]]) == 5) {
    n.files <- n.files + 1
  }
  
  ## convert header variable to data frame
  header <- matrix(nrow = n.files, ncol = 5)
  for(i in 1:n.files) {
    header[i,] <- header.raw[[i]]
  }
  header <- as.data.frame(header)
  
  ## extract sample ID and size definitions
  header.sub <- strsplit(x = as.character(header[, 4]), 
                         split = "__")

  ID <- header.sub[[1]][1]
  
  size.definition <- unlist(header.sub)[seq(from = 2, 
                                            to = length(header.sub) * 2, 
                                            by = 2)]
  size.definition <- substr(x = size.definition, 
                            start = 1, 
                            stop = 6)
  
  ## extract measurement task
  task <- as.character(header[1, 5])
  
  ## extract measurement date
  date <- paste(as.character(header[1, 2]), 
                as.character(header[1, 3]))
  date <- as.POSIXct(x = strptime(x = date, 
                                  format = "%d.%m.%Y %H:%M"))
  
  ## extract source file names
  file.names <- as.character(header[,4])
  
  
  ## read data set
  data.raw <- read.table(file = file, 
                         skip = 5, 
                         sep = "\t", 
                         dec = ",", 
                         fill = TRUE)

  data.raw <- as.matrix(data.raw)

  ## assign parameter names and units
  col.names.raw <- as.character(data.raw[1,seq(from = 1, 
                                               to = ncol(data.raw), 
                                               by = 2)])
  col.units.raw <- as.character(data.raw[1,seq(from = 2, 
                                               to = ncol(data.raw), 
                                               by = 2)])
  col.units <- col.units.raw[col.names.raw != ""]
  col.names <- col.names.raw[col.names.raw != ""]  
  
  ## isolate data part
  data.raw <- data.raw[-c(1:2, nrow(data.raw)),]
  
  ## assign class limits
  class.limits <- as.numeric(as.character(c(data.raw[2,1],
                                            data.raw[2:nrow(data.raw),2])))
  
  ## assign global data set
  data.global <- matrix(data = as.numeric(data.raw[,3:ncol(data.raw)]), 
                        nrow = length(class.limits), 
                        byrow = FALSE)
  
  ## create data list object
  data <- list(NA)  
  for(i in 1:n.files) {
    index.local <- seq(from = i, 
                       to = ncol(data.global), 
                       by = n.files)
    data.local <- data.global[,index.local]
    colnames(data.local) <- col.names[2:length(col.names)]
    data[[length(data) + 1]] <- data.local
  }
  data[[1]] <- NULL
  
  ## assign default output variable
  if(sum(col.names == output) != 1) {
    stop("Parameter for output not supported!")
  }

  output.default <- data[[1]][,colnames(data[[1]]) == output]
  
  return(list(x = class.limits,
              y = output.default,
              data = data,
              meta = list(
                ID = ID,
                unit = units,
                size.definition = size.definition,
                parameters = names,
                date = date,
                measurement.task = task,
                filename = file.names
                )))
}