R/remove-outl-spectra.R

Defines functions plotMIR remove_outliers

Documented in remove_outliers

#' @title Remove outlier spectra
#' @description Remove outlier spectra based on the
#' \code{pcout()} function of the \code{mvoutlier} package.
#' @usage remove_outliers(list_spectra, remove = TRUE)
#' @param list_spectra List that contains averaged
#' spectral information
#' in list element \code{MIR_mean} (data.frame) and metadata in
#' \code{data_meta} (data.frame).
#' @param remove logical expression (\code{TRUE} or \code{FALSE})
#' that specifies weather spectra shall be removed.
#' If \code{rm = FALSE}, there will be no outlier removal
#' @return Returns list \code{spectra_out} that contains:
#' \itemize{
#'  \item \code{MIR_mean}: Outlier removed MIR spectra as
#'  data.frame object. If \code{remove = FALSE},
#'  the function will
#'  return almost identical list identical to \code{list_spectra},
#'  except that the first \code{indices} column of the spectral
#'  data frame \code{MIR_mean} is removed
#'  (This is done for both options
#'  \code{remove = TRUE} and \code{remove = FALSE}).
#'  \item \code{data_meta}: metadata data.frame, identical
#'  as in the \code{list_spectra} input list.
#'  \item \code{plot_out}: (optional) ggplot2 graph
#'  that shows all spectra (absorbance on x-axis and wavenumber
#'  on y-axis) with outlier marked, if
#'  \code{remove = TRUE}.
#' }
#' @details This is an optional function if one wants to remove
#' outliers.
#' @export
remove_outliers <- function(list_spectra, remove = TRUE) {
  # Outlier detection
  # Use the mvoutlier package and pcout function to identify
  # multivariate outliers
  wfinal01 <- ID <-  NULL
  if (remove == TRUE) {
    # Remove the 'indices' column
    list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1]
    out <- mvoutlier::pcout(list_spectra$MIR_mean, makeplot = T,
      outbound = 0.05) # parameters should be adapted
    # Plot outlying spectra
    plot_out <- plotMIR(
      list_spectra$MIR_mean[
        order(out$wfinal01, decreasing = T), ],
      col = as.factor(out$wfinal01[order(out$wfinal01,
        decreasing = T)])) +
      ggplot2::scale_colour_brewer("outlier", palette = "Set1")
    out_id <- as.character(
      list_spectra$data_meta$ID[!as.logical(out$wfinal01)]
    )
    # Remove  outliers
    MIR_mean <- list_spectra$MIR_mean[
      ! list_spectra$data_meta$ID %in% out_id, ]
    # rep ID and country name
    data_meta <- list_spectra$data_meta[
      ! list_spectra$data_meta$ID %in% out_id, ]
    spectra_out <- list(MIR_mean = MIR_mean,
      data_meta = data_meta,
      plot_out = plot_out)
  } else {
    # Remove the 'indices' column
    list_spectra$MIR_mean <- list_spectra$MIR_mean[, -1]
    spectra_out <- list(MIR_mean = list_spectra$MIR_mean,
      data_meta = list_spectra$data_meta)
  }
  spectra_out
}

## plotMIR function of Antoine Stevens; don't export this
## function to the NAMESPACE
plotMIR <- function(spc, group = NULL, col = NULL,
  linetype = NULL, wr = NULL, brk = NULL,
  ylab = "Absorbance", xlab = "Wavenumber /cm-1",
  by = NULL, by.wrap = T, ...){
  # Function to plot spectra, based on the ggplot2 package
  # spc = spectral matrix, with colnames = wavelengths
  # group = grouping variable, usually the id's of the sample
  # wr = wavelength range to plot
  # brk = breaks of the x-axis
  # by = factor variable for which the mean and sd of
  # each level will be computed and plotted (optional)
  # Requires packages ggplot2; data.table; reshape2
  # Workaround to pass R CMD check:
  # http://stackoverflow.com/questions/9439256/how-can-i-handle-r-cmd-check-no-visible-binding-for-global-variable-notes-when
  # Setting the variables to NULL first
  variable <- value <- colour  <- NULL
  spc <- as.data.frame(spc)
  if (!is.null(wr))
    spc <- spc[, as.numeric(colnames(spc)) >= min(wr) &
        as.numeric(colnames(spc)) <= max(wr)]
  if (is.null(brk))
    brk  <- pretty(as.numeric(colnames(spc)), n = 10)
  if (!is.null(by)) {
    spc$by <- by
    spc <- data.table::data.table(spc, check.names = F)
    mean.spc <- reshape2::melt(
      spc[, lapply(data.table::.SD, mean), by = by],
      id.vars = "by"
    )
    sd.spc <- reshape2::melt(
      spc[, lapply(data.table::.SD, sd), by = by],
      id.vars = "by"
    )
    mean.spc$min <- mean.spc$value - sd.spc$value
    mean.spc$max <- mean.spc$value + sd.spc$value
    mean.spc$variable <-  as.numeric(
      as.character(mean.spc$variable)
    )
    if (by.wrap) {
      p <- ggplot2::ggplot(data = mean.spc) +
        ggplot2::geom_ribbon(
          ggplot2::aes(x = variable, ymin = min, ymax = max),
          fill = "grey", col = "black", size = 0.15)  +
        ggplot2::theme_bw()
      p <-  p +  ggplot2::geom_line(
        ggplot2::aes(x = variable, y = value),
        size = 0.25) +
        ggplot2::facet_wrap(~ by) +
        ggplot2::labs(x = xlab, y = ylab) +
        ggplot2::scale_x_reverse(breaks = brk)
    } else {
      p <- ggplot2::ggplot(data = mean.spc,
        ggplot2::aes(x = variable, y = value, group = by, col = by)) +
        ggplot2::geom_line(size = 0.25)  +
        ggplot2::labs(x = xlab, y = ylab) +
        ggplot2::scale_x_reverse(breaks = brk) +
        ggplot2::theme_bw()
    }
    return(p)
  } else {
    if (is.null(group))
      group  <- as.character(1:nrow(spc))
    spc$group <- group
    spc$colour <- col
    spc$linetype <- linetype
    id.var  <- colnames(spc)[
      grep("group|colour|linetype",colnames(spc))]
    tmp <- reshape2::melt(spc, id.var = id.var)
    tmp$variable <- as.numeric(as.character(tmp$variable))
    p <- ggplot2::ggplot(tmp,
      ggplot2::aes(variable, value, group = group)) +
      ggplot2::labs(x = xlab, y = ylab) +
      ggplot2::theme_bw() +
      ggplot2::scale_x_reverse(breaks = brk)
    if (is.null(col) & is.null(linetype))
      p <- p + ggplot2::geom_line(
        ggplot2::aes(colour = group))
    else if (!is.null(col) & is.null(linetype))
      p <- p + ggplot2::geom_line(
        ggplot2::aes(colour = colour))
    else if (is.null(col) & !is.null(linetype))
      p <- p + ggplot2::geom_line(
        ggplot2::aes(colour = group,
        linetype = linetype))
    else  p <- p + ggplot2::geom_line(
      ggplot2::aes(colour = colour,
      linetype = linetype))
    return(p)
  }
}
philipp-baumann/simplerspec documentation built on Oct. 3, 2023, 12:13 p.m.