R/rbind.fill.R
In hyperSpec: Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)

Documented in quickdf rbind.fill

### plyr has rbind.fill for matrices now. We keep with our version to avoid the
### dependency, but do not export it anymore.


##' Quick data frame.
##' Experimental version of \code{\link{as.data.frame}} that converts a
##' list to a data frame, but doesn't do any checks to make sure it's a
##' valid format. Much faster.
##'
##' @param list list to convert to data frame
##' @keywords internal
quickdf <- function(list) {
  if (is.matrix (list [[1]]))
    n <- nrow (list [[1]])
  else
    n <- length (list [[1]])

  structure(list,
    class = "data.frame",
    row.names = seq_len(n))
}

##' Bind matrices by row, and fill missing columns with NA
##'
##' The matrices are bound together using their column names or the column indices (in that order of
##' precedence.) Numeric columns may be converted to character beforehand, e.g. using format.  If a
##' matrix doesn't have colnames, the column number is used (via \code{\link[base]{make.names}(unique
##' = TRUE)}).
##'
##' Note that this means that a column with name \code{"X1"} is merged with the first column of a
##' matrix without name and so on.
##'
##' Vectors are converted to 1-column matrices prior to rbind.
##'
##' Matrices of factors are not supported. (They are anyways quite inconvenient.) You may convert
##' them first to either numeric or character matrices. If a character matrix is merged with a
##' numeric, the result will be character.
##'
##' Row names are ignored.
##'
##' The return matrix will always have column names.
##'
##' @author C. Beleites
##' @seealso   \code{\link[base]{rbind}}, \code{\link[base]{cbind}}, \code{plyr::rbind.fill()}
##' @keywords manip
##' @rdname rbind.fill
##' @examples
##'  A <- matrix (1:4, 2)
##'  B <- matrix (6:11, 2)
##'  A
##'  B
##'  hyperSpec:::rbind.fill.matrix (A, B)
##'
##'  colnames (A) <- c (3, 1)
##'  A
##'  hyperSpec:::rbind.fill.matrix (A, B)
##'
##'  hyperSpec:::rbind.fill.matrix (A, 99)
##'
##' @return a matrix
##' @method rbind.fill matrix
rbind.fill.matrix <- function (...){
  matrices <- list (...)

  ## check the arguments
  tmp <- unlist (lapply (matrices, is.factor))
  if (any  (tmp))
    stop ("Input ", paste (which (tmp), collapse = ", "),
          " is a factor and needs to be converted first to either numeric or character.")

  tmp <- ! unlist (lapply (matrices, is.matrix))
  matrices [tmp] <- lapply (matrices [tmp], as.matrix)

  ## if the matrices have column names, use them
  lcols <- lapply (matrices, .cols)
  cols  <- unique (unlist (lcols))

  ## the new row positions
  pos <- unlist (lapply (matrices, nrow)) # Hadley, for me nrow is about twice as fast as
                                          # .row_names_info (for matrices), the other way round for
                                          # data.frame

  ## preallocate the new spectra matrix
  result <- matrix (NA, nrow = sum (pos), ncol = length (cols))

  ## make an index vector for the row positions
  pos <- c (0, cumsum (pos))

  ## fill in the new matrix
  for (i in seq_along (matrices)){
    icols <- match (lcols[[i]], cols)
    result [(pos [i] + 1) : pos [i + 1], icols] <- matrices [[i]]
  }

  colnames (result) <- cols

  result
}

.cols <- function (x){
  cln <- colnames (x)
  if (is.null (cln))
    cln <- make.names (seq_len (ncol (x)), unique = TRUE)
  cln
}

##' Combine objects by row, filling in missing columns.
##' \code{rbind}s a list of data frames filling missing columns with NA.
##'
##' This is an enhancement to \code{\link{rbind}} which adds in columns
##' that are not present in all inputs, accepts a list of data frames, and
##' operates substantially faster
##'
##' @param ... data frames/matrices to row bind together
##' @keywords manip
##' @rdname rbind.fill
##' @examples
##' #' rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")])
rbind.fill <- function(...) {
  dfs <- list(...)
  if (length(dfs) == 0) return(list())
  if (is.list(dfs[[1]]) && !is.data.frame(dfs[[1]])) {
    dfs <- dfs[[1]]
  }
  dfs <- dfs [!sapply (dfs, is.null)]  # compact(dfs) -> dependency plyr.

  if (length(dfs) == 1) return(dfs[[1]])

  # About 6 times faster than using nrow
  rows <- unlist(lapply(dfs, .row_names_info, 2L))
  nrows <- sum(rows)

  # Build up output template -------------------------------------------------
  vars <- unique(unlist(lapply(dfs, base::names)))   # ~ 125,000/s
  output <- rep(list(rep(NA, nrows)), length(vars))  # ~ 70,000,000/s
  names(output) <- vars

  seen <- rep(FALSE, length(output))
  names(seen) <- vars

  ## find which cols contain matrices
  matrixcols <- unique (unlist (lapply (dfs, function (x)
                                        names (x) [sapply (x, is.matrix)])
                                ))
  seen [matrixcols] <- TRUE             # class<- will fail if the matrix is not protected by I
                                        # because 2 dims are needed

  for(df in dfs) {
    if (all(seen)) break  # Quit as soon as all done

    matching <- intersect(names(df), vars[!seen])
    for(var in matching) {
      value <- df[[var]]
      if (is.factor(value)) {
        output[[var]] <- factor(output[[var]])
      } else {
        class(output[[var]]) <- class(value)
      }
    }
    seen[matching] <- TRUE
  }

  # Set up factors
  factors <- names(output)[unlist(lapply(output, is.factor))]
  for(var in factors) {
    all <- unique(lapply(dfs, function(df) levels(df[[var]])))  # is that unique needed?
    levels(output[[var]]) <- unique(unlist(all))
  }

  # care about matrices
  # the trick is to supply a n by 0 matrix for input without column of that name
  for (var in matrixcols){
    df <- lapply (dfs, .get.or.make.matrix, var)
    output [[var]] <- I (do.call (rbind.fill.matrix, df))
  }

  # Compute start and end positions for each data frame
  pos <- matrix(cumsum(rbind(1, rows - 1)), ncol = 2, byrow = TRUE)

  for(i in seq_along(rows)) {
    rng <- pos[i, 1]:pos[i, 2]
    df <- dfs[[i]]

    for(var in setdiff (names (df), matrixcols)) {
      output[[var]][rng] <- df[[var]]
    }
  }

  quickdf(output)
}

.get.or.make.matrix <- function (df, var){
  tmp <- df [[var]]
  if (is.null (tmp))
    tmp <- I (matrix (integer (), nrow = nrow (df)))
  tmp
}

Any scripts or data that you put into this service are public.

hyperSpec documentation built on May 29, 2024, 7:28 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

hyperSpec
Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)

R/rbind.fill.R
In hyperSpec: Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)

Defines functions rbind.fill quickdf

Documented in quickdf rbind.fill

Try the hyperSpec package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hyperSpec Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)

R/rbind.fill.R In hyperSpec: Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)

Defines functions rbind.fill quickdf

Documented in quickdf rbind.fill

Try the hyperSpec package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hyperSpec
Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)

R/rbind.fill.R
In hyperSpec: Work with Hyperspectral Data, i.e. Spectra + Meta Information (Spatial, Time, Concentration, ...)