Nothing
### plyr has rbind.fill for matrices now. We keep with our version to avoid the
### dependency, but do not export it anymore.
##' Quick data frame.
##' Experimental version of \code{\link{as.data.frame}} that converts a
##' list to a data frame, but doesn't do any checks to make sure it's a
##' valid format. Much faster.
##'
##' @param list list to convert to data frame
##' @keywords internal
quickdf <- function(list) {
if (is.matrix (list [[1]]))
n <- nrow (list [[1]])
else
n <- length (list [[1]])
structure(list,
class = "data.frame",
row.names = seq_len(n))
}
##' Bind matrices by row, and fill missing columns with NA
##'
##' The matrices are bound together using their column names or the column indices (in that order of
##' precedence.) Numeric columns may be converted to character beforehand, e.g. using format. If a
##' matrix doesn't have colnames, the column number is used (via \code{\link[base]{make.names}(unique
##' = TRUE)}).
##'
##' Note that this means that a column with name \code{"X1"} is merged with the first column of a
##' matrix without name and so on.
##'
##' Vectors are converted to 1-column matrices prior to rbind.
##'
##' Matrices of factors are not supported. (They are anyways quite inconvenient.) You may convert
##' them first to either numeric or character matrices. If a character matrix is merged with a
##' numeric, the result will be character.
##'
##' Row names are ignored.
##'
##' The return matrix will always have column names.
##'
##' @author C. Beleites
##' @seealso \code{\link[base]{rbind}}, \code{\link[base]{cbind}}, \code{plyr::rbind.fill()}
##' @keywords manip
##' @rdname rbind.fill
##' @examples
##' A <- matrix (1:4, 2)
##' B <- matrix (6:11, 2)
##' A
##' B
##' hyperSpec:::rbind.fill.matrix (A, B)
##'
##' colnames (A) <- c (3, 1)
##' A
##' hyperSpec:::rbind.fill.matrix (A, B)
##'
##' hyperSpec:::rbind.fill.matrix (A, 99)
##'
##' @return a matrix
##' @method rbind.fill matrix
rbind.fill.matrix <- function (...){
matrices <- list (...)
## check the arguments
tmp <- unlist (lapply (matrices, is.factor))
if (any (tmp))
stop ("Input ", paste (which (tmp), collapse = ", "),
" is a factor and needs to be converted first to either numeric or character.")
tmp <- ! unlist (lapply (matrices, is.matrix))
matrices [tmp] <- lapply (matrices [tmp], as.matrix)
## if the matrices have column names, use them
lcols <- lapply (matrices, .cols)
cols <- unique (unlist (lcols))
## the new row positions
pos <- unlist (lapply (matrices, nrow)) # Hadley, for me nrow is about twice as fast as
# .row_names_info (for matrices), the other way round for
# data.frame
## preallocate the new spectra matrix
result <- matrix (NA, nrow = sum (pos), ncol = length (cols))
## make an index vector for the row positions
pos <- c (0, cumsum (pos))
## fill in the new matrix
for (i in seq_along (matrices)){
icols <- match (lcols[[i]], cols)
result [(pos [i] + 1) : pos [i + 1], icols] <- matrices [[i]]
}
colnames (result) <- cols
result
}
.cols <- function (x){
cln <- colnames (x)
if (is.null (cln))
cln <- make.names (seq_len (ncol (x)), unique = TRUE)
cln
}
##' Combine objects by row, filling in missing columns.
##' \code{rbind}s a list of data frames filling missing columns with NA.
##'
##' This is an enhancement to \code{\link{rbind}} which adds in columns
##' that are not present in all inputs, accepts a list of data frames, and
##' operates substantially faster
##'
##' @param ... data frames/matrices to row bind together
##' @keywords manip
##' @rdname rbind.fill
##' @examples
##' #' rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")])
rbind.fill <- function(...) {
dfs <- list(...)
if (length(dfs) == 0) return(list())
if (is.list(dfs[[1]]) && !is.data.frame(dfs[[1]])) {
dfs <- dfs[[1]]
}
dfs <- dfs [!sapply (dfs, is.null)] # compact(dfs) -> dependency plyr.
if (length(dfs) == 1) return(dfs[[1]])
# About 6 times faster than using nrow
rows <- unlist(lapply(dfs, .row_names_info, 2L))
nrows <- sum(rows)
# Build up output template -------------------------------------------------
vars <- unique(unlist(lapply(dfs, base::names))) # ~ 125,000/s
output <- rep(list(rep(NA, nrows)), length(vars)) # ~ 70,000,000/s
names(output) <- vars
seen <- rep(FALSE, length(output))
names(seen) <- vars
## find which cols contain matrices
matrixcols <- unique (unlist (lapply (dfs, function (x)
names (x) [sapply (x, is.matrix)])
))
seen [matrixcols] <- TRUE # class<- will fail if the matrix is not protected by I
# because 2 dims are needed
for(df in dfs) {
if (all(seen)) break # Quit as soon as all done
matching <- intersect(names(df), vars[!seen])
for(var in matching) {
value <- df[[var]]
if (is.factor(value)) {
output[[var]] <- factor(output[[var]])
} else {
class(output[[var]]) <- class(value)
}
}
seen[matching] <- TRUE
}
# Set up factors
factors <- names(output)[unlist(lapply(output, is.factor))]
for(var in factors) {
all <- unique(lapply(dfs, function(df) levels(df[[var]]))) # is that unique needed?
levels(output[[var]]) <- unique(unlist(all))
}
# care about matrices
# the trick is to supply a n by 0 matrix for input without column of that name
for (var in matrixcols){
df <- lapply (dfs, .get.or.make.matrix, var)
output [[var]] <- I (do.call (rbind.fill.matrix, df))
}
# Compute start and end positions for each data frame
pos <- matrix(cumsum(rbind(1, rows - 1)), ncol = 2, byrow = TRUE)
for(i in seq_along(rows)) {
rng <- pos[i, 1]:pos[i, 2]
df <- dfs[[i]]
for(var in setdiff (names (df), matrixcols)) {
output[[var]][rng] <- df[[var]]
}
}
quickdf(output)
}
.get.or.make.matrix <- function (df, var){
tmp <- df [[var]]
if (is.null (tmp))
tmp <- I (matrix (integer (), nrow = nrow (df)))
tmp
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.