BY <- function(x, ...) UseMethod("BY")
BY.default <- function(x, g, FUN, ..., use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE,
expand.wide = FALSE, parallel = FALSE, mc.cores = 1L,
return = c("same", "vector", "list")) {
# If matrix, dispatch to matrix method
# if(is.matrix(x) && !inherits(x, "matrix")) return(UseMethod("BY", unclass(x)))
if(!is.function(FUN)) FUN <- match.fun(FUN)
aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply
simplify <- switch(return[1L], same = 1L, vector = 2L, list = 3L, stop("BY.default only supports same, vector and list output!"))
g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE)
# Computing result: unsimplified
if(!missing(...) && g[[1L]] > 1L && length(ln <- whichv(vlengths(dots <- list(...), FALSE), length(x)))) {
asl <- lapply(dots[ln], gsplit, g)
if(length(dots) > length(ln)) {
mord <- dots[-ln]
if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.")
} else mord <- NULL
res <- .mapply(FUN, c(list(gsplit(x, g)), asl), mord)
} else res <- aplyfun(gsplit(x, g), FUN, ...)
# Returning raw or wide result
if(simplify == 3L || expand.wide) {
if(use.g.names) names(res) <- GRPnames(g, FALSE)
if(simplify == 3L) return(res)
return(do.call(rbind, res))
}
# If using names and function also assigns names e.g. quantile()
if(use.g.names && length(names(res[[1L]]))) {
names(res) <- GRPnames(g, FALSE)
res <- unlist(res, recursive = FALSE, use.names = TRUE)
if(reorder && length(res) == length(g[[2L]]) && !isTRUE(g$ordered[2L]))
warning("result is same length as x but the grouping is not sorted and the function used added names. Thus BY cannot decisively distinguish whether you are using a transformation function like scale() or a summary function like quantile() that computes a vector of statistics. The latter is assumed and the result is not reordered. To receive reordered output without constructed names set use.g.names = FALSE")
} else { # Function does not assign names... or not using group names...
res <- unlist(res, FALSE, FALSE)
if(length(res) == g[[1L]]) {
if(use.g.names) names(res) <- GRPnames(g, FALSE)
} else if(length(res) == length(g[[2L]])) {
if(reorder) res <- .Call(C_greorder, res, g)
if(length(names(x)) && (reorder || isTRUE(g$ordered[2L]))) # Making sure we don't assign wrong names..
names(res) <- names(x)
}
}
if(simplify == 1L) return(copyMostAttributes(res, x)) # here needs to be copyMostAttributes... otherwise overwrites names
res
}
# Experimental: But not really faster and also risky because vapply checks types and types may differ...
# copysplaplfun <- function(x, g, FUN, ...) {
# sx <- gsplit(x, g)
# if(length(sx) > 100000L && length(r1 <- match.fun(FUN)(sx[[1L]], ...)) == 1L)
# return(copyMostAttributes(vapply(sx, FUN, r1, ..., USE.NAMES = FALSE), x))
# copyMostAttributes(unlist(lapply(sx, FUN, ...), FALSE, FALSE), x)
# }
copysplaplfun <- function(x, g, FUN, ...) copyMostAttributes(unlist(lapply(gsplit(x, g), FUN, ...), FALSE, FALSE), x)
copysplmaplfun <- function(x, g, FUN, asl, mord) copyMostAttributes(unlist(.mapply(FUN, c(list(gsplit(x, g)), asl), mord), FALSE, FALSE), x)
splaplfun <- function(x, g, FUN, ...) unlist(lapply(gsplit(x, g), FUN, ...), FALSE, FALSE)
splmaplfun <- function(x, g, FUN, asl, mord) unlist(.mapply(FUN, c(list(gsplit(x, g)), asl), mord), FALSE, FALSE)
BY.data.frame <- function(x, g, FUN, ..., use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE,
expand.wide = FALSE, parallel = FALSE, mc.cores = 1L,
return = c("same", "matrix", "data.frame", "list")) {
if(!is.list(x)) stop("x needs to be a list")
if(!is.function(FUN)) FUN <- match.fun(FUN)
aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply
return <- switch(return[1L], same = 1L, matrix = 3L, data.frame = 2L, list = 0L,
stop("Unknown return option!"))
g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE)
n <- length(g[[2L]])
if(!missing(...) && g[[1L]] > 1L && length(ln <- whichv(vlengths(dots <- list(...), FALSE), n))) {
asl <- lapply(dots[ln], gsplit, g)
if(length(dots) > length(ln)) {
mord <- dots[-ln]
if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.")
} else mord <- NULL
multi <- TRUE
} else multi <- FALSE
# Just plain list output
if(return == 0L) {
if(multi) {
if(expand.wide) return(aplyfun(x, function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord))))
return(aplyfun(x, function(y) .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord)))
}
if(expand.wide) return(aplyfun(x, function(y) do.call(rbind, lapply(gsplit(y, g, use.g.names), FUN, ...))))
return(aplyfun(x, function(y) lapply(gsplit(y, g, use.g.names), FUN, ...)))
}
ax <- attributes(x)
# Wider output (for multiple summary statistics like quantile())
if(expand.wide) {
if(return < 3L) { # Return a data.frame
splitfun <- if(multi) function(y) .Call(Cpp_mctl, do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)), TRUE, 0L) else
function(y) .Call(Cpp_mctl, do.call(rbind, lapply(gsplit(y, g), FUN, ...)), TRUE, 0L)
res <- unlist(aplyfun(x, splitfun), recursive = FALSE, use.names = TRUE)
if(return == 1L) {
isDTl <- inherits(x, "data.table")
ax[["names"]] <- names(res)
ax[["row.names"]] <- if(use.g.names && !isDTl && length(gn <- GRPnames(g))) gn else
.set_row_names(.Call(C_fnrow, res))
} else {
isDTl <- FALSE
ax <- list(names = names(res),
row.names = if(use.g.names && length(gn <- GRPnames(g))) gn else .set_row_names(.Call(C_fnrow, res)),
class = "data.frame")
}
return(condalcSA(res, ax, isDTl))
} else { # Return a matrix
attributes(x) <- NULL
splitfun <- if(multi) function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)) else
function(y) do.call(rbind, lapply(gsplit(y, g), FUN, ...))
res <- do.call(cbind, aplyfun(x, splitfun))
cn <- dimnames(res)[[2L]]
namr <- rep(ax[["names"]], each = ncol(res)/length(x))
dimnames(res) <- list(if(use.g.names) GRPnames(g) else NULL,
if(length(cn)) paste(namr, cn, sep = ".") else namr)
return(res)
}
}
# No expand wide (classical result)
matl <- return == 3L
isDTl <- !matl && return != 2L && inherits(x, "data.table") # is data table and return data.table
rownam <- ax[["row.names"]]
attributes(x) <- NULL
# Returning plain data frame
if(return == 2L) ax <- list(names = ax[["names"]], row.names = rownam, class = "data.frame")
# Using group names...
if(use.g.names && !isDTl && !is.null(g$groups)) {
res <- vector("list", length(x))
res1 <- if(multi) .mapply(FUN, c(list(gsplit(x[[1L]], g)), asl), mord) else lapply(gsplit(x[[1L]], g), FUN, ...)
if(length(names(res1[[1L]]))) { # We apply a function that assigns names (e.g. quantile())
names(res1) <- GRPnames(g)
res1 <- unlist(res1, recursive = FALSE, use.names = TRUE)
rn <- names(res1)
names(res1) <- NULL
if(reorder && length(res1) == n && !isTRUE(g$ordered[2L])) {
warning("nrow(result) is same as nrow(x) but the grouping is not sorted and the function used added names. Thus BY cannot decisively distinguish whether you are using a transformation function like scale() or a summary function like quantile() that computes a vector of statistics. The latter is assumed and the result is not reordered. To receive reordered output without constructed names set use.g.names = FALSE")
reorder <- FALSE
}
} else { # function doesn't assign names, different options.
res1 <- unlist(res1, FALSE, FALSE)
if(length(res1) == g[[1L]]) rn <- GRPnames(g)
else if(matl) {
rn <- if(length(res1) == n && is.character(rownam) && rownam[1L] != "1" && (reorder || isTRUE(g$ordered[2L]))) rownam else NULL
} else { # Important to check length(rn) below (simply keeps ax[["row.names"]])
rn <- if(length(res1) != n || !(reorder || isTRUE(g$ordered[2L]))) .set_row_names(length(res1)) else NULL
}
}
# Finish computing results...
if(matl) {
res[[1L]] <- res1
if(length(res) > 1L) res[-1L] <- if(multi) aplyfun(x[-1L], splmaplfun, g, FUN, asl, mord) else aplyfun(x[-1L], splaplfun, g, FUN, ...)
res <- do.call(cbind, res)
} else {
res[[1L]] <- copyMostAttributes(res1, x[[1L]])
if(length(res) > 1L) res[-1L] <- if(multi) aplyfun(x[-1L], copysplmaplfun, g, FUN, asl, mord) else aplyfun(x[-1L], copysplaplfun, g, FUN, ...)
}
# Not using group names...
} else {
if(matl) {
res <- if(multi) do.call(cbind, aplyfun(x, splmaplfun, g, FUN, asl, mord)) else do.call(cbind, aplyfun(x, splaplfun, g, FUN, ...))
rn <- if(nrow(res) == n && is.character(rownam) && rownam[1L] != "1" && (reorder || isTRUE(g$ordered[2L]))) rownam else NULL
} else {
res <- if(multi) aplyfun(x, copysplmaplfun, g, FUN, asl, mord) else aplyfun(x, copysplaplfun, g, FUN, ...) # isDTL ? -> Not needed as data.tables cannot have character row-names anyway.
rn <- if(.Call(C_fnrow, res) != n || !(reorder || isTRUE(g$ordered[2L]))) .set_row_names(.Call(C_fnrow, res)) else NULL
}
}
# reorder result if necessary, without dimnames...
if(reorder && fnrow(res) == n && !isTRUE(g$ordered[2L])) {
ind <- .Call(C_greorder, seq_len(n), g)
res <- if(matl) res[ind, , drop = FALSE] else .Call(C_subsetDT, res, ind, seq_along(res), FALSE)
}
if(matl) {
dimnames(res) <- list(rn, ax[["names"]])
return(res)
}
if(length(rn)) ax[["row.names"]] <- rn
return(condalcSA(res, ax, isDTl))
}
BY.list <- function(x, ...) BY.data.frame(x, ...)
BY.matrix <- function(x, g, FUN, ..., use.g.names = TRUE, sort = .op[["sort"]], reorder = TRUE,
expand.wide = FALSE, parallel = FALSE, mc.cores = 1L,
return = c("same", "matrix", "data.frame", "list")) {
if(!is.matrix(x)) stop("x needs to be a matrix")
if(!is.function(FUN)) FUN <- match.fun(FUN)
aplyfun <- if(parallel) function(...) parallel::mclapply(..., mc.cores = mc.cores) else lapply
return <- switch(return[1L], same = 3L, matrix = 2L, data.frame = 1L, list = 0L,
stop("Unknown return option!"))
g <- GRP(g, return.groups = use.g.names, sort = sort, call = FALSE)
n <- nrow(x)
if(!missing(...) && g[[1L]] > 1L && length(ln <- whichv(vlengths(dots <- list(...), FALSE), n))) {
asl <- lapply(dots[ln], gsplit, g)
if(length(dots) > length(ln)) {
mord <- dots[-ln]
if(is.null(names(mord)) && is.null(names(asl))) warning("If some arguments have the same length as the data (vectors) while others have length 1 (scalars), please ensure that at least one of the two have keywords e.g. argname = value. This is because the latter are passed to the 'MoreArgs' argument of .mapply, and thus the order in which arguments are passed to the function might be different from your top-level call. In particular, .mapply will first pass the vector valued arguments followed by the scalar valued ones.")
} else mord <- NULL
multi <- TRUE
} else multi <- FALSE
# Just plain list output
if(return == 0L) {
xln <- .Call(Cpp_mctl, x, TRUE, 0L) # Named list from matrix
if(multi) {
if(expand.wide) return(aplyfun(xln, function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord))))
return(aplyfun(xln, function(y) .mapply(FUN, c(list(gsplit(y, g, use.g.names)), asl), mord)))
}
if(expand.wide) return(aplyfun(xln, function(y) do.call(rbind, lapply(gsplit(y, g, use.g.names), FUN, ...))))
return(aplyfun(xln, function(y) lapply(gsplit(y, g, use.g.names), FUN, ...)))
}
# Wider output (for multiple summary statistics like quantile())
if(expand.wide) {
if(return == 1L) { # Return data frame
splitfun <- if(multi) function(y) .Call(Cpp_mctl, do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)), TRUE, 0L) else
function(y) .Call(Cpp_mctl, do.call(rbind, lapply(gsplit(y, g), FUN, ...)), TRUE, 0L)
res <- unlist(aplyfun(.Call(Cpp_mctl, x, TRUE, 0L), splitfun), recursive = FALSE, use.names = TRUE)
ax <- list(names = names(res),
row.names = if(use.g.names && length(gn <- GRPnames(g))) gn else .set_row_names(.Call(C_fnrow, res)),
class = "data.frame")
} else { # Return a matrix
splitfun2 <- if(multi) function(y) do.call(rbind, .mapply(FUN, c(list(gsplit(y, g)), asl), mord)) else
function(y) do.call(rbind, lapply(gsplit(y, g), FUN, ...))
res <- do.call(cbind, aplyfun(.Call(Cpp_mctl, x, FALSE, 0L), splitfun2))
cn <- dimnames(res)[[2L]]
namr <- rep(dimnames(x)[[2L]], each = ncol(res)/ncol(x))
dn <- list(if(use.g.names) GRPnames(g) else NULL,
if(length(cn)) paste(namr, cn, sep = ".") else namr)
if(return == 2L) return(`dimnames<-`(res, dn))
ax <- attributes(x)
ax[["dim"]] <- dim(res)
ax[["dimnames"]] <- dn
}
return(setAttributes(res, ax))
}
dn <- dimnames(x)
matl <- return > 1L
xl <- .Call(Cpp_mctl, x, FALSE, 0L) # Plain list from matrix columns
# No expand wide (classical result)
if(use.g.names && !is.null(g$groups)) {
res <- vector("list", length(xl))
res1 <- if(multi) .mapply(FUN, c(list(gsplit(xl[[1L]], g)), asl), mord) else lapply(gsplit(xl[[1L]], g), FUN, ...)
if(length(names(res1[[1L]]))) { # We apply a function that assigns names (e.g. quantile())
names(res1) <- GRPnames(g)
res1 <- unlist(res1, recursive = FALSE, use.names = TRUE)
rn <- names(res1)
names(res1) <- NULL
if(reorder && length(res1) == n && !isTRUE(g$ordered[2L])) {
warning("nrow(result) is same as nrow(x) but the grouping is not sorted and the function used added names. Thus BY cannot decisively distinguish whether you are using a transformation function like scale() or a summary function like quantile() that computes a vector of statistics. The latter is assumed and the result is not reordered. To receive reordered output without constructed names set use.g.names = FALSE")
reorder <- FALSE
}
} else { # function doesn't assign names, different options.
res1 <- unlist(res1, FALSE, FALSE)
rn <- if(length(res1) == g[[1L]]) GRPnames(g) else
if(length(res1) == n && (reorder || isTRUE(g$ordered[2L]))) dn[[1L]] else NULL
}
# Finish computing results...
res[[1L]] <- res1
if(length(res) > 1L) res[-1L] <- if(multi) aplyfun(xl[-1L], splmaplfun, g, FUN, asl, mord) else aplyfun(xl[-1L], splaplfun, g, FUN, ...)
if(matl) { # Return a matrix
res <- do.call(cbind, res)
dn <- list(rn, dn[[2L]])
}
} else { # Not using group names
res <- if(multi) aplyfun(xl, splmaplfun, g, FUN, asl, mord) else aplyfun(xl, splaplfun, g, FUN, ...)
if(matl) { # Return matrix
res <- do.call(cbind, res)
if(nrow(res) != n || !(reorder || isTRUE(g$ordered[2L]))) dn <- list(NULL, dn[[2L]])
} else { # Return data frame
rn <- if(.Call(C_fnrow, res) == n && (reorder || isTRUE(g$ordered[2L]))) dn[[1L]] else NULL
}
}
# reorder result if necessary, without dimnames...
if(reorder && fnrow(res) == n && !isTRUE(g$ordered[2L])) {
ind <- .Call(C_greorder, seq_len(n), g)
res <- if(matl) res[ind, , drop = FALSE] else .Call(C_subsetDT, res, ind, seq_along(res), FALSE)
}
if(matl) {
if(return == 2L) return(`dimnames<-`(res, dn))
ax <- attributes(x)
ax[["dim"]] <- dim(res)
ax[["dimnames"]] <- dn
} else { # Returning a data.frame
ax <- list(names = dn[[2L]],
row.names = if(length(rn)) rn else .set_row_names(.Call(C_fnrow, res)),
class = "data.frame")
}
return(setAttributes(res, ax))
}
BY.grouped_df <- function(x, FUN, ..., reorder = TRUE, keep.group_vars = TRUE, use.g.names = FALSE) {
g <- GRP.grouped_df(x, call = FALSE)
gn <- which(attr(x, "names") %in% g[[5L]])
res <- BY.data.frame(if(length(gn)) fcolsubset(x, -gn) else x, g, FUN, ..., reorder = reorder, use.g.names = use.g.names)
# Other return options
if(!is.data.frame(res)) return(res)
n <- fnrow(res)
same_size <- n == fnrow(x)
if(!same_size && is.null(g[[4L]])) keep.group_vars <- FALSE
# Not preserving grouping variable or same size and no grouping variables: return appropriate object
if(!keep.group_vars || (same_size && length(gn) == 0L)) return(if(same_size && (reorder || isTRUE(g$ordered[2L]))) res else fungroup(res))
# If same size, with grouping variables...
if(same_size) {
if(!(reorder || isTRUE(g$ordered[2L]))) return(fungroup(res))
ar <- attributes(res)
ar[["names"]] <- c(g[[5L]], ar[["names"]])
return(condalcSA(c(.subset(x, gn), res), ar, any(ar$class == "data.table")))
}
# If other size or no groups
if(n != g[[1L]]) {
if(is.null(g[[4L]])) return(fungroup(res))
len <- n / g[[1L]]
if(len != as.integer(len)) stop("length of output (", n, ") is not a multiple of the number of groups: ", g[[1L]])
g[[4L]] <- .Call(C_subsetDT, g[[4L]], rep(seq_len(g[[1L]]), each = len), seq_along(g[[5L]]), FALSE)
}
# Aggregation
ar <- attributes(fungroup2(res, oldClass(res)))
ar[["names"]] <- c(g[[5L]], ar[["names"]])
condalcSA(c(g[[4L]], res), ar, any(ar$class == "data.table"))
}
BY.zoo <- function(x, ...) if(is.matrix(x)) BY.matrix(x, ...) else BY.default(x, ...)
BY.units <- BY.zoo # return = "same" preserves attributes by default
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.