Nothing
## GRP() with separate methods for data frames
GRP2 <- function(X, by = NULL, sort = TRUE,
return.order = sort,
return.groups = TRUE,
return.locs = FALSE,
...){
if (is_GRP(X)){
return(X)
} else if (is_df(X)){
df_to_GRP(X, .cols = by %||% names(X), order = sort,
return.order = return.order,
return.groups = return.groups,
return.locs = return.locs)
} else {
GRP3(
X, by = by, sort = sort,
return.order = return.order,
return.groups = return.groups,
return.locs = return.locs,
...
)
}
}
## GRP() but always returns group starts
GRP3 <- function(X, by = NULL, sort = TRUE,
return.order = sort,
return.groups = TRUE,
return.locs = FALSE,
call = FALSE, ...){
if (is_GRP(X)){
return(X)
}
if (!sort && !return.groups && !is.factor(X)) {
if (!is.null(by)) {
X <- cheapr::sset_df(X, j = by)
}
out <- cheapr::new_list(10L)
# groups <- group2(X)
groups <- group3(X, starts = TRUE, group.sizes = TRUE)
out[[1L]] <- attr(groups, "N.groups")
out[[3L]] <- attr(groups, "group.sizes")
out[[8L]] <- attr(groups, "starts")
if (is.factor(X)) {
out[[6L]] <- add_names(c(NA, TRUE), c("ordered","sorted"))
}
else {
out[[6L]] <- add_names(c(FALSE, NA), c("ordered", "sorted"))
}
strip_attrs(groups, set = TRUE)
out[[2L]] <- groups
names(out) <- c("N.groups", "group.id",
"group.sizes", "groups", "group.vars", "ordered",
"order", "group.starts", "call", "locs")
class(out) <- "GRP"
}
else {
out <- collapse::GRP(
X, by = by, sort = sort, return.order = return.order,
return.groups = return.groups, call = call, ...
)
}
if (!is.null(out)){
out[[8L]] <- GRP_starts(out)
out <- c(unclass(out), list(locs = NULL))
if (return.locs){
out[[10L]] <- GRP_loc(out)
}
class(out) <- "GRP"
}
out
}
# Two alternatives to collapse::group
# They both handle nested data frames
# group2() is simpler (and better) but has 'large' overhead
group2 <- function(X){
group_id(X, order = FALSE, as_qg = TRUE)
}
# Lower overhead than group2() and is essentially an extension
# to collapse::group() for list objects, etc
group3 <- function(X, starts = FALSE, group.sizes = FALSE){
if (is.null(X)) {
return(NULL)
}
if (inherits(X, "data.frame")) {
if (df_ncol(X) == 0){
N <- df_nrow(X)
out <- rep_len(1L, N)
attr(out, "N.groups") <- min(1L, N)
if (starts) {
attr(out, "starts") <- if (N == 0)
integer()
else 1L
}
if (group.sizes) {
attr(out, "group.sizes") <- if (N == 0)
integer()
else N
}
attr(out, "class") <- c("qG", "na.included")
return(out)
} else {
X <- df_mutate_exotic_to_ids(X, order = FALSE)
}
} else if (cpp_is_exotic(X)){
# Essentially group2() as above
out <- group_id(X, order = FALSE, as_qg = TRUE)
if (!starts){
attr(out, "starts") <- NULL
}
if (!group.sizes){
attr(out, "group.sizes") <- NULL
}
return(out)
}
collapse::group(X, starts = starts, group.sizes = group.sizes)
}
# Is object a collapse GRP?
is_GRP <- function(GRP){
inherits(GRP, "GRP")
}
check_GRP <- function(GRP){
if (!is_GRP(GRP)){
cli::cli_abort("{.arg GRP} must be a {.cls GRP}")
}
}
# Number of groups
GRP_n_groups <- function(GRP){
GRP[["N.groups"]]
}
# Group IDs (integer)
GRP_group_id <- function(GRP){
GRP[["group.id"]]
}
GRP_data_size <- function(GRP){
length(GRP_group_id(GRP))
}
# Group sizes
GRP_group_sizes <- function(GRP){
GRP[["group.sizes"]]
}
GRP_expanded_group_sizes <- function(GRP){
GRP_group_sizes(GRP)[GRP_group_id(GRP)]
}
# Groups
GRP_groups <- function(GRP){
GRP[["groups"]]
}
# Group variable names
GRP_group_vars <- function(GRP){
GRP[["group.vars"]]
}
check_GRP_has_groups <- function(GRP){
if (is_GRP(GRP) && is.null(GRP_groups(GRP))){
stop("GRP has no group data. Please supply a GRP object with group data")
}
}
check_data_GRP_size <- function(x, GRP){
if (!is.null(GRP)){
size <- NROW(x)
GRP_size <- GRP_data_size(GRP)
if (size != GRP_size){
stop("size of x must match size of groups")
}
}
}
# Alternate mixed method
# GRP_duplicated <- function(GRP, all = FALSE){
# sizes <- GRP_group_sizes(GRP)
# group_id <- GRP_group_id(GRP)
# if (is.null(GRP[["order"]])){
# out <- (sizes > 1L)[group_id]
# if (!all){
# out[GRP_starts(GRP)] <- FALSE
# }
# } else {
# if (all){
# out <- (sizes > 1L)[group_id]
# } else {
# out <- row_id(GRP) > 1L
# }
# }
# out
# }
GRP_duplicated <- function(GRP, all = FALSE){
sizes <- GRP_group_sizes(GRP)
group_id <- GRP_group_id(GRP)
out <- (sizes > 1L)[group_id]
if (!all){
out[GRP_starts(GRP)] <- FALSE
}
out
}
# Alternate version
# GRP_which_duplicated <- function(GRP, all = FALSE){
# sizes <- GRP_group_sizes(GRP)
# group_id <- GRP_group_id(GRP)
# if (all){
# which((sizes > 1L)[group_id])
# } else {
# which_val(row_id(GRP), 1L, invert = TRUE)
# }
# }
GRP_which_duplicated <- function(GRP, all = FALSE){
which(GRP_duplicated(GRP, all))
}
sorted_group_starts <- function(group_sizes, init_loc = 1L){
cpp_sorted_group_starts(as.integer(group_sizes), init_loc)
}
sorted_group_ends <- function(group_sizes){
collapse::fcumsum(group_sizes)
}
GRP_starts <- function(GRP, use.g.names = FALSE){
out <- GRP[["group.starts"]]
if (is.null(out)){
GRP_sizes <- GRP_group_sizes(GRP)
if (GRP_is_sorted(GRP)){
sorted_starts <- attr(GRP[["order"]], "starts")
if (!is.null(sorted_starts)){
out <- sorted_starts
} else {
out <- sorted_group_starts(GRP_sizes)
}
# For factors with 0 size, replace calculated group starts with 0
out[cheapr::which_val(GRP_sizes, 0L)] <- 0L
} else {
o <- GRP_order(GRP)
starts <- attr(o, "starts")
if (collapse::anyv(GRP_sizes, 0L)){
out <- integer(GRP_n_groups(GRP))
out[cheapr::which_val(GRP_sizes, 0L, invert = TRUE)] <- o[starts]
} else {
out <- o[starts]
}
}
}
if (is.null(out)){
out <- integer()
}
if (use.g.names){
names(out) <- GRP_names(GRP)
}
out
}
# Extract group ends from GRP object safely and efficiently
GRP_ends <- function(GRP, use.g.names = FALSE,
loc = NULL){
GRP_sizes <- GRP_group_sizes(GRP)
if (GRP_is_sorted(GRP)){
out <- sorted_group_ends(GRP_sizes)
# For factors with 0 size, replace 0 with NA
out[cheapr::which_val(GRP_sizes, 0L)] <- 0L
} else {
if (is.null(loc)){
loc <- GRP_loc(GRP, use.g.names = FALSE)
}
out <- GRP_loc_ends(loc, GRP_sizes)
}
if (is.null(out)){
out <- integer()
}
if (use.g.names){
names(out) <- GRP_names(GRP)
}
out
}
# Extract group order from GRP object safely
GRP_order <- function(GRP){
### Only use the below arguments
### If GRP_order is called from radixorderv2
### Otherwise leave as is
# starts = TRUE, group.sizes = FALSE, sort = TRUE){
if (is.null(GRP)){
return(NULL)
}
out <- GRP[["order"]]
if (is.null(out)){
group_id <- GRP_group_id(GRP)
if (GRP_is_sorted(GRP) || cpp_group_id_sorted(group_id)){
out <- seq_along(group_id)
sizes <- GRP_group_sizes(GRP)
starts <- GRP_starts(GRP)
attributes(out) <- list(starts = starts,
maxgrpn = collapse::fmax(sizes),
sorted = TRUE)
# }
} else {
out <- collapse::radixorderv(group_id,
starts = TRUE,
group.sizes = FALSE,
sort = TRUE)
}
}
out
}
# Alternative to gsplit(NULL, g)
GRP_loc <- function(GRP, use.g.names = FALSE){
locs <- GRP[["locs"]]
if (!is.null(locs)){
return(locs)
}
group_id <- GRP_group_id(GRP)
group_sizes <- GRP_group_sizes(GRP)
group_order <- GRP[["order"]]
if (!is.null(group_order)){
out <- cpp_group_locs(group_order, group_sizes)
if (use.g.names){
names(out) <- GRP_names(GRP)
}
} else if (length(group_id) == 0L){
if (use.g.names){
out <- add_names(list(), character(0))
} else {
out <- list()
}
} else {
out <- cpp_group_locs2(group_id, group_sizes)
# out <- collapse::gsplit(NULL, g = GRP, use.g.names = use.g.names)
}
out
}
group_locs <- function(x){
if (is_GRP(x)){
GRP_loc(x)
} else {
# g <- group3(x, group.sizes = TRUE)
# cpp_group_locs2(g, attr(g, "group.sizes", TRUE))
o <- radixorderv2(x, group.sizes = TRUE, starts = FALSE, sort = TRUE)
cpp_group_locs(o, attr(o, "group.sizes"))
}
}
# GRP starts & ends from list of group locations
# Groups are assumed to be sorted and
# index locations are also assumed to be sorted
GRP_loc_starts <- function(loc){
cpp_list_subset(loc, integer(), 1L, 0L)
}
GRP_loc_ends <- function(loc, sizes = NULL){
if (is.null(sizes)){
sizes <- cheapr::list_lengths(loc)
}
list_subset(loc, sizes, default = 0L)
}
GRP_ordered <- function(GRP){
GRP[["ordered"]]
}
GRP_is_ordered <- function(GRP){
ordered <- GRP_ordered(GRP)
sorted <- ordered[names(ordered) == "sorted"]
ordered <- ordered[names(ordered) == "ordered"]
isTRUE(ordered || (is.na(ordered) && !is.na(sorted)))
}
# Logical is GRP sorted
GRP_is_sorted <- function(GRP){
ordered <- GRP_ordered(GRP)
isTRUE(ordered[names(ordered) == "sorted"])
}
GRP_group_data <- function(GRP, expand = FALSE){
out <- list_as_tbl(as.list(GRP_groups(GRP)))
if (expand){
out <- cheapr::sset_df(out, GRP_group_id(GRP))
}
out
}
GRP_names <- function(GRP, sep = "_", expand = FALSE, force.char = FALSE){
g_names <- collapse::GRPnames(GRP, force.char = force.char, sep = sep)
if (expand && !is.null(g_names)){
g_names[GRP_group_id(GRP)]
} else {
g_names
}
}
# Convert data frame to GRP safely
# Either treats data as 1 big group or
# Uses dplyr group vars
grouped_df_as_GRP <- function(data, return.order = TRUE, ...){
cpp_grouped_df_as_grp(data)
}
# Custom GRP method for data frames
# Group starts is always returned
df_to_GRP <- function(data, .cols = character(),
order = group_by_order_default(data),
# drop = df_group_by_drop_default(data),
return.order = TRUE,
return.groups = TRUE,
return.locs = FALSE){
dplyr_groups <- group_vars(data)
cols <- drop_names(col_select_names(data, .cols = .cols))
extra_groups <- vec_setdiff(cols, dplyr_groups)
data2 <- cheapr::sset_df(data, j = cols)
if (length(names(data2)) == 0L){
out <- grouped_df_as_GRP(cpp_ungroup(data2),
return.groups = return.groups,
return.order = return.order,
return.locs = return.locs)
} else if (length(extra_groups) == 0L &&
order == group_by_order_default(data)){
# drop == df_group_by_drop_default(data)){
out <- grouped_df_as_GRP(data2, return.order = return.order,
return.groups = return.groups,
return.locs = return.locs)
} else {
data2 <- cpp_ungroup(data2)
out <- GRP3(
df_mutate_exotic_to_ids(data2, order = order), sort = order,
return.order = return.order,
return.groups = FALSE,
call = FALSE
)
# Basically if any addresses don't match,
# then df_mutate_exotic_to_ids() has converted some
# cols to group id cols.
# If this is the case we need to sset the distinct groups
# using the original data.
# Always add group starts as it's usually not too expensive
starts <- GRP_starts(out)
out[["group.starts"]] <- starts
if (return.groups){
if (order){
slice <- !(length(starts) == df_nrow(data2) &&
isTRUE(attr(out[["order"]], "sorted")))
} else {
slice <- !(length(starts) == df_nrow(data2))
}
if (slice){
groups <- cheapr::sset(data2, starts)
} else {
groups <- data2
}
out[["group.vars"]] <- cols
# if (!drop){
# groups <- expand_unused_levels(groups)
# Extra logic to adjust group sizes and group starts
# }
out[["groups"]] <- groups
}
}
if (return.locs){
out <- cheapr::list_assign(out, list("locs" = GRP_loc(out)))
}
class(out) <- "GRP"
out
}
#' @exportS3Method collapse::GRP
GRP.Interval <- function(X, return.groups = TRUE, ...){
x <- X
X <- interval_separate(x)
out <- collapse::GRP(X, return.groups = FALSE, ...)
if (return.groups){
out[["groups"]] <- list(x = x[GRP_starts(out)])
}
out
}
#' @exportS3Method collapse::GRP
GRP.list <- function(X, return.groups = TRUE, ...){
out <- collapse::GRP(group_id(X, as_qg = TRUE), return.groups = FALSE, ...)
if (return.groups){
out[["groups"]] <- list(x = cheapr::sset(X, GRP_starts(out)))
}
out
}
#' @exportS3Method collapse::GRP
GRP.vctrs_rcrd <- function(X, return.groups = TRUE, ...){
out <- GRP2(cheapr::list_as_df(X), return.groups = FALSE, ...)
if (return.groups){
out[["groups"]] <- list(x = cheapr::sset(X, GRP_starts(out)))
}
out[[10L]] <- NULL
out
}
#' @exportS3Method collapse::GRP
GRP.NULL <- function(X, ...){
NULL
}
#' @exportS3Method collapse::GRP
GRP.integer64 <- function(X, return.groups = TRUE, ...){
out <- collapse::GRP(cpp_int64_to_numeric(X), return.groups = FALSE, ...)
if (return.groups){
out[["groups"]] <- list(x = cheapr::sset(X, GRP_starts(out)))
}
out
}
gsplit2 <- function(x = NULL, g = NULL, use.g.names = FALSE, ...){
if (is.null(g)){
if (is.null(x)){
list(integer())
} else {
list(x)
}
} else {
collapse::gsplit(x, g = g, use.g.names = use.g.names, ...)
}
}
radixorderv2 <- function(x, na.last = TRUE, decreasing = FALSE,
starts = FALSE, sort = TRUE, group.sizes = FALSE){
if (is.null(x)){
return(NULL)
}
if (is_GRP(x)){
return(GRP_order(x))
}
if (is_df(x)){
if (df_ncol(x) == 0){
N <- df_nrow(x)
out <- seq_len(N)
if (starts){
attr(out, "starts") <- if (N == 0) integer() else 1L
}
if (group.sizes){
attr(out, "group.sizes") <- if (N == 0) integer() else N
}
attr(out, "maxgrpn") <- N
attr(out, "sorted") <- TRUE
return(out)
} else {
x <- df_mutate_exotic_to_ids(cpp_ungroup(x), order = TRUE)
}
} else if (cpp_is_exotic(x)){
x <- group_id(x, order = TRUE)
}
collapse::radixorderv(x, starts = starts, sort = sort, group.sizes = group.sizes,
na.last = na.last, decreasing = decreasing)
}
# Helper to grab group sizes
group_sizes <- function(x, sort = FALSE, expand = FALSE){
if (sort && !expand){
groups <- radixorderv2(x, group.sizes = TRUE, sort = TRUE)
} else {
groups <- group2(x)
}
out <- attr(groups, "group.sizes")
if (expand){
out <- out[groups]
}
out
}
## Construct a grouped data frame from a GRP object
construct_grouped_df <- function(data, g, group_vars){
if (is.null(g) || length(group_vars) == 0){
return(f_ungroup(data))
}
groups <- GRP_groups(g)
if (is.null(groups)){
groups <- cheapr::sset_df(cpp_ungroup(data), GRP_starts(g), j = group_vars)
}
group_locs <- GRP_loc(g)
groups[[".rows"]] <- vctrs_new_list_of(group_locs, integer())
attr(groups, ".drop") <- df_group_by_drop_default(data)
attr(groups, "ordered") <- group_by_order_default(data)
out <- data
attr(out, "groups") <- groups
class(out) <- c("grouped_df", "tbl_df", "tbl", "data.frame")
out
}
GRP_names <- function(GRP, sep = "_", expand = FALSE, force.char = FALSE){
g_names <- collapse::GRPnames(GRP, force.char = force.char,
sep = sep)
if (expand && !is.null(g_names)) {
g_names[GRP_group_id(GRP)]
}
else {
g_names
}
}
grouped_first <- function(x, na.rm = TRUE, g = NULL, TRA = NULL, use.g.names = FALSE){
if (is.null(g)){
first <- cheapr::sset(x, min(1L, vector_length(x)))
if (na.rm && vector_length(first) != 0 && cheapr::is_na(first)){
not_na_locs <- cheapr::na_find(x, invert = TRUE)
first_not_na <- not_na_locs[min(1L, length(not_na_locs))]
first <- cheapr::sset(x, first_not_na)
}
return(first)
}
if (cpp_is_exotic(x)){
.g <- GRP2(x, sort = FALSE, return.order = FALSE, return.groups = FALSE, return.locs = FALSE)
group_id <- GRP_group_id(.g)
starts <- GRP_starts(.g)
if (na.rm){
group_id[cheapr::na_find(x)] <- NA_integer_
}
out <- collapse::ffirst(
group_id, na.rm = na.rm, g = g, TRA = TRA, use.g.names = use.g.names
)
cheapr::sset(cheapr::sset(x, starts), out)
} else {
collapse::ffirst(x, na.rm = na.rm, g = g, TRA = TRA, use.g.names = use.g.names)
}
}
grouped_last <- function(x, na.rm = TRUE, g = NULL, TRA = NULL, use.g.names = FALSE){
if (is.null(g)){
last <- cheapr::sset(x, vector_length(x))
if (na.rm && vector_length(last) != 0 && cheapr::is_na(last)){
not_na_locs <- cheapr::na_find(x, invert = TRUE)
last_not_na <- not_na_locs[length(not_na_locs)]
last <- cheapr::sset(x, last_not_na)
}
return(last)
}
if (cpp_is_exotic(x)){
.g <- GRP2(x, sort = FALSE, return.order = FALSE, return.groups = FALSE, return.locs = FALSE)
group_id <- GRP_group_id(.g)
starts <- GRP_starts(.g)
if (na.rm){
group_id[cheapr::na_find(x)] <- NA_integer_
}
out <- collapse::flast(
group_id, na.rm = na.rm, g = g, TRA = TRA, use.g.names = use.g.names
)
cheapr::sset(cheapr::sset(x, starts), out)
} else {
collapse::flast(x, na.rm = na.rm, g = g, TRA = TRA, use.g.names = use.g.names)
}
}
grouped_lag <- function(x, n = 1L, fill = NULL, g = NULL, order_by = NULL){
if (!is.null(g) && !is.null(order_by)){
cli::cli_abort(
"Please supply either groups through {.arg g} or
an ordering vector through {.arg order_by}, not both"
)
}
g <- GRP2(g)
o <- GRP_order(g)
o <- o %||% radixorderv2(order_by)
rl <- GRP_group_sizes(g)
is_recursive <- inherits(x, c("data.frame", "vctrs_rcrd", "POSIXlt"))
exotic <- cpp_is_exotic(x) && !is_recursive && !rlang::is_bare_list(x)
y <- x
.fill <- fill
if (exotic){
xg <- GRP2(y, sort = FALSE, return.groups = FALSE, return.order = FALSE)
y <- GRP_group_id(xg)
if (!is.null(fill)){
fill <- GRP_n_groups(xg) + 1L
}
}
if (is.null(o) && is.null(rl) && length(n) == 1L && is.null(order_by)) {
out <- cheapr::lag_(y, n, fill = fill, recursive = is_recursive)
}
else {
out <- cheapr::lag2_(
y, n, order = o,
run_lengths = rl,
fill = fill, recursive = is_recursive
)
}
if (exotic){
uniq <- cheapr::sset(x, GRP_starts(xg))
if (!is.null(fill)){
uniq <- cheapr::cheapr_c(uniq, .fill)
}
out <- cheapr::sset(uniq, out)
}
out
}
grouped_lead <- function(x, n = 1L, fill = NULL, g = NULL, order_by = NULL){
grouped_lag(x, n = -n, fill = fill, g = g, order_by = order_by)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.