library(tidyverse)
library(tidyselect)
library(onezero)
library(Rfast)
pets %>%
shapley_approx(cols = dog:bird, tidy = F) %>% sum()
drop_low_rows <- function(data, cols, n, prop) {
# Using n or prop? --------------------------------------------------------
if (!missing(n) & !missing(prop)) {
stop("Must supply exactly one of `n` and `prop` arguments.")
}
if (!missing(n)) {
counts <- TRUE
} else {
counts <- FALSE
}
# Get columns -------------------------------------------------------------
.cols <- enquo(cols)
col.index <- eval_select(
expr = .cols,
data = data
)
col.names <- names(col.index)
NC <- length(col.names)
# Error check -------------------------------------------------------------
if (counts) {
if (!between(n, 0, NC)) {
stop("Input to `n` must be a value between 0 and number of columns specified in `cols` argument.")
}
} else {
if (!between(prop, 0, 1)) {
stop("Input to `p` must be a value between 0 and 1.")
}
}
# Calc --------------------------------------------------------------------
if (counts) {
keep <- rowSums(data[, col.names], na.rm = TRUE) >= n
} else {
keep <- rowMeans(data[, col.names], na.rm = TRUE) >= prop
}
data[keep, ]
}
pets %>%
drop_low_rows(
cols = where(is_onezero),
prop = 0.4
)
drop_low_cols <- function(data, cols, n, prop) {
# Using n or prop? --------------------------------------------------------
if (!missing(n) & !missing(prop)) {
stop("Must supply exactly one of `n` and `prop` arguments.")
}
if (!missing(n)) {
counts <- TRUE
} else {
counts <- FALSE
}
# Get columns -------------------------------------------------------------
.cols <- enquo(cols)
col.index <- eval_select(
expr = .cols,
data = data
)
col.names <- names(col.index)
NR <- nrow(data)
# Error check -------------------------------------------------------------
if (counts) {
if (!between(n, 0, NR)) {
stop("Input to `n` must be a value between 0 and number of rows in `data`.")
}
} else {
if (!between(prop, 0, 1)) {
stop("Input to `p` must be a value between 0 and 1.")
}
}
# Calc --------------------------------------------------------------------
if (counts) {
drop <- colSums(data[, col.names], na.rm = TRUE) < n
} else {
drop <- colMeans(data[, col.names], na.rm = TRUE) < prop
}
drop.cols <- names(drop[drop])
select(data, -all_of(drop.cols))
}
drop_low_cols(
data = pets,
cols = where(is_onezero),
prop = 0.2
)
drop_na_rows
drop_na_cols
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.