test-code/turf-working/old-attempts/row-col-dropping.R

library(tidyverse)
library(tidyselect)
library(onezero)
library(Rfast)

pets %>%
    shapley_approx(cols = dog:bird, tidy = F) %>% sum()



drop_low_rows <- function(data, cols, n, prop) {


    # Using n or prop? --------------------------------------------------------

    if (!missing(n) & !missing(prop)) {
        stop("Must supply exactly one of `n` and `prop` arguments.")
    }

    if (!missing(n)) {
        counts <- TRUE
    } else {
        counts <- FALSE
    }


    # Get columns -------------------------------------------------------------

    .cols <- enquo(cols)
    col.index <- eval_select(
        expr = .cols,
        data = data
    )

    col.names <- names(col.index)

    NC <- length(col.names)


    # Error check -------------------------------------------------------------

    if (counts) {

        if (!between(n, 0, NC)) {
            stop("Input to `n` must be a value between 0 and number of columns specified in `cols` argument.")
        }

    } else {

        if (!between(prop, 0, 1)) {
            stop("Input to `p` must be a value between 0 and 1.")
        }
    }


    # Calc --------------------------------------------------------------------

    if (counts) {

        keep <- rowSums(data[, col.names], na.rm = TRUE) >= n

    } else {

        keep <- rowMeans(data[, col.names], na.rm = TRUE) >= prop

    }

    data[keep, ]


}

pets %>%
    drop_low_rows(
        cols = where(is_onezero),
        prop = 0.4
    )


drop_low_cols <- function(data, cols, n, prop) {

    # Using n or prop? --------------------------------------------------------

    if (!missing(n) & !missing(prop)) {
        stop("Must supply exactly one of `n` and `prop` arguments.")
    }

    if (!missing(n)) {
        counts <- TRUE
    } else {
        counts <- FALSE
    }


    # Get columns -------------------------------------------------------------

    .cols <- enquo(cols)
    col.index <- eval_select(
        expr = .cols,
        data = data
    )

    col.names <- names(col.index)

    NR <- nrow(data)


    # Error check -------------------------------------------------------------

    if (counts) {

        if (!between(n, 0, NR)) {
            stop("Input to `n` must be a value between 0 and number of rows in `data`.")
        }

    } else {

        if (!between(prop, 0, 1)) {
            stop("Input to `p` must be a value between 0 and 1.")
        }
    }


    # Calc --------------------------------------------------------------------

    if (counts) {

        drop <- colSums(data[, col.names], na.rm = TRUE) < n

    } else {

        drop <- colMeans(data[, col.names], na.rm = TRUE) < prop

    }

    drop.cols <- names(drop[drop])
    select(data, -all_of(drop.cols))

}

drop_low_cols(
    data = pets,
    cols = where(is_onezero),
    prop = 0.2
)


drop_na_rows

drop_na_cols
ttrodrigz/onezero documentation built on May 9, 2023, 2:59 p.m.