# R/using.R

# Defines functions `[.admisc_fobject` `using.data.frame` `using.matrix` `using.default` `using`

# Copyright (c) 2019 - 2026, Adrian Dusa
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, in whole or in part, are permitted provided that the
# following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The names of its contributors may NOT be used to endorse or promote
#       products derived from this software without specific prior written
#       permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL ADRIAN DUSA BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#' Evaluate an expression in a data environment
#'
#' A function almost identical to the base function \code{with()}, but which
#' can also evaluate the expression separately in every subset of the data
#' defined by one or more split variables.
#'
#' @name using
#' @rdname using
#' @aliases using.data.frame
#' @rawRd
#' \usage{
#' using(data, expr, split.by = NULL, ...)
#' }
#'
#' \arguments{
#'     \item{data}{A data frame.}
#'     \item{expr}{Expression to evaluate}
#'     \item{split.by}{A factor variable from the \code{data}, or a \code{declared}/\code{labelled} variable}
#'     \item{...}{Other internal arguments.}
#' }
#'
#' \value{
#' A list of results, or a matrix if each separate result is a vector.
#' }
#'
#'
#' \author{
#' Adrian Dusa
#' }
#'
#' \examples{
#' set.seed(123)
#' DF <- data.frame(
#'     Area = factor(sample(c("Rural", "Urban"), 123, replace = TRUE)),
#'     Gender = factor(sample(c("Female", "Male"), 123, replace = TRUE)),
#'     Age = sample(18:90, 123, replace = TRUE),
#'     Children = sample(0:5, 123, replace = TRUE)
#' )
#'
#'
#' # table of frequencies for Gender
#' table(DF$Gender)
#'
#' # same with
#' using(DF, table(Gender))
#'
#' # same, but split by Area
#' using(DF, table(Gender), split.by = Area)
#'
#' # calculate the mean age by gender
#' using(DF, mean(Age), split.by = Gender)
#'
#' # same, but select cases from the urban area
#' using(subset(DF, Area == "Urban"), mean(Age), split.by = Gender)
#'
#' # mean age by gender and area
#' using(DF, mean(Age), split.by = Area & Gender)
#'
#' # same with
#' using(DF, mean(Age), split.by = c(Area, Gender))
#'
#' # average number of children by Area
#' using(DF, mean(Children), split.by = Area)
#'
#' # frequency tables by Area
#' using(DF, table(Children), split.by = Area)
#' }
#'
#' \keyword{functions}
NULL
#' @export
`using` <- function(data, expr, split.by = NULL, ...) {
    # S3 generic: dispatches on the class of `data` (the first argument).
    # This file defines methods for data frames, for matrices, and a default
    # method for anything else.
    UseMethod("using")
}
#' @export
`using.default` <- function(data, expr, ...) {
    # Default method: evaluates `expr` with `data` as the evaluation
    # environment and the caller's frame as enclosure.
    #
    # Arguments:
    #   data  object passed as `envir` to eval()
    #   expr  unquoted expression; when missing, a single extra named
    #         argument `name = value` is rewritten as `name <- value`
    #   ...   extra arguments, only inspected when `expr` is missing
    #
    # Value: the value of the expression, returned invisibly when the
    # expression itself was invisible. Evaluation errors are re-raised
    # through stopError().

    if (missing(expr)) {
        # deparse every argument of the call, then keep only those that are
        # neither `data` nor `expr`
        args <- unlist(lapply(match.call(), deparse)[-1])
        args <- args[setdiff(names(args), c("data", "expr"))]
        # exactly one candidate is required: with none, args[[1]] below would
        # fail with an unrelated subscript error instead of this message
        if (length(args) != 1) {
            stopError("Missing or ambiguous expression")
        }
        expr <- str2lang(paste(names(args), args[[1]], sep = "<-"))
    }

    visible <- TRUE
    result <- NULL
    # the braces are evaluated in this frame, so `visible` and `result` are
    # updated here; the code below reads `test$error` and treats NULL as
    # "no error"
    test <- tryCatchWEM({
        tmp <- withVisible(
            eval(substitute(expr), envir = data, enclos = parent.frame())
        )
        visible <- tmp$visible
        result <- tmp$value
    })

    if (is.null(test$error)) {
        if (visible) {
            return(result)
        }
        return(invisible(result))
    }

    stopError(test$error)
}
#' @export
`using.matrix` <- function(data, expr, split.by = NULL, ...) {
    # Matrix method: converts `data` with as.data.frame() and re-dispatches
    # through using(), handing over the captured expression.
    #
    # Arguments:
    #   data      a matrix
    #   expr      unquoted expression; when missing, a single extra named
    #             argument `name = value` is rewritten as `name <- value`
    #   split.by  forwarded to the next method (see the note below)
    #   ...       extra arguments, forwarded as well
    #
    # Value: whatever the method selected for the converted data returns.

    if (missing(expr)) {
        args <- unlist(lapply(match.call(), deparse)[-1])
        # `split.by` is a formal of this method, so it must not be mistaken
        # for the implicit expression (same rule as the data frame method)
        args <- args[setdiff(names(args), c("data", "expr", "split.by"))]
        # exactly one candidate is required: with none, args[[1]] below would
        # fail with an unrelated subscript error instead of this message
        if (length(args) != 1) {
            stopError("Missing or ambiguous expression")
        }
        expr <- str2lang(paste(names(args), args[[1]], sep = "<-"))
    }

    # capture the unevaluated expression; the next method receives the bare
    # symbol `expr` and obtains this language object by evaluating it
    expr <- substitute(expr)

    # NOTE(review): the next method sees `split.by` as the bare symbol
    # `split.by`; the data frame method in this file treats that symbol as
    # "no split", so a split requested on a matrix appears to be ignored.
    # Confirm whether that is intended.
    return( 
        using(as.data.frame(data), expr, split.by = split.by, ... = ...)
    )
}
#' @export
`using.data.frame` <- function(data, expr = expr, split.by = NULL, ...) {
    # Data frame method.
    #
    # Without `split.by`, the expression is evaluated once, with the columns
    # of `data` visible by name and the caller's frame as enclosure. With
    # `split.by`, every split variable is converted to a factor, all
    # combinations of their levels are enumerated, and the expression is
    # evaluated on the rows of each combination that actually occurs.
    #
    # Arguments:
    #   data      a data frame with at least one row
    #   expr      unquoted expression; when missing, a single extra named
    #             argument `name = value` is rewritten as `name <- value`
    #   split.by  unquoted expression such as `Area`, `c(Area, Gender)` or
    #             `Area & Gender`; only the variable names it contains are
    #             used, looked up in `data` first and then in the caller
    #   ...       extra arguments, only inspected when `expr` is missing
    #
    # Value: without a split, the value of the expression. With a split, an
    # object of class "admisc_fobject": a matrix with one row per non-empty
    # group when every group result is atomic (and not a "wtable"/"w_table"
    # object), otherwise a list of group results carrying a "split"
    # attribute. Invisibility of the evaluated expression is preserved.

    if (nrow(data) == 0) {
        stopError("There are no rows in the data.")
    }

    # The frame that called using(). It is captured once, because
    # parent.frame() evaluated inside an lapply()/vapply() callback refers to
    # the frame of the apply function, not to the user's calling frame.
    caller <- parent.frame()

    test <- substitute(split.by)
    split.by <- NULL
    # a bare `split.by` symbol means the argument was merely forwarded by
    # another method; it is treated as no split at all
    if (!identical(as.character(test), "split.by")) {
        split.by <- test
    }
    sby <- all.vars(split.by)

    if (missing(expr)) {
        args <- unlist(lapply(match.call(), deparse)[-1])
        args <- args[setdiff(names(args), c("data", "expr", "split.by"))]
        # exactly one candidate is required: with none, args[[1]] below would
        # fail with an unrelated subscript error instead of this message
        if (length(args) != 1) {
            stopError("Missing or ambiguous expression")
        }
        expr <- str2lang(paste(names(args), args[[1]], sep = "<-"))
    }

    # a bare `expr` symbol means the expression was already captured by the
    # forwarding method, in which case its value is the language object
    test <- substitute(expr)
    if (!identical(as.character(test), "expr")) {
        expr <- test
    }

    # columns needed by the expression; a dot stands for all columns
    vexpr <- all.vars(expr)
    if (any(vexpr == ".")) {
        vexpr <- colnames(data)
    } else {
        vexpr <- vexpr[is.element(vexpr, colnames(data))]
    }

    if (length(sby) == 0) {
        visible <- TRUE
        result <- NULL
        # the braces are evaluated in this frame, so `visible` and `result`
        # are updated here; a NULL `test$error` is treated as success
        test <- tryCatchWEM({
            tmp <- withVisible(
                eval(expr, envir = data, enclos = caller)
            )
            visible <- tmp$visible
            result <- tmp$value
        })

        if (is.null(test$error)) {
            if (visible) {
                return(result)
            }
            return(invisible(result))
        }

        stopError(gsub("object", "column", test$error))
    }

    nms <- names(data)
    existing <- vapply(sby, function(x) {
        is.element(x, nms) || exists(x, envir = caller, inherits = TRUE)
    }, logical(1))

    if (any(!existing)) {
        stopError("Split by variables do not exist in the data.")
    }

    # fetch every split variable and convert it to a factor
    sbylist <- lapply(sby, function(name) {
        # as.name() instead of parse(text = ) also copes with column names
        # that are not syntactically valid
        x <- eval(as.name(name), envir = data, enclos = caller)

        if (inherits(x, "declared") || inherits(x, "haven_labelled")) {
            labels <- attr(x, "labels", exact = TRUE)
            na_values <- attr(x, "na_values")
            na_range <- attr(x, "na_range")

            if (!is.null(na_range)) {
                if (length(na_range) > 2) {
                    stopError("Split by variable has a missing range with more than two values.")
                }
                na_values <- sort(union(
                    na_values,
                    seq(na_range[1], na_range[2])
                ))
            }

            if (inherits(x, "haven_labelled")) {
                # blank out the values declared as missing
                x[is.element(x, na_values)] <- NA
            }

            # levels: observed values plus labelled values, minus missing ones
            uniques <- sort(
                setdiff(
                    c(undeclareit(x, drop = TRUE), labels),
                    na_values
                )
            )
            names(uniques) <- uniques
            labels <- labels[is.element(labels, uniques)]
            # values that have a label are displayed through that label
            names(uniques)[match(labels, uniques)] <- names(labels)
            attributes(x) <- NULL
            return(factor(x, levels = uniques, labels = names(uniques)))
        }

        as.factor(x)
    })
    names(sbylist) <- sby

    sbylengths <- unique(vapply(sbylist, length, integer(1)))
    if (length(sbylengths) > 1 || nrow(data) != sbylengths) {
        stopError("Split variables do not match the number of rows in the data.")
    }

    sl <- lapply(sbylist, function(x) levels(x))
    names(sl) <- sby
    noflevels <- unlist(lapply(sl, length))

    # enumerate all combinations of levels, the last variable varying fastest
    mbase <- c(rev(cumprod(rev(noflevels))), 1)[-1]
    orep  <- cumprod(
        rev(
            c(rev(noflevels)[-1], 1)
        )
    )

    # built explicitly as a matrix: sapply() would collapse to a plain vector
    # when every split variable has a single level
    retmat <- matrix(
        unlist(lapply(seq_along(sl), function(x) {
            rep.int(
                rep.int(
                    seq_len(noflevels[x]),
                    rep.int(mbase[x], noflevels[x])
                ),
                orep[x]
            )
        })),
        nrow = prod(noflevels),
        ncol = length(sl)
    )

    # same layout, holding the level labels instead of their positions
    slexp <- retmat
    for (i in seq_along(sl)) {
        slexp[, i] <- sl[[i]][retmat[, i]]
    }

    data <- data[, vexpr, drop = FALSE]
    res <- vector(mode = "list", length = nrow(slexp))
    visible <- TRUE

    for (r in seq_len(nrow(slexp))) {
        selection <- rep(TRUE, nrow(data))
        for (v in seq_len(ncol(slexp))) {
            # every element of sbylist is a factor at this point, so the
            # comparison is made against its level labels
            selection <- selection & (sbylist[[v]] == slexp[r, v])
        }
        # rows with a missing split value compare as NA and are left out
        selection <- selection & !is.na(selection)

        if (any(selection)) {
            # direct indexing rather than subset(), which would pick up a
            # data column that happens to be named `selection`
            tmp <- withVisible(
                eval(
                    expr = expr,
                    envir = data[selection, , drop = FALSE],
                    enclos = caller
                )
            )
            visible <- tmp$visible
            res[[r]] <- tmp$value
        }
    }

    # discard the combinations that have no rows
    empty <- vapply(res, is.null, logical(1))
    res <- res[!empty]

    any_wtable <- any(
        vapply(res, function(x) {
            is.element(class(x)[1], c("wtable", "w_table"))
        }, logical(1))
    )

    # labels of the surviving groups, kept as a matrix even when a single
    # group or a single split variable is left
    groups <- slexp[!empty, , drop = FALSE]

    if (all(vapply(res, is.atomic, logical(1))) && !any_wtable) {
        # one row per group, padded with NA up to the longest result
        lens <- vapply(res, length, integer(1))
        result <- matrix(NA, nrow = length(res), ncol = max(lens))
        for (i in seq_along(res)) {
            result[i, seq_along(res[[i]])] <- res[[i]]
        }
        result[] <- coerceMode(round(result, 3))
        rownames(result) <- apply(groups, 1, function(x) paste(x, collapse = ","))

        # column names are derived from the expression or from result names
        expr <- as.list(expr)
        if (max(lens) == 1) {
            colnames(result) <- as.character(expr[[1]])
        } else {
            if (as.character(expr[1]) == "c") {
                expr <- expr[-1]
            }
            cexpr <- sapply(expr, as.character)
            if (is.matrix(cexpr) && nrow(cexpr) == 2) {
                # calls of the form f(x): keep whichever part varies
                if (length(unique(cexpr[1, ])) == 1) {
                    cexpr <- cexpr[2, ]
                } else if (length(unique(cexpr[2, ])) == 1) {
                    cexpr <- cexpr[1, ]
                }
            }
            nms <- names(res[[which.max(lens)]])
            if (is.null(nms)) {
                if (max(lens) == length(expr) && !is.element("table", expr)) {
                    if (max(lens) == length(cexpr)) {
                        nms <- cexpr
                    } else {
                        nms <- sapply(expr, deparse)
                    }
                } else {
                    nms <- rep(" ", max(lens))
                }
            }
            if (
                any(nms == "") &&
                is.element("summary", cexpr) &&
                sum(nms == "") == length(expr) - 1
            ) {
                nms[nms == ""] <- setdiff(cexpr, "summary")
            }
            colnames(result) <- nms
        }
        res <- result
        class(res) <- c("admisc_fobject", "matrix")
    } else {
        # same shape as before: a vector for a single split variable
        attr(res, "split") <- slexp[!empty, ]
        class(res) <- c("admisc_fobject", class(res))
    }

    if (visible) {
        return(res)
    }
    return(invisible(res))
}
#' @export
`[.admisc_fobject` <- function(x, i, j, drop = FALSE, ...) {
    # Subsetting method for "admisc_fobject" objects, which keeps the class
    # on the result.
    #
    # Arguments:
    #   x     an "admisc_fobject" (a matrix or a list)
    #   i, j  row / column (or element) indices
    #   drop  passed to the inherited method for matrices; FALSE by default
    #   ...   unused here
    #
    # Value: the subset, with "admisc_fobject" prepended to its class. For
    # matrices the result is again a matrix, with row and column names
    # selected by the same indices.

    # select labels through a named copy, so that numeric, logical and
    # character indices all return the matching labels
    pick <- function(labels, index) {
        names(labels) <- labels
        unname(labels[index])
    }

    class(x) <- setdiff(class(x), "admisc_fobject")

    if (is.matrix(x)) {
        dims <- dimnames(x)
        if (!is.null(dims)) {
            if (!is.null(dims[[1]]) && !missing(i) && !is.null(i)) {
                dims[[1]] <- pick(dims[[1]], i)
            }
            if (!is.null(dims[[2]]) && !missing(j) && !is.null(j)) {
                dims[[2]] <- pick(dims[[2]], j)
            }
        }
        # NextMethod() only forwards the arguments present in the call, so
        # the `drop` default of this method has to be passed explicitly;
        # otherwise a single row or column is dropped to a vector and the
        # dimnames assignment below fails
        x <- NextMethod(drop = drop)
        x <- as.matrix(x)
        dimnames(x) <- dims
    } else {
        x <- NextMethod()
    }

    class(x) <- c("admisc_fobject", class(x))
    return(x)
}

# Try the admisc package in your browser
#
# Any scripts or data that you put into this service are public.
#
# admisc documentation built on March 27, 2026, 9:06 a.m.