R/categories.R

Defines functions is.3vl ensureNoDataCategory addNoDataCategory handleMissingCategoryLookup i2n n2i setDates setValues .na.omit.categories concatenateCategories is.categories

setValidity("Categories", function(object) {
    are.cats <- vapply(object, is.category, logical(1))
    if (!all(are.cats)) {
        badcount <- sum(!are.cats)
        return(paste0(
            "Invalid categories: ", badcount,
            ifelse(badcount > 1,
                " elements are not Crunch category objects.",
                " element is not a Crunch category object."
            )
        ))
    }
    if (any(duplicated(names(object)))) {
        return("Invalid category names: must be unique")
    }
    if (any(duplicated(ids(object)))) {
        return("Invalid category ids: must be unique")
    }
    return(TRUE)
})

is.categories <- function(x) inherits(x, "Categories")

concatenateCategories <- function(...) {
    ## c() S3 method for categories. Dispatch is on ..1
    dots <- list(...)
    iscat <- vapply(dots, is.category, logical(1))
    iscats <- vapply(dots, is.categories, logical(1))
    if (!all(iscat | iscats)) {
        stop("Invalid categories")
    }
    dots[iscat] <- lapply(dots[iscat], function(x) list(x))
    dots[iscats] <- lapply(dots[iscats], function(x) x@.Data)
    return(Categories(data = do.call(c, dots)))
}

#' S3 method to concatenate Categories and Category objects
#'
#' @param ... see \code{\link[base]{c}}
#' @return An object of class \code{\link{Categories}}
#' @name c-categories
#' @export
#' @examples
#' cat.a <- Category(name = "First", id = 1, numeric_value = 1, missing = FALSE)
#' cat.b <- Category(name = "Second", id = 2)
#' cat.c <- Category(name = "Third", id = 3, missing = TRUE)
#' cats.1 <- Categories(cat.a, cat.b)
#' identical(cats.1, c(cat.a, cat.b))
#' identical(c(cats.1, cat.c), Categories(cat.a, cat.b, cat.c))
c.Categories <- concatenateCategories

#' @rdname c-categories
#' @export
c.Category <- concatenateCategories

#' @rdname crunch-extract
#' @export
setMethod("[<-", c("Categories", "ANY"), function(x, i, ..., value) {
    x@.Data[i] <- Categories(data = value)
    return(x)
})


#' @rdname describe-catalog
#' @export
setMethod("ids<-", "Categories", function(x, value) {
    if (!identical(ids(x), value)) {
        halt("Cannot modify category ids")
    }
    return(x)
})

.na.omit.categories <- function(object, ...) {
    missings <- vapply(object, function(x) isTRUE(x$missing), logical(1),
        USE.NAMES = FALSE
    )
    if (any(missings)) {
        object <- object[!missings]
        attr(object, "na.action") <- which(missings)
        attr(object, "class") <- "omit"
    }
    return(object)
}

#' Omit missing categories
#' @param object Categories
#' @param ... additional arguments, ignored
#' @return \code{object} with any categories that have missing: TRUE excluded
#' @name na-omit-categories
NULL

#' @rdname na-omit-categories
#' @export
setMethod("na.omit", "Categories", function(object, ...) {
    Categories(data = .na.omit.categories(object))
})

#' is.na for Categories
#'
#' Crunch categorical variables allow you to set multiple categories as missing.
#' For instance, you might have "not answered" and "doesn't know" both coded as
#' missing. This function returns a logical vector of all dataset entries that
#' fall into any of the missing categories. It also allows you to append
#' additional categories to the list of missing categories using the setter.
#'
#' @param x Categories or a single Category
#' @param value To change the missingness of categories, supply either:
#' 1. a logical vector of equal length of the categories (or length 1 for the
#' Category method); or
#' 1. the names of the categories to mark as missing.
#' If supplying the latter, any categories already indicated as missing will
#' remain missing.
#' @return Getters return logical, a named vector in the case of the Categories
#' method; setters return `x` duly modified.
#' @name is-na-categories
NULL

setValues <- function(x, value) {
    x[] <- mapply(setValue, x[], value = value, SIMPLIFY = FALSE)
    return(x)
}

setDates <- function(x, value) {
    x[] <- mapply(setDate, x[], value = value, SIMPLIFY = FALSE)
    return(x)
}

#' @rdname describe-catalog
#' @export
setMethod("values", "Categories", function(x) vapply(x, value, numeric(1)))

#' @rdname describe-catalog
#' @export
setMethod("values<-", "Categories", setValues)

#' @rdname describe-catalog
#' @export
setMethod("dates", "Categories", function(x) {
    structure(vapply(x, dates, character(1), USE.NAMES = FALSE), .Names = names(x))
})

#' @rdname describe-catalog
#' @export
setMethod("dates<-", "Categories", setDates)

#' @rdname is-na-categories
#' @aliases is-na-categories
#' @export
setMethod("is.na", "Categories", function(x) {
    structure(vapply(x, is.na, logical(1), USE.NAMES = FALSE), .Names = names(x))
})

#' @rdname is-na-categories
#' @export
setMethod("is.na<-", c("Categories", "character"), function(x, value) {
    ix <- match(value, names(x))
    out <- handleMissingCategoryLookup(ix, value, strict = TRUE)
    x[ix] <- lapply(x[ix], `is.na<-`, value = TRUE)
    return(x)
})

#' @rdname is-na-categories
#' @export
setMethod("is.na<-", c("Categories", "logical"), function(x, value) {
    stopifnot(length(x) == length(value))
    x@.Data <- mapply(function(x, value) {
        is.na(x) <- value
        return(x)
    }, x = x@.Data, value = value, USE.NAMES = FALSE, SIMPLIFY = FALSE)
    return(x)
})

n2i <- function(x, cats, strict = TRUE) {
    ## Convert x from category names to the corresponding category ids
    out <- ids(cats)[match(x, names(cats))]
    out <- handleMissingCategoryLookup(out, x, strict)
    return(out)
}

i2n <- function(x, cats, strict = TRUE) {
    ## Convert x from category ids to the corresponding category names
    out <- names(cats)[match(x, ids(cats))]
    out <- handleMissingCategoryLookup(out, x, strict)
    return(out)
}

handleMissingCategoryLookup <- function(result, original, strict = TRUE) {
    bad <- is.na(result)
    if (any(bad)) {
        msg <- paste(
            ifelse(sum(bad) > 1, "Categories", "Category"),
            "not found:", serialPaste(dQuote(original[bad]))
        )
        if (strict) {
            ## Break
            halt(msg)
        } else {
            ## Warn and drop
            msg <- paste0(msg, ". Dropping.")
            warning(msg, call. = FALSE)
            result <- na.omit(result)
        }
    }
    return(result)
}

addNoDataCategory <- function(variable) {
    cats <- ensureNoDataCategory(categories(variable))
    if (is.subvariable(variable)) {
        ## Have to point at parent
        crPATCH(absoluteURL("../../", self(variable)),
            body = toJSON(list(categories = cats))
        )
        variable <- refresh(variable)
    } else {
        categories(variable) <- cats
    }
    return(variable)
}

ensureNoDataCategory <- function(cats) {
    if ("No Data" %in% names(cats)) {
        # check "No Data"?
        return(cats)
    } else {
        return(c(cats, Category(data = .no.data)))
    }
}

.no.data <- list(
    id = -1L,
    name = "No Data",
    numeric_value = NULL,
    missing = TRUE
)

.selected.cats <- list(
    list(
        id = 1L,
        name = "Selected",
        numeric_value = 1,
        missing = FALSE,
        selected = TRUE
    ),
    list(
        id = 0L,
        name = "Other",
        numeric_value = 0,
        missing = FALSE
    ),
    .no.data
)

is.3vl <- function(cats) {
    ## Infer whether these categories are from a Three-Valued Logic categorical
    ## This is temporarily stricter than we want so that only formerly boolean
    ## types are detected as logical, not MR subvars or other "selected" vars
    if (!is.categories(cats)) {
        cats <- categories(cats)
    }
    return(
        setequal(ids(cats), c(-1, 0, 1)) &&
            setequal(names(cats), c("Selected", "Other", "No Data")) &&
            sum(is.selected(cats)) == 1 &&
            sum(is.na(cats)) == 1
    )
}

#' Get and set Categories on Variables
#'
#' @param x a Variable
#' @param value for the setters, an object of class Categories to set.
#' @return Getters return Categories; setters return \code{x} duly modified.
#' @name var-categories
#' @aliases var-categories categories categories<-
setGeneric("categories", function(x) standardGeneric("categories"))
#' @rdname var-categories
setGeneric("categories<-", function(x, value) standardGeneric("categories<-"))

#' @rdname var-categories
#' @export
setMethod("categories", "VariableTuple", function(x) {
    ## VariableTuples from a regular VariableCatalog don't have categories.
    ## But, from variableMetadata() and from variables(cube), they do. And
    ## if they do, return them instead of making an entity() request.
    cats <- x$categories
    if (!is.null(cats)) {
        cats <- Categories(data = cats)
    }
    return(cats)
})

#' @rdname var-categories
#' @export
setMethod("categories", "CrunchVariable", function(x) categories(tuple(x)))
#' @rdname var-categories
#' @export
setMethod(
    "categories", "CategoricalVariable",
    function(x) callNextMethod(x) %||% categories(entity(x))
)
#' @rdname var-categories
#' @export
setMethod(
    "categories", "CategoricalArrayVariable",
    function(x) callNextMethod(x) %||% categories(entity(x))
)

#' @rdname var-categories
#' @export
setMethod(
    "categories", "VariableEntity",
    function(x) Categories(data = x@body$categories)
)

#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalVariable", "Categories"),
    function(x, value) {
        ent <- setEntitySlot(entity(x), "categories", value)
        dropCache(cubeURL(x))
        return(x)
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalArrayVariable", "Categories"),
    function(x, value) {
        ent <- setEntitySlot(entity(x), "categories", value)
        lapply(subvariableURLs(tuple(x)), dropCache) ## Subvariables will update too
        dropCache(cubeURL(x))
        return(x)
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalVariable", "numeric"),
    function(x, value) {
        halt(
            "`categories(x) <- value` only accepts Categories, not numeric. ",
            "Did you mean `values(categories(x)) <- value`?"
        )
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalVariable", "character"),
    function(x, value) {
        halt(
            "`categories(x) <- value` only accepts Categories, not ",
            "character. Did you mean `names(categories(x)) <- value`?"
        )
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalVariable", "ANY"),
    function(x, value) {
        halt(
            "`categories(x) <- value` only accepts Categories, not ",
            class(value), "."
        )
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalArrayVariable", "numeric"),
    function(x, value) {
        halt(
            "`categories(x) <- value` only accepts Categories, not numeric. ",
            "Did you mean `values(categories(x)) <- value`?"
        )
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalArrayVariable", "character"),
    function(x, value) {
        halt(
            "`categories(x) <- value` only accepts Categories, not ",
            "character. Did you mean `names(categories(x)) <- value`?"
        )
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CategoricalArrayVariable", "ANY"),
    function(x, value) {
        halt(
            "`categories(x) <- value` only accepts Categories, not ",
            class(value), "."
        )
    }
)
#' @rdname var-categories
#' @export
setMethod(
    "categories<-", c("CrunchVariable", "ANY"),
    function(x, value) {
        halt("category assignment not defined for ", class(x))
    }
)

Try the crunch package in your browser

Any scripts or data that you put into this service are public.

crunch documentation built on Aug. 31, 2023, 1:07 a.m.