R/inum.R

Defines functions weights.inumtotal as.data.frame.inumtotal as.data.frame.inum inum.data.frame inum.default inum

Documented in inum inum.data.frame

inum <- function(object, nmax = 20, ...)
    UseMethod("inum")

inum.default <- function(object, nmax = 20, ...)
    stop("cannot handle objects of class", " ", sQuote(class(object)))

inum.data.frame <- function(object, nmax = 20, ignore = NULL, total = FALSE, 
                           weights = NULL, as.interval = "", 
                           complete.cases.only = FALSE, meanlevels = FALSE, ...) {

    if (total) {
        bdr <- inum(object, nmax = nmax, ignore = ignore, 
                   total = FALSE, as.interval = as.interval) 
        bdr2 <- lapply(bdr, function(x)
            factor(x, levels = 0:nlevels(x)))
        ret <- do.call("interaction", bdr2)
        if (!is.null(weights)) {
            tab <- xtabs(weights ~ ret)
        } else {
            tab <- table(ret)
        }
        tab0 <- which(tab > 0)

        sDF <- vector(mode = "list", length = length(bdr))
        len <- sapply(bdr2, nlevels)
        ### do.call("expand.grid", bdr), essentially
        for (j in 1:length(len)) {
            ix <- 1:len[j]
            if (j > 1)
                ix <- rep(ix, each = prod(len[1:(j - 1)]))
            idx <- rep(ix, length.out = prod(len))[tab0]
            if (inherits(bdr[[j]], "interval")) {
                sDF[[j]] <- (0:nlevels(bdr[[j]]))[idx]
                attr(sDF[[j]], "levels") <- attr(bdr[[j]], "levels")
                class(sDF[[j]]) <- class(bdr[[j]])
            } else {
                lev <- attr(bdr[[j]], "levels")
                lev <- lev[c(1, 1:length(lev))]
                lev[1] <- NA
                sDF[[j]] <- lev[idx, drop = FALSE]
            }
        }
        ### note: sDF contains missings and 
        ### ret is always > 0 (is, no missings)
        ### this is different from enum/integer types
        ### should we handle this here?
        sDF <- as.data.frame(sDF)
        colnames(sDF) <- names(bdr)
        sDF[["(weights)"]] <- as.numeric(tab[tab0])
        rownames(sDF) <- NULL
        ret <- unclass(ret[, drop = TRUE])

        if (complete.cases.only) {
            cc <- rowSums(sapply(sDF[colnames(sDF) != "(weights)"], 
                                 function(x) is.na(x))) == 0
            cc[is.na(cc)] <- TRUE
            if (any(!cc)) {
                sDF <- sDF[cc,,drop = FALSE]
                rownames(sDF) <- 1:nrow(sDF)
                i <- rep.int(1L, length(cc))
                i[!cc] <- 0
                i <- cumsum(i)
                i[!cc] <- 0
                ret <- i[ret]
            }
        }  

        attr(ret, "levels") <- sDF
        class(ret) <- "inumtotal"
        return(ret)
    }

    ret <- vector(mode = "list", length = ncol(object))
    names(ret) <- cn <- colnames(object)

    if (!is.null(ignore)) {
        if (is.integer(ignore)) cn <- cn[-ignore]
        if (is.character(ignore)) cn <- cn[!(cn %in% ignore)]
    }

    if (any(as.interval != "")) {
        if (!is.character(as.interval))
            stop(sQuote("as.interval"), " ", "is not a character")
    }

    for (v in cn) {
        x <- object[[v]]
        if (is.logical(x) || is.factor(x) || is.integer(x)) {
            ix <- enum(x)
        } else if (is.numeric(x)) {
            ux <- oux <- sort(unique(x))
            xmin <- ux[1]
            xmax <- ux[length(ux)] 
            if (length(ux) > nmax)
                ux <- unique(quantile(x, prob = 1:(nmax - 1L) / nmax, 
                                      na.rm = TRUE))
            ux <- ux[ux < xmax]
            if (length(ux) > 1L) {
                tol <- max(min(diff(sort(ux))), sqrt(.Machine$double.eps))
            } else {
                tol <- sqrt(.Machine$double.eps)
            }
            ix <- interval(x, breaks = c(xmin - tol, ux, xmax))
            if (all(as.interval != v)) {
                if (length(oux) <= nmax) {
                    ### assign sorted unique values
                    attr(ix, "levels") <- as.double(oux)
                } else {
                    if (meanlevels) {
                        ### compute mean of x-values for each level
                        ### and assign; first element corresponds to NAs
                        w <- x
                        w[is.na(w)] <- 0 ### does not count
                        ix2 <- unclass(ix) ### is of length + 1
                        attr(ix2, "levels") <- NULL
                        sx <- libcoin::ctabs(ix = ix2, weights = w)[-1] ### w/o NAs
                        cn <- libcoin::ctabs(ix = ix2)[-1]
                        lev <- sx / cn
                        attr(ix, "levels") <- lev
                    } else {
                        ### this maximises distances to original
                        ### measurements but leads to correct cutpoints
                        nux <- c(ux, xmax)
                        attr(ix, "levels") <- as.double(nux)
                    }
                }
                class(ix) <- c("enum", "integer")
             }
        } else if (is.data.frame(x)) {
            ix <- inum(x, nmax = nmax, ignore = ignore, total = TRUE,
                      as.interval = as.interval)
        } else {
            ix <- inum(x, nmax = nmax, ...) ### nothing as of now
        }
        ret[[v]] <- ix
    }
    class(ret) <- "inum"
    ret
}

### only useful for checks
as.data.frame.inum <- function(x, ...) {
    ret <- lapply(x, function(x) {
        if (inherits(x, "interval")) return(x)
        lev <- attr(x, "levels")
        lev <- lev[c(1, 1:length(lev))]
        lev[1] <- NA
        return(lev[x + 1])
    })
    class(ret) <- "data.frame"
    attr(ret, "row.names") <- 1:NROW(ret[[1]])
    ret
}

as.data.frame.inumtotal <- function(x, ...) 
    attr(x, "levels")

weights.inumtotal <- function(object, ...)
    attr(object, "levels")[["(weights)"]]

### does not make sense
# is.numeric.Surv <- function(x, ...)
#    return(FALSE)
# inum.Surv <- function(object, nmax = 20, ...) {
#     x <- inum(as.data.frame(unclass(object)), nmax = nmax, total = TRUE)
#     lev <- as.matrix(attr(x, "levels"))
#     atr <- attributes(object)
#     atr$dim <- dim(lev)
#     atr$dimnames <- dimnames(lev)
#     attributes(lev) <- atr
#     attr(x, "levels") <- lev
#     x
# }

Try the inum package in your browser

Any scripts or data that you put into this service are public.

inum documentation built on March 31, 2023, 3:07 p.m.