R/univar.R

Defines functions `univar`

`univar` <-
function(formula, data, digits = 3) {

    dfname <- as.character(substitute(data))
    if(!is.data.frame(data))
        stop(paste("\n\nThe object ", dfname, " is not data frame.\n\n", sep = ""))

    CALL <- match.call()
    # in case the formula was provided as an object
    CALL[[2]] <- formula(deparse(formula))

    # Convert any variable of mode character into a factor
    data <- as.data.frame(lapply(data, function(x) if(mode(x) == "character") factor(x) else x))

    all.var <- all.vars(formula)
    right.var <- attr(terms(formula), "term.labels")
    nbfact <- length(right.var)
    
    # Remove NA's in right-hand factors    
    useNA <- FALSE
    if(!useNA & nbfact > 0) {
        X <- data[, right.var]
        if(is.null(dim(X))) X <- data.frame(x = X)
        X <- as.data.frame(lapply(X, function(x) if(is.factor(x)) factor(x, exclude = c(NA, NaN)) else x))
        X <- is.na(X)
        X <- data.frame(nam = 1:nrow(X), na = rowSums(X))
        data <- data[X$nam[X$na == 0], ]
        } 

    y <- data[, all.var[1]]
    
    ## If right-hand = "~1"

    if(nbfact == 0) {

        tab <- data.frame(
            length(y[is.na(y)]),
            length(y[!is.na(y)]),
            min(y, na.rm = TRUE),
            quantile(y, prob = 0.25, na.rm = TRUE),
            quantile(y, prob = 0.50, na.rm = TRUE),
            mean(y, na.rm = TRUE),
            quantile(y, prob = 0.75, na.rm = TRUE),
            max(y, na.rm = TRUE),
            sd(y, na.rm = TRUE),
            row.names = 1
            )
        names(tab) <- c("NA's", "n", "min", "q25", "median", "mean", "q75", "max", "sd")

        }

    ## If right-hand != "~1"

    if(nbfact > 0) {

        data$xx.NA.xx <- ifelse(is.na(y), 1, 0)
        data$xx.n.xx <- 1 - data$xx.NA.xx
        
        f.right <- formula(paste("~", paste(right.var, collapse = "+")))
        f.NA <- formula(paste("xx.NA.xx", "~", paste(right.var, collapse = "+")))
        f.n <- formula(paste("xx.n.xx", "~", paste(right.var, collapse = "+")))
        
        tmp <- aggstat(formula = f.NA, data = data, FUN = sum)$tab
        names(tmp)[ncol(tmp)] <- "NA's"
        tmp$n <- aggstat(formula = f.n, data = data, FUN = sum)$tab$xx.n.xx

        tab <- data.frame(
            min = aggstat(formula = formula, data = data, FUN = min)$tab[, nbfact + 1],
            q25 = aggstat(formula = formula, data = data, FUN = quantile, prob = 0.25)$tab[, nbfact + 1],
            median = aggstat(formula = formula, data = data, FUN = quantile, prob = 0.5)$tab[, nbfact + 1],
            mean = aggstat(formula = formula, data = data, FUN = mean)$tab[, nbfact + 1],
            q75 = aggstat(formula = formula, data = data, FUN = quantile, prob = 0.75)$tab[, nbfact + 1],
            max = aggstat(formula = formula, data = data, FUN = max)$tab[, nbfact + 1],
            sd = aggstat(formula = formula, data = data, FUN = sd)$tab[, nbfact + 1]
            )
        tab <- cbind(tmp, tab)

        }
    
    tab$iqr <- tab$q75 - tab$q25
    tab$range <- tab$max - tab$min
    tab$cv <- tab$sd / tab$mean
    #tab$se.mean <- tab$sd / tab$n
    #tab$cv.mean <- tab$se.mean / tab$mean

    structure(list(CALL = CALL, tab = tab, nbfact = nbfact, digits = digits), class = "univar")

    }
rforge/tdisplay documentation built on Feb. 21, 2022, 7:50 a.m.