descripteur: Describe Data Sets

Documented in c_bnry c_bstd c_catg c_cOR c_cstd c_cstd.each c_date c_dstd c_HR c_HRCI c_OR c_overlap c_pdiff c_real c_rr c_RR c_rstd c_shift c_sstd c_surv

warn_if_wrong_glist_length <- function(glist, length){
    if(desc_get("warn_if_wrong_glist_length")){
        n <- length(glist)
        if(n != length){
            x <- paste0("glist of length ", n, " is given to a function ",
                        "that only makes use of ", length, ".")
            warning(x)
        }
    }
    invisible(NULL)
}

warn_if_weight_not_used <- function(...){
    if(desc_get("warn_if_weight_not_used")){
        l <- list(...)
        if("w" %in% names(l)){
            if(!is.null(l[['w']])){
                x <- paste0("'w' is being passed to a function that does not ",
                            "use weighting")
                warning(x)
            }
        }
    }
    invisible(NULL)
}

     ## +----------------------------------------+ ##
     ## | comparing functions for real variables | ##
     ## +----------------------------------------+ ##

##' various comparer functions for real types
##'
##' @param x vector
##' @param glist a grouping list
##' @param w weight
##' @param ... this is to be able to tolerate unnecessary arguments
c_real <- function(...) invisible(NULL)

##' @describeIn c_real median shift measured in units of original IQR, 2 groups only
##' @export
c_shift <- function(x, glist, ...){
    warn_if_weight_not_used(...)
    warn_if_wrong_glist_length(glist, 2)
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    (stats::median(x1, na.rm = TRUE) - stats::median(x2, na.rm = TRUE)) /
        stats::IQR(x1, na.rm = TRUE)
}
attr(c_shift, "dtable") <- "comp"

##' @describeIn c_real standardized difference (2 groups only) assuming
##'     independence
##' @references \url{https://www.lerner.ccf.org/qhs/software/lib/stddiff.pdf}
##' @export
c_rstd <- function(x, glist, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    if(is.null(w)) w <- rep(1, length(x))
    x1 <- x[glist[[1]]]
    w1 <- w[glist[[1]]]
    x2 <- x[glist[[2]]]
    w2 <- w[glist[[2]]]
    (d_mean(x = x1, w = w1) - d_mean(x = x2, w = w2)) /
    sqrt((d_sd(x = x1, w = w1)^2 + d_sd(x = x2, w = w2)^2) / 2)
}
attr(c_rstd, "dtable") <- "comp"


     ## +----------------------------------------+ ##
     ## | comparing functions for bnry variables | ##
     ## +----------------------------------------+ ##

##' various comparer functions for bnry types
##'
##' @param x vector
##' @param glist a grouping list
##' @param w weight
##' @param ... this is to be able to tolerate unnecessary arguments
c_bnry <- function(...) invisible(NULL)


##' @describeIn c_bnry risk ratio, 2 groups only
##' @export
c_RR <- function(x, glist, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    d_bp(x[glist[[1]]], w = w[glist[[1]]]) /
        d_bp(x[glist[[2]]], w = w[glist[[2]]])
}
attr(c_RR, "dtable") <- "comp"

##' @describeIn c_bnry odds ratio, 2 groups only
##' @export
c_OR <- function(x, glist, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    if(is.null(w)) w <- rep(1, length(x))
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    d_odds(x = x1, w = w1) / d_odds(x2, w = w2)
}
attr(c_OR, "dtable") <- "comp"

##' @describeIn c_bnry standardized differences for binary variables, 2 groups only
##' @export
##' @references \url{https://www.lerner.ccf.org/qhs/software/lib/stddiff.pdf}
c_bstd <- function(x, glist, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    if(is.null(w)) w <- rep(1, length(x))
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    p1 <- d_bp(x1, w = w1)
    p2 <- d_bp(x2, w = w2)
    (p1 - p2) / sqrt((p1*(1-p1) + p2*(1-p2)) / 2)
}
attr(c_bstd, "dtable") <- "comp"

     ## +----------------------------------------+ ##
     ## | comparing functions for date variables | ##
     ## +----------------------------------------+ ##

##' various comparer functions for date types
##'
##' @param x vector
##' @param glist a grouping list
##' @param w weights
##' @param ... this is to be able to tolerate unnecessary arguments
c_date <- function(...) invisible(NULL)

##' @describeIn c_date proportion of records in the overlapping time period, 2
##'     groups only
##' @export
c_overlap <- function(x, glist, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    if(is.null(w)) w <- rep(1, length(x))
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    a <- max(min(x1, na.rm = TRUE), min(x2, na.rm = TRUE))
    b <- min(max(x1, na.rm = TRUE), max(x2, na.rm = TRUE))
    ## sum(x >= a & x <= b, na.rm = TRUE) ## / sum(!is.na(x))
    overlap <- x >= a & x <= b
    sum(w[overlap], na.rm = TRUE) / sum(w[!is.na(x)], na.rm = TRUE)
}
attr(c_overlap, "dtable") <- "comp"

##' @describeIn c_date standardized difference for date variables (same as for
##'     real variables when the dates are interpreted as numericals)
##' @export
c_dstd <- function(x, glist, ...){
    c_rstd(x = as.numeric(x), glist = glist, ...)
}
attr(c_dstd, "dtable") <- "comp"

     ## +----------------------------------------+ ##
     ## | comparing functions for catg variables | ##
     ## +----------------------------------------+ ##

##' various comparer functions for catg types
##'
##' @param x vector
##' @param glist a grouping list
##' @param useNA display info for missing?
##' @param w weight
##' @param ... this is to be able to tolerate unnecessary arguments
##' @seealso \code{\link{t_fisher.p}} and \code{\link{t_chisq.p}} which work for
##'     catg variables as well
c_catg <- function(...) invisible(NULL)

##' @describeIn c_catg difference in proportions, 2 groups only
##' @export
c_pdiff <- function(x, glist, useNA = FALSE, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    p1 <- d_cp(x[glist[[1]]], w = w1, useNA = useNA)
    p2 <- d_cp(x[glist[[2]]], w = w2, useNA = useNA)
    p1 - p2
}
attr(c_pdiff, "dtable") <- "comp"

##' @describeIn c_catg odds ratios, 2 groups only
##' @export
c_cOR <- function(x, glist, useNA = FALSE, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    p1 <- d_cp(x[glist[[1]]], w = w1, useNA=useNA)
    p2 <- d_cp(x[glist[[2]]], w = w2, useNA=useNA)
    (p1 / (1-p1)) / (p2 / (1-p2))
}
attr(c_cOR, "dtable") <- "comp"

##' @describeIn c_catg standardized differences (2 groups only) for each level
##' @export
c_cstd.each <- function(x, glist, useNA = FALSE, w = NULL, ...){
    warn_if_wrong_glist_length(glist, 2)
    useNA <- FALSE ## could this couse problems?
    if(is.null(w)) w <- rep(1, length(x))
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    p1 <- d_cp(x1, w = w1)
    p2 <- d_cp(x2, w = w2)
    (p1 - p2) / sqrt((p1*(1-p1) + p2*(1-p2)) / 2)
}
attr(c_cstd.each, "dtable") <- "comp"

##' @describeIn c_catg standardized differences (2 groups only) for the variable
##' @references \url{https://www.lerner.ccf.org/qhs/software/lib/stddiff.pdf}
##' @param expand.levels if only one value is returned for a multilevel categorical
##'     variable, should the results be expanded with \code{NA}:s to match the
##'     typical output length for a function of such a variable?
##' @export
c_cstd <- function(x, glist, useNA = FALSE, w = NULL,
                   expand.levels = TRUE, ...){
    warn_if_wrong_glist_length(glist, 2)
    useNA <- FALSE ## could this couse problems?
    if(is.null(w)) w <- rep(1, length(x))
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    p1 <- d_cp(x1, w = w1)[-1]
    p2 <- d_cp(x2, w = w2)[-1]
    k <- length(p1)
    S <- matrix(NA, nrow = k, ncol = k)
    for(i in 1:k){
        for(j in 1:k){
            if(i == j){
                S[i, j] <- (p1[i]*(1-p1[i]) + p2[i]*(1-p2[i])) / 2
            } else {
                S[i, j] <- -(p1[i]*p1[j] + p2[i]*p2[j]) / 2
            }
        }
    }
    ## r <- sqrt(t(p1-p2) %*% MASS::ginv(S) %*% (p1-p2))
    r <- if(all(S == 0)){
             Inf
         } else if(is.null(
                    tryCatch(INV <- solve(S),
                             error = function(e) NULL)
                )){
             Inf
         } else{
             sqrt(t(p1-p2) %*% INV %*% (p1-p2))
         }
    if(expand.levels) c(r, rep(NA, k)) else r
}
attr(c_cstd, "dtable") <- "comp"

     ## +----------------------------------------+ ##
     ## | comparing functions for surv variables | ##
     ## +----------------------------------------+ ##

##' various comparer functions for surv types
##'
##' @param x vector
##' @param glist a grouping list
##' @param w weight
##' @param cens.type type of censoring
##' @param ... this is to be able to tolerate unnecessary arguments
c_surv <- function(...) invisible(NULL)

##' @describeIn c_surv rate ratio, 2 groups only
##' @export
c_rr <- function(x, glist, w = NULL, cens.type = "right", ...){
    warn_if_wrong_glist_length(glist, 2)
    survcheck(x)
    if(is.null(w)) w <- rep(1, length(x))
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    if(cens.type == "right"){
        check_right(x)
        d_rate(x1, w = w1, cens.type = cens.type) /
            d_rate(x2, w = w2, cens.type = cens.type)
    } else {
        stop("no cens.type but 'right' has been implemented")
    }
}
attr(c_rr, "dtable") <- "comp"

##' @describeIn c_surv standardized difference between the rates of two surv
##'     variables (difference in mean divided by 'average sd'-isch)
##' @export
c_sstd <- function(x, glist, w = NULL, cens.type = "right", ...){
    warn_if_wrong_glist_length(glist, 2)
    survcheck(x)
    if(is.null(w)) w <- rep(1, length(x)/2)
    x1 <- x[glist[[1]]]
    x2 <- x[glist[[2]]]
    w1 <- w[glist[[1]]]
    w2 <- w[glist[[2]]]
    n1 <- d_esum(x = x1, w = w1, cens.type = cens.type, ...)
    n2 <- d_esum(x = x2, w = w2, cens.type = cens.type, ...)
    t1 <- d_tsum(x = x1, w = w1, cens.type = cens.type, ...)
    t2 <- d_tsum(x = x1, w = w1, cens.type = cens.type, ...)
    (n1 / t1 - n2 / t2) / sqrt((n1 / t1 + n2 / t2) / 2)
}
attr(c_sstd, "dtable") <- "comp"

cph_model <- function(x, glist, w = NULL){
    if(is.null(w)) w <- rep(1, length(x))
    survival::coxph(x ~ factorize_glist(glist, as.factor = TRUE,
                                        reverse.levels = TRUE),
                    weights = w)
}

##' @describeIn c_surv Hazard Ratio from Cox model
##' @export
c_HR <- function(x, glist, w = NULL, cens.type = "right", ...){
    warn_if_wrong_glist_length(glist, 2)
    survcheck(x)
    if(is.null(w)) w <- rep(1, length(x))
    mod <- cph_model(x = x, glist = glist, w = w)
    as.numeric(exp(mod$coefficients)[1])
}
attr(c_HR, "dtable") <- "comp"

##' @describeIn c_surv Hazard Ratio and confidence interval (as character) from
##'     Cox model
##' @param d.hrci digits argument for HR and confidence interval
##' @export
c_HRCI <- function(x, glist, w = NULL, cens.type = "right", d.hrci = 2, ...){
    warn_if_wrong_glist_length(glist, 2)
    survcheck(x)
    if(is.null(w)) w <- rep(1, length(x))
    mod <- cph_model(x = x, glist = glist, w = w)
    v <- exp(as.numeric(c(mod$coefficients[1], stats::confint(mod))))
    s <- paste0("%.", d.hrci, "f (%.", d.hrci, "f - %.", d.hrci, "f)")
    sprintf(s, v[1], v[2], v[3])
}
attr(c_HRCI, "dtable") <- "comp"


if(FALSE){
    n <- 1000
    x <- survival::Surv(time = rexp(n, 1/100), event = rbinom(n, 1, 0.2))
    a <- sample(c(TRUE, FALSE), n, replace = TRUE)
    glist <- list("A" = a, "B" = !a)
    c_cph(x, glist)
    t_cph.p(x, glist)
    t_lr.p(x, glist)
    summary(cph_model(x, glist))
    c_HR(x, glist)
    c_HRCI(x, glist)
}

renlund/descripteur documentation built on July 5, 2025, 6:35 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

renlund/descripteur
Describe Data Sets

R/comparers.R
In renlund/descripteur: Describe Data Sets

Defines functions c_HRCI c_HR cph_model c_sstd c_rr c_surv c_cstd c_cstd.each c_cOR c_pdiff c_catg c_dstd c_overlap c_date c_bstd c_OR c_RR c_bnry c_rstd c_shift c_real warn_if_weight_not_used warn_if_wrong_glist_length

Documented in c_bnry c_bstd c_catg c_cOR c_cstd c_cstd.each c_date c_dstd c_HR c_HRCI c_OR c_overlap c_pdiff c_real c_rr c_RR c_rstd c_shift c_sstd c_surv

R Package Documentation

Browse R Packages

We want your feedback!

renlund/descripteur Describe Data Sets

R/comparers.R In renlund/descripteur: Describe Data Sets

Defines functions c_HRCI c_HR cph_model c_sstd c_rr c_surv c_cstd c_cstd.each c_cOR c_pdiff c_catg c_dstd c_overlap c_date c_bstd c_OR c_RR c_bnry c_rstd c_shift c_real warn_if_weight_not_used warn_if_wrong_glist_length

Documented in c_bnry c_bstd c_catg c_cOR c_cstd c_cstd.each c_date c_dstd c_HR c_HRCI c_OR c_overlap c_pdiff c_real c_rr c_RR c_rstd c_shift c_sstd c_surv

R Package Documentation

Browse R Packages

We want your feedback!

renlund/descripteur
Describe Data Sets

R/comparers.R
In renlund/descripteur: Describe Data Sets