R/summary.R

Defines functions .elixhauser_summary .charlson_summary .pccc_v3_summary .pccc_v2_summary summary.medicalcoder_comorbidities_with_subconditions summary.medicalcoder_comorbidities

Documented in summary.medicalcoder_comorbidities summary.medicalcoder_comorbidities_with_subconditions

#' Summaries of Comorbidities
#'
#' Build summaries (counts and percentages) for each comorbidity and other
#' summary statistics by method.
#'
#' @param object a `medicalcoder_comorbidities` object generated by calling
#' [comorbidities()]
#' @param ... additional parameters, not currently used
#'
#' @examples
#' pccc_v3.1_results <-
#'   comorbidities(data = mdcr,
#'                 icd.codes = "code",
#'                 id.vars = "patid",
#'                 dx.var = "dx",
#'                 method = "pccc_v3.1",
#'                 flag.method = 'current',
#'                 poa = 1)
#' summary(pccc_v3.1_results)
#'
#' charlson_results <-
#'   comorbidities(data = mdcr,
#'                 icd.codes = "code",
#'                 id.vars = "patid",
#'                 dx.var = "dx",
#'                 method = "charlson_quan2011",
#'                 flag.method = 'current',
#'                 poa = 1)
#' summary(charlson_results)
#'
#' elixhauser_results <-
#'   comorbidities(data = mdcr,
#'                 icd.codes = "code",
#'                 id.vars = "patid",
#'                 dx.var = "dx",
#'                 method = "elixhauser_ahrq2025",
#'                 primarydx = 1,
#'                 flag.method = 'current',
#'                 poa = 1)
#' summary(elixhauser_results)
#'
#' @return either a list or a data `data.frame`
#'
#' @export
summary.medicalcoder_comorbidities <- function(object, ...) {
  if (startsWith(attr(object, "method"), "pccc_v2")) {
    .pccc_v2_summary(object)
  } else if (startsWith(attr(object, "method"), "pccc_v3")) {
    .pccc_v3_summary(object)
  } else if (startsWith(attr(object, "method"), "charlson")) {
    .charlson_summary(object)
  } else if (startsWith(attr(object, "method"), "elixhauser")) {
    .elixhauser_summary(object)
  } else {
    stop(sprintf("No summary method for a medicalcoder_comorbidities object with method %s has been built", attr(object, "method")))
  }
}

#' Summaries of Comorbidities with Subconditions
#'
#' Build summaries (counts and percentages) for each Pediatric Complex Chronic
#' Condition (PCCC) condition and subcondition.
#'
#' @param object a `medicalcoder_comorbidities_with_subconditions` object generated by calling
#' [`comorbidities()`] with `subconditions = TRUE`.  This is currently only
#' applicable to PCCC.
#'
#' @param ... additional parameters, not currently used
#'
#' @seealso [`comorbidities()`],
#' `vignette(topic = "pccc", package = "medicalcoder")`
#'
#' @examples
#' pccc_v3.1_subcondition_results <-
#'   comorbidities(data = mdcr,
#'                 icd.codes = "code",
#'                 id.vars = "patid",
#'                 dx.var = "dx",
#'                 method = "pccc_v3.1",
#'                 flag.method = 'current',
#'                 poa = 1,
#'                 subconditions = TRUE)
#' summary(pccc_v3.1_subcondition_results)
#'
#' @return a `data.frame` with five columns.
#'
#' 1. `condition` the primary condition
#' 2. `subcondition` the subcondition(s) within the `condition`.  There will be
#'    a row where `subcondition` is `NA` which is used to report the `count` and
#'    `percent_of_cohort` for the `condition` overall.
#' 3. `count` the number of rows in `object` with the applicable `condition` and
#'    `subcondition`.
#' 4. `percent_of_cohort`: a numeric value within \[0, 100\] for the percent of
#'     rows in `object` with the flagged `condition` and `subcondition`.
#' 5. `percent_of_those_with_condition`: a numeric value within \[0, 100\] for the
#'    subset of rows in `object` with the primary `condition` and the flagged
#'    `subcondition`.  Will be `NA` for the primary `condition`.
#'
#' @export
summary.medicalcoder_comorbidities_with_subconditions <- function(object, ...) {
  if (attr(object, "flag.method") != "current") {
    warning(sprintf("Logic for pccc_summary_table has been implemented for flag.method = 'current'.  Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method")))
  }

  N <- nrow(object[["conditions"]])

  conditions <- ..mdcr_internal_pccc_conditions..[c("condition", "condition_label")]
  conditions <- unique(conditions)
  conditions <- conditions[order(conditions[["condition"]]), ]

  cols <- conditions[["condition"]]
  if (startsWith(attr(object, "method"), "pccc_v3")) {
    cols <- paste0(cols, "_dxpr_or_tech")
  }
  counts <- colSums(mdcr_select(object[["conditions"]], cols))
  names(counts) <- conditions[["condition"]]

  scounts <-
    lapply(object[["subconditions"]],
           function(x) {
             cols <- names(x)
             cols <- cols[-which(cols %in% attr(object, "id.vars"))]
             colSums(mdcr_select(x, cols))
           })

  rtn <-
    lapply(conditions[["condition"]],
           function(cnd) {
             x1 <- data.frame(
               condition = cnd,
               subcondition = NA_character_,
               count = counts[cnd],
               percent_of_cohort = if (N > 0) {100 * counts[cnd] / N} else {NA_real_},
               percent_of_those_with_condition = NA_real_,
               stringsAsFactors = FALSE
             )
             x2 <- data.frame(
               condition = cnd,
               subcondition = names(scounts[[cnd]]),
               count = scounts[[cnd]],
               percent_of_cohort = if (N > 0) {100 * scounts[[cnd]] / N} else {NA_real_},
               percent_of_those_with_condition = ifelse(scounts[[cnd]] > 0, 100 * scounts[[cnd]] / counts[cnd], NA_real_),
               stringsAsFactors = FALSE
             )
             rbind(x1, x2)
           }
           )
  rtn <- do.call(rbind, rtn)
  rownames(rtn) <- NULL
  rtn
}

################################################################################
# internal summaries by method, each of these are called by the summary methods
# above

.pccc_v2_summary <- function(object) {
  if (attr(object, "flag.method") != "current") {
    warning(sprintf("Logic for pccc_summary_table has been implemented for flag.method = 'current'.  Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method")))
  }

  conditions <- ..mdcr_internal_pccc_conditions..[c("condition", "condition_label")]
  conditions <- unique(conditions)
  conditions <- conditions[order(conditions[["condition"]]), ]

  cnds <- mdcr_select(object, c(conditions[["condition"]], "any_tech_dep", "any_transplant", "cmrb_flag"))

  # Track running counts for patients meeting at least N conditions
  # so the summary can report distribution thresholds (>=2, >=3, ...).
  tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
  tlts <- do.call(cbind, tlts)
  colnames(tlts) <- paste(">=", 2:11, "conditions")

  p <- 100 * c(colMeans(cnds), colMeans(tlts))
  p <- ifelse(is.nan(p), NA_real_, p)

  rtn <-
    data.frame(
      condition = c(names(cnds), rep("num_cmrb", ncol(tlts))),
      label     = c(conditions[["condition_label"]], "Any Technology Dependence", "Any Transplantation", "Any Condition", colnames(tlts)),
      count     = as.integer(c(colSums(cnds), colSums(tlts))),
      percent   = p,
      stringsAsFactors = FALSE
    )

  rownames(rtn) <- NULL

  rtn
}

.pccc_v3_summary <- function(object) {
  if (attr(object, "flag.method") != "current") {
    warning(sprintf("Logic for pccc_summary_table has been implemented for flag.method = 'current'.  Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method")))
  }

  conditions <- ..mdcr_internal_pccc_conditions..[c("condition", "condition_label")]
  conditions <- unique(conditions)
  conditions <- conditions[order(conditions[["condition"]]), ]

  # Track running counts for patients meeting at least N conditions
  # so the summary can report distribution thresholds (>=2, >=3, ...).
  tlts <- lapply(2:11, function(x) { as.integer(object[["num_cmrb"]] >= x)})
  tlts <- do.call(cbind, tlts)
  colnames(tlts) <- paste(">=", 2:11, "conditions")

  sets <-
    list(
      dxpr_or_tech  = mdcr_select(object, paste0(conditions[["condition"]], "_dxpr_or_tech")),
      dxpr_only     = mdcr_select(object, paste0(conditions[["condition"]], "_dxpr_only"   )),
      tech_only     = mdcr_select(object, paste0(conditions[["condition"]], "_tech_only"   )),
      dxpr_and_tech = mdcr_select(object, paste0(conditions[["condition"]], "_dxpr_and_tech")),
      flags         = mdcr_select(object, c("any_tech_dep", "any_transplant", "cmrb_flag")),
      totals        = tlts
    )

  counts <- lapply(sets, colSums, na.rm = TRUE)
  N <- nrow(object)
  percents <- lapply(counts,
    function(x) {
      y <- 100 * x / N
      ifelse(is.nan(y), NA_real_, y)
    })

  rtn <-
    data.frame(
      condition = conditions[["condition"]],
      label = conditions[["condition_label"]],
      dxpr_or_tech_count = as.integer(counts[["dxpr_or_tech"]]),
      dxpr_or_tech_percent = percents[["dxpr_or_tech"]],
      dxpr_only_count = as.integer(counts[["dxpr_only"]]),
      dxpr_only_percent = percents[["dxpr_only"]],
      tech_only_count = as.integer(counts[["tech_only"]]),
      tech_only_percent = percents[["tech_only"]],
      dxpr_and_tech_count = as.integer(counts[["dxpr_and_tech"]]),
      dxpr_and_tech_percent = percents[["dxpr_and_tech"]],
      stringsAsFactors = FALSE
    )

  rtn <-
    rbind(rtn,
      data.frame(
        condition = c(names(counts[["flags"]]), rep("num_cmrb", length(counts[["totals"]]))),
        label = c("Any Technology Dependence", "Any Transplantation", "Any Condition", names(counts[["totals"]])),
        dxpr_or_tech_count = as.integer(c(counts[["flags"]], counts[["totals"]])),
        dxpr_or_tech_percent = c(percents[["flags"]], percents[["totals"]]),
        dxpr_only_count = NA_integer_,
        dxpr_only_percent = NA_real_,
        tech_only_count = NA_integer_,
        tech_only_percent = NA_real_,
        dxpr_and_tech_count = NA_integer_,
        dxpr_and_tech_percent = NA_real_,
        stringsAsFactors = FALSE
      )
  )

  rownames(rtn) <- NULL

  rtn
}

.charlson_summary <- function(object) {
  if (attr(object, "flag.method") != "current") {
    warning(sprintf("Logic for charlson summary table has been implemented for flag.method = 'current'.  Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method")))
  }

  cmrbs <- ..mdcr_internal_charlson_index_scores..[!is.na( ..mdcr_internal_charlson_index_scores..[[attr(object, "method")]]), c("condition_description", "condition")]

  cmrbs[["count"]]   <- colSums(object[cmrbs[["condition"]]])
  cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]])

  num_cmrbs <-
    lapply(seq_len(max(c(1L, object[["num_cmrb"]]))),
      function(x) {
        y <- object[["num_cmrb"]] >= x
        data.frame(
          condition_description = paste(">=", x),
          condition = NA_character_,
          count = sum(y),
          percent = 100 * mean(y),
          stringsAsFactors = FALSE
        )
      })
  num_cmrbs <- do.call(rbind, num_cmrbs)
  cmrbs <- rbind(cmrbs, num_cmrbs)
  rownames(cmrbs) <- NULL

  # set percent to NA instead of NaN
  cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_

  index_summary <-
    if (length(object[["cci"]]) > 0L) {
      data.frame(
        min       = min(object[["cci"]]),
        q1        = stats::quantile(object[["cci"]], prob = 0.25),
        median    = stats::median(object[["cci"]]),
        q3        = stats::quantile(object[["cci"]], prob = 0.75),
        max       = max(object[["cci"]]),
        row.names = NULL,
        stringsAsFactors = FALSE
      )
    } else {
      data.frame(
        min       = NA_integer_,
        q1        = NA_real_,
        median    = NA_real_,
        q3        = NA_real_,
        max       = NA_integer_,
        row.names = NULL,
        stringsAsFactors = FALSE
      )
    }

  age_summary <-
    merge(
      x = stats::setNames(as.data.frame(table(object[["age_score"]], useNA = "always"), stringsAsFactors = FALSE), c("age_score", "count")),
      y = stats::setNames(as.data.frame(100 * prop.table(table(object[["age_score"]], useNA = "always")), stringsAsFactors = FALSE), c("age_score", "percent")),
      by = c("age_score")
    )

  # set NA instead of NaN
  age_summary[["percent"]][is.nan(age_summary[["percent"]])] <- NA_real_

  list(
    conditions = cmrbs,
    age_summary = age_summary,
    index_summary = index_summary
  )
}

.elixhauser_summary <- function(object) {
  if (attr(object, "flag.method") != "current") {
    warning(sprintf("Logic for Elixhauser summary has been implemented for flag.method = 'current'.  Using this function for flag.method = '%s' may not provide a meaningful summary.", attr(object, "flag.method")))
  }

  cmrbs <- ..mdcr_internal_elixhauser_index_scores..[!is.na( ..mdcr_internal_elixhauser_index_scores..[[attr(object, "method")]]), "condition", drop = FALSE]
  cmrbs <- unique(cmrbs)

  cmrbs[["count"]]   <- colSums(object[cmrbs[["condition"]]])
  cmrbs[["percent"]] <- 100 * colMeans(object[cmrbs[["condition"]]])

  num_cmrbs <-
    lapply(seq_len(max(c(1L, object[["num_cmrb"]]))),
      function(x) {
        y <- object[["num_cmrb"]] >= x
        data.frame(condition = paste(">=", x), count = sum(y), percent = 100 * mean(y), stringsAsFactors = FALSE)
      })
  num_cmrbs <- do.call(rbind, num_cmrbs)
  cmrbs <- rbind(cmrbs, num_cmrbs)
  rownames(cmrbs) <- NULL
  # set percent to NA instead of NaN
  cmrbs[["percent"]][is.nan(cmrbs[["percent"]])] <- NA_real_

  index_summary <-
    if (length(object[["readmission_index"]]) > 0L) {
      data.frame(
        index  = c("readmission", "mortality"),
        min    = c(min(object[["readmission_index"]]),
          min(object[["mortality_index"]])),
        q1     = c(stats::quantile(object[["readmission_index"]], prob = 0.25),
          stats::quantile(object[["mortality_index"]], prob = 0.25)),
        median = c(stats::median(object[["readmission_index"]]),
          stats::median(object[["mortality_index"]])),
        q3     = c(stats::quantile(object[["readmission_index"]], prob = 0.75),
          stats::quantile(object[["mortality_index"]], prob = 0.75)),
        max    = c(max(object[["readmission_index"]]),
          max(object[["mortality_index"]])),
        row.names = NULL,
        stringsAsFactors = FALSE
      )
    } else {
      data.frame(
        index  = c("readmission", "mortality"),
        min    = c(NA_integer_, NA_integer_),
        q1     = c(NA_real_, NA_real_),
        median = c(NA_real_, NA_real_),
        q3     = c(NA_real_, NA_real_),
        max    = c(NA_integer_, NA_integer_),
        row.names = NULL,
        stringsAsFactors = FALSE
      )
    }

  list(
    conditions = cmrbs,
    index_summary = index_summary
  )
}

################################################################################
#                                 End of File                                  #
################################################################################

Try the medicalcoder package in your browser

Any scripts or data that you put into this service are public.

medicalcoder documentation built on Feb. 22, 2026, 5:08 p.m.