R/Table1.R

Defines functions createCmTable1 getDefaultCmTable1Specifications

Documented in createCmTable1 getDefaultCmTable1Specifications

# @file Table1.R
#
# Copyright 2024 Observational Health Data Sciences and Informatics
#
# This file is part of CohortMethod
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' Get the default table 1 specifications
#'
#' @description
#' Loads the default specifications for a table 1, to be used with the \code{\link{createTable1}}
#' function.
#'
#' Important: currently only works for binary covariates.
#'
#' @return
#' A specifications objects.
#'
#' @export
getDefaultCmTable1Specifications <- function() {
  fileName <- system.file("csv", "Table1Specs.csv", package = "CohortMethod")
  colTypes <- list(
    label = readr::col_character(),
    analysisId = readr::col_integer(),
    covariateIds = readr::col_character()
  )
  # Workaround for issue https://github.com/tidyverse/vroom/issues/519:
  readr::local_edition(1)
  specifications <- readr::read_csv(fileName, col_types = colTypes)
  return(specifications)
}

#' Create a table 1
#'
#' @description
#' Creates a formatted table of cohort characteristics, to be included in publications or reports.
#'
#' @param balance                 A data frame created by the \code{computeCovariateBalance} function.
#' @param specifications          Specifications of which covariates to display, and how.
#' @param beforeTargetPopSize     The number of people in the target cohort before matching/stratification/trimming,
#'                                to mention in the table header. If not provide, no number will be included in the header.
#' @param beforeComparatorPopSize The number of people in the comparator cohort before matching/stratification/trimming,
#'                                to mention in the table header. If not provide, no number will be included in the header.
#' @param afterTargetPopSize      The number of people in the target cohort after matching/stratification/trimming,
#'                                to mention in the table header. If not provide, no number will be included in the header.
#' @param afterComparatorPopSize  The number of people in the comparator cohort after matching/stratification/trimming,
#'                                to mention in the table header. If not provide, no number will be included in the header.
#' @param beforeLabel             Label for identifying columns before matching / stratification / trimming.
#' @param afterLabel              Label for identifying columns after matching / stratification / trimming.
#' @param targetLabel             Label for identifying columns of the target cohort.
#' @param comparatorLabel         Label for identifying columns of the comparator cohort.
#' @param percentDigits           Number of digits to be used for percentages.
#' @param stdDiffDigits           Number of digits to be used for the standardized differences.
#'
#' @return
#' A data frame with the formatted table 1.
#'
#' @export
createCmTable1 <- function(balance,
                           specifications = getDefaultCmTable1Specifications(),
                           beforeTargetPopSize = NULL,
                           beforeComparatorPopSize = NULL,
                           afterTargetPopSize = NULL,
                           afterComparatorPopSize = NULL,
                           beforeLabel = "Before matching",
                           afterLabel = "After matching",
                           targetLabel = "Target",
                           comparatorLabel = "Comparator",
                           percentDigits = 1,
                           stdDiffDigits = 2) {
  errorMessages <- checkmate::makeAssertCollection()
  checkmate::assertDataFrame(balance, null.ok = TRUE, add = errorMessages)
  checkmate::assertDataFrame(specifications, null.ok = TRUE, add = errorMessages)
  checkmate::assertInt(beforeTargetPopSize, null.ok = TRUE, lower = 0, add = errorMessages)
  checkmate::assertInt(beforeComparatorPopSize, null.ok = TRUE, lower = 0, add = errorMessages)
  checkmate::assertInt(afterTargetPopSize, null.ok = TRUE, lower = 0, add = errorMessages)
  checkmate::assertInt(afterComparatorPopSize, null.ok = TRUE, lower = 0, add = errorMessages)
  checkmate::assertCharacter(beforeLabel, len = 1, add = errorMessages)
  checkmate::assertCharacter(afterLabel, len = 1, add = errorMessages)
  checkmate::assertCharacter(targetLabel, len = 1, add = errorMessages)
  checkmate::assertCharacter(comparatorLabel, len = 1, add = errorMessages)
  checkmate::assertInt(percentDigits, lower = 0, add = errorMessages)
  checkmate::assertInt(stdDiffDigits, lower = 0, add = errorMessages)
  checkmate::reportAssertions(collection = errorMessages)

  fixCase <- function(label) {
    idx <- (toupper(label) == label)
    if (any(idx)) {
      label[idx] <- paste0(
        substr(label[idx], 1, 1),
        tolower(substr(label[idx], 2, nchar(label[idx])))
      )
    }
    return(label)
  }

  formatPercent <- function(x) {
    result <- format(round(100 * x, percentDigits), digits = percentDigits + 1, justify = "right")
    result <- gsub("NA", "", result)
    result <- gsub(" ", " ", result)
    return(result)
  }

  formatStdDiff <- function(x) {
    result <- format(round(x, stdDiffDigits), digits = stdDiffDigits + 1, justify = "right")
    result <- gsub("NA", "", result)
    result <- gsub(" ", " ", result)
    return(result)
  }

  resultsTable <- data.frame()
  for (i in 1:nrow(specifications)) {
    if (is.na(specifications$analysisId[i])) {
      resultsTable <- rbind(
        resultsTable,
        data.frame(Characteristic = specifications$label[i], value = "")
      )
    } else {
      idx <- balance$analysisId == specifications$analysisId[i]
      if (any(idx)) {
        if (!is.na(specifications$covariateIds[i])) {
          covariateIds <- as.numeric(strsplit(as.character(specifications$covariateIds[i]), ";")[[1]])
          idx <- balance$covariateId %in% covariateIds
        } else {
          covariateIds <- NULL
        }
        if (any(idx)) {
          balanceSubset <- balance[idx, ]
          if (is.null(covariateIds)) {
            balanceSubset <- balanceSubset[order(balanceSubset$covariateId), ]
          } else {
            balanceSubset <- merge(balanceSubset, data.frame(
              covariateId = covariateIds,
              rn = 1:length(covariateIds)
            ))
            balanceSubset <- balanceSubset[order(
              balanceSubset$rn,
              balanceSubset$covariateId
            ), ]
          }
          balanceSubset$covariateName <- fixCase(gsub(
            "^.*: ",
            "",
            balanceSubset$covariateName
          ))
          if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) {
            resultsTable <- rbind(resultsTable, data.frame(
              Characteristic = specifications$label[i],
              beforeMatchingMeanTarget = NA,
              beforeMatchingMeanComparator = NA,
              beforeMatchingStdDiff = NA,
              afterMatchingMeanTarget = NA,
              afterMatchingMeanComparator = NA,
              afterMatchingStdDiff = NA,
              stringsAsFactors = FALSE
            ))
            resultsTable <- rbind(
              resultsTable,
              data.frame(
                Characteristic = paste0("  ", balanceSubset$covariateName),
                beforeMatchingMeanTarget = balanceSubset$beforeMatchingMeanTarget,
                beforeMatchingMeanComparator = balanceSubset$beforeMatchingMeanComparator,
                beforeMatchingStdDiff = balanceSubset$beforeMatchingStdDiff,
                afterMatchingMeanTarget = balanceSubset$afterMatchingMeanTarget,
                afterMatchingMeanComparator = balanceSubset$afterMatchingMeanComparator,
                afterMatchingStdDiff = balanceSubset$afterMatchingStdDiff,
                stringsAsFactors = FALSE
              )
            )
          } else {
            resultsTable <- rbind(resultsTable, data.frame(
              Characteristic = specifications$label[i],
              beforeMatchingMeanTarget = balanceSubset$beforeMatchingMeanTarget,
              beforeMatchingMeanComparator = balanceSubset$beforeMatchingMeanComparator,
              beforeMatchingStdDiff = balanceSubset$beforeMatchingStdDiff,
              afterMatchingMeanTarget = balanceSubset$afterMatchingMeanTarget,
              afterMatchingMeanComparator = balanceSubset$afterMatchingMeanComparator,
              afterMatchingStdDiff = balanceSubset$afterMatchingStdDiff,
              stringsAsFactors = FALSE
            ))
          }
        }
      }
    }
  }
  resultsTable$beforeMatchingMeanTarget <- formatPercent(resultsTable$beforeMatchingMeanTarget)
  resultsTable$beforeMatchingMeanComparator <- formatPercent(resultsTable$beforeMatchingMeanComparator)
  resultsTable$beforeMatchingStdDiff <- formatStdDiff(resultsTable$beforeMatchingStdDiff)
  resultsTable$afterMatchingMeanTarget <- formatPercent(resultsTable$afterMatchingMeanTarget)
  resultsTable$afterMatchingMeanComparator <- formatPercent(resultsTable$afterMatchingMeanComparator)
  resultsTable$afterMatchingStdDiff <- formatStdDiff(resultsTable$afterMatchingStdDiff)

  headerRow <- as.data.frame(t(rep("", ncol(resultsTable))))
  colnames(headerRow) <- colnames(resultsTable)
  headerRow$beforeMatchingMeanTarget <- targetLabel
  headerRow$beforeMatchingMeanComparator <- comparatorLabel
  headerRow$afterMatchingMeanTarget <- targetLabel
  headerRow$afterMatchingMeanComparator <- comparatorLabel

  subHeaderRow <- as.data.frame(t(rep("", ncol(resultsTable))))
  colnames(subHeaderRow) <- colnames(resultsTable)
  subHeaderRow$Characteristic <- "Characteristic"
  if (missing(beforeTargetPopSize)) {
    subHeaderRow$beforeMatchingMeanTarget <- "%"
  } else {
    subHeaderRow$beforeMatchingMeanTarget <- paste0("% (n = ", format(beforeTargetPopSize, big.mark = ","), ")")
  }
  if (missing(beforeComparatorPopSize)) {
    subHeaderRow$beforeMatchingMeanComparator <- "%"
  } else {
    subHeaderRow$beforeMatchingMeanComparator <- paste0("% (n = ", format(beforeComparatorPopSize, big.mark = ","), ")")
  }
  subHeaderRow$beforeMatchingStdDiff <- "Std. diff"
  if (missing(afterTargetPopSize)) {
    subHeaderRow$afterMatchingMeanTarget <- "%"
  } else {
    subHeaderRow$afterMatchingMeanTarget <- paste0("% (n = ", format(afterTargetPopSize, big.mark = ","), ")")
  }
  if (missing(afterComparatorPopSize)) {
    subHeaderRow$afterMatchingMeanComparator <- "%"
  } else {
    subHeaderRow$afterMatchingMeanComparator <- paste0("% (n = ", format(afterComparatorPopSize, big.mark = ","), ")")
  }
  subHeaderRow$afterMatchingStdDiff <- "Std. diff"
  resultsTable <- rbind(headerRow, subHeaderRow, resultsTable)
  colnames(resultsTable) <- rep("", ncol(resultsTable))
  colnames(resultsTable)[2] <- beforeLabel
  colnames(resultsTable)[5] <- afterLabel
  return(resultsTable)
}
OHDSI/CohortMethod documentation built on Oct. 9, 2024, 12:50 p.m.