R/da_object_oriented_functions.R
In pvda: Disproportionality Functions for Pharmacovigilance

Documented in da print.da summary.da

# 1.0 constructor da ----
#' @title Disproportionality Analysis
#' @description The function \code{da} executes disproportionality analyses,
#' i.e. compares the proportion of reports with a specific adverse event for a drug,
#' against an event proportion from a comparator based on the passed data frame.
#' See the vignette for a brief introduction to disproportionality analysis.
#' Furthermore, \code{da} supports three estimators: Information Component (IC),
#' Proportional Reporting Rate (PRR) and the Reporting Odds Ratio (ROR).
#' @inheritParams add_expected_counts
#' @inheritParams add_disproportionality
#' @inheritParams ror
#' @param number_of_digits Round decimal columns to specified precision, default is two decimals.
#' @param excel_path Intended for users who prefer to work in excel with minimal work in R.
#' To write the output of \code{da} to an excel file, provide a path
#' to a folder. For instance, to write to your current working directory, pass \code{getwd()}.
#'  The excel file will by default be named \code{da.xlsx}. To control the excel file name,
#'  pass a path ending with the desired filename suffixed with \code{.xlsx}. If you
#'  do not want to export the output to an excel file, pass NULL (the default).
#' @param sort_by The output is sorted in descending order of the lower bound of
#'  the confidence/credibility interval for a passed da estimator. Any of the passed strings
#'  in "da_estimators" is accepted, the default is "ic".
#'  If a grouping variable is passed, sorting is made by the sample average across each drug-event-combination (ignoring NAs).
#' @inheritSection add_expected_counts The df object
#' @return \code{da} returns a data frame (invisibly) containing counts and
#' estimates related to supported disproportionality estimators. Each row
#' corresponds to a drug-event pair.
#' @examples
#' ### Run a disproportionality analysis
#'
#' da_1 <-
#'   tiny_dataset |>
#'   da()
#'
#' ### Run a disproportionality across subgroups
#' list_of_colnames <-
#'   list(
#'     report_id = "report_id",
#'     drug = "drug",
#'     event = "event",
#'     group_by = "group"
#'   )
#'
#'  da_2 <-
#'   tiny_dataset |>
#'   da(df_colnames = list_of_colnames)
#'
#' # If columns in your df have different names than the default ones,
#' # you can specify the column names in the df_colnames parameter list:
#'
#' renamed_df <-
#'   tiny_dataset |>
#'   dplyr::rename(ReportID = report_id)
#'
#' list_of_colnames$report_id <- "ReportID"
#'
#' da_3 <-
#'   renamed_df |>
#'   da(df_colnames = list_of_colnames)
#'
#'
#' @export
#' @importFrom checkmate qassert
#' @importFrom dplyr bind_cols select pull slice
#' @importFrom purrr map list_rbind %||%
da <- function(df = NULL,
               df_colnames = list(
                 report_id = "report_id",
                 drug = "drug",
                 event = "event",
                 group_by = NULL
               ),
               da_estimators = c("ic", "prr", "ror"),
               sort_by = "ic",
               number_of_digits = 2,
               rule_of_N = 3,
               conf_lvl = 0.95,
               excel_path = NULL) {

  # If null, set the default values of these parameters
  df_colnames[['report_id']] <-
    df_colnames[['report_id']] %||% "report_id"
  df_colnames[['drug']] <- df_colnames[['drug']] %||% "drug"
  df_colnames[['event']] <- df_colnames[['event']] %||% "event"

  input_params <- list(
    df = df,
    df_colnames = df_colnames,
    da_estimators = da_estimators,
    sort_by = sort_by,
    number_of_digits = number_of_digits,
    rule_of_N = rule_of_N,
    conf_lvl = conf_lvl,
    excel_path = excel_path
  )

  checkmate::qassert(df_colnames$group_by, c("S1", "N1", "0"))
  checkmate::qassert(excel_path, c("S1", "0"))
  df_syms <- lapply(df_colnames, \(x) {
    if (!is.null(x)) {
      rlang::sym(x)
    }
  })

  # ic uses expected counts from rrr
  expected_count_estimators <- gsub("ic", "rrr", da_estimators)

  NULL -> obs -> exp_rrr -> exp_prr -> exp_ror
  assign(df_colnames$report_id, NULL)
  assign(df_colnames$drug, NULL)
  assign(df_colnames$event, NULL)

  if (is.null(df_colnames$group_by)) {
    # No subgrouping provided:
    output <- df |>
      pvda::add_expected_counts(
        df_colnames = df_colnames,
        df_syms = df_syms,
        expected_count_estimators = expected_count_estimators
      ) |>
      pvda::add_disproportionality(
        df_syms = df_syms,
        da_estimators = da_estimators,
        conf_lvl = conf_lvl,
        rule_of_N = rule_of_N
      ) |>
      round_and_sort_by_lower_da_limit(
        df_colnames = df_colnames,
        df_syms = df_syms,
        conf_lvl = conf_lvl,
        sort_by = sort_by,
        da_estimators = da_estimators,
        number_of_digits = number_of_digits
      )
  } else {
    if (!df_colnames$group_by %in% colnames(df)) {
      stop("Passed grouping column name '",
           df_colnames$group_by,
           "' not found in passed df.")
    }

    drug <- rlang::sym(df_colnames$drug)
    event <- rlang::sym(df_colnames$event)
    group_by <- rlang::sym(df_colnames$group_by)

    # When subgroups are provided:
    output <- df |>
      split(f = df[[df_colnames$group_by]]) |>
      purrr::map(
        grouped_da,
        df_colnames = df_colnames,
        df_syms = df_syms,
        expected_count_estimators = expected_count_estimators,
        da_estimators = da_estimators,
        conf_lvl = conf_lvl,
        rule_of_N = rule_of_N,
        number_of_digits = number_of_digits
      ) |>
      purrr::list_rbind() |>
      dplyr::select(
        !!drug,!!event,!!group_by,
        obs,
        exp_rrr,
        dplyr::starts_with("ic"),
        exp_prr,
        dplyr::starts_with("prr"),
        exp_ror,
        dplyr::starts_with("ror"),
        everything()
      ) |>
      round_and_sort_by_lower_da_limit(
        df_colnames = df_colnames,
        df_syms = df_syms,
        conf_lvl = conf_lvl,
        sort_by = sort_by,
        da_estimators = da_estimators,
        number_of_digits = number_of_digits
      )
  }

  # If excel path is non-NULL
  write_to_excel(output, excel_path)

  output_list <- list("da_df" = output,
                      "input_params" = input_params)
  class(output_list) <- "da"

  return(invisible(output_list))
}

# 1.1 summary.da ----

#' @title Summary function for disproportionality objects
#' @description Provides summary counts of SDRs and shows the top five DECs
#' @param object A S3 obj of class "da", output from \code{pvda::da()}.
#' @param print Do you want to print the output to the console. Defaults to TRUE.
#' @param ... For passing additional parameters to extended classes.
#' @return Passes a tibble with the SDR counts invisibly.
#' @export
#' @importFrom stringr str_subset regex str_which str_replace
#' @importFrom tidyr separate_wider_delim unite
#' @importFrom dplyr group_by summarise pull mutate count distinct slice_head across all_of select join_by
#' @importFrom purrr pluck
#' @importFrom rlang sym
#' @importFrom tibble tibble as_tibble
#' @importFrom cli col_blue col_grey
#' @importFrom stats setNames
summary.da <- function(object, print = TRUE, ...) {
  # object <- drug_event_df |> da(df_colnames=list(group_by="group"))

  NULL -> drug -> event -> da_estimator -> exp_ror -> exp_prr ->
    conf_lvl_digits -> lower_bound_da

  input_params <- object$input_params
  conf_lvl <- input_params$conf_lvl
  da_estimators <- input_params$da_estimators

  quant_prob_list <-
    conf_lvl |>
    conf_lvl_to_quantile_prob()

  lower_bound_column_names <- split(da_estimators, da_estimators) |>
    lapply(\(x) {
      as.character(build_colnames_da(quant_prob_list, x)[[1]])
    }) |>
    purrr::list_c()

  # exp on the ic bound to have the same threshold value across all estimators
  ic_index <- stringr::str_which(lower_bound_column_names, "ic")
  ic_col <- lower_bound_column_names[ic_index]
  ic_col_sym <- rlang::sym(ic_col)
  ic_exp_col <- lower_bound_column_names[ic_index]
  ic_exp_sym <- rlang::sym(ic_exp_col)

  lower_bound_column_names_w_2 <-
    lower_bound_column_names |>
    stringr::str_replace(ic_col, ic_exp_col)

  name_of_total_col = "Total drug-event combination count"
  group_is_null = is.null(object$input_params$df_colnames$group_by)

  # If group_by is NULL
  if (group_is_null) {
    da_summary_counts <-
      object |>
      purrr::pluck("da_df") |>
      dplyr::mutate(!!ic_exp_sym := 2 ^ (!!ic_col_sym)) |>
      dplyr::summarise(across(all_of(lower_bound_column_names_w_2), \(x) {
        sum(1 < x, na.rm = TRUE)
      }))

    # Put together an overview count table

    # Count total N of DECs in object
    N_DECs <-
      object$da_df |>
      dplyr::count() |>
      as.numeric()
    total_df <-
      tibble::tibble("Name" = name_of_total_col, "Stat sign" = N_DECs)

    output <- tibble::tibble("Name" = colnames(da_summary_counts),
                             "Stat sign" = da_summary_counts[1,] |>
                               as.numeric()) |>
      dplyr::bind_rows(total_df)

    # If group_by is not NULL
  } else {
    group_colname <-
      rlang::sym(object$input_params$df_colnames$group_by)

    da_summary_counts <-
      object |>
      purrr::pluck("da_df") |>
      dplyr::mutate(!!ic_exp_sym := 2 ^ (!!ic_col_sym)) |>
      dplyr::group_by(!!group_colname) |>
      dplyr::summarise(across(all_of(lower_bound_column_names_w_2), \(x) {
        sum(1 < x, na.rm = TRUE)
      }))

    # Count total N of DECs in object
    N_DECs <-
      object$da_df |>
      dplyr::group_by(!!group_colname) |>
      dplyr::count()

    output <- da_summary_counts |>
      dplyr::left_join(N_DECs, dplyr::join_by(!!group_colname)) |>
      t() |>
      tibble::as_tibble(.name_repair = "minimal")

    output <- as.data.frame(output)
    rownames(output) = c(group_colname,
                         unlist(lower_bound_column_names_w_2),
                         name_of_total_col)

  }

  # The number of printed columns need to depend on da_estimators, in case
  # not all da estimators are used
  N_da_estimators <- length(da_estimators)
  end_of_print_df <- 3 + 4 * N_da_estimators

  # Add an extra column if a group_by-column was passed
  add_extra_col <- !is.null(input_params$df_colnames$group_by)
  if (add_extra_col) {
    end_of_print_df <- end_of_print_df + 1
  }

  if (print) {
    message(cli::col_blue("Summary of Disproportionality Analysis \nNumber of SDRs \n  "))
    print(output, row.names = !group_is_null) #Row names only needed with subgroups

    message(c("\n", cli::col_blue(paste0(
      "Top disproportionate DECs"
    ))))
    print(object, ...)
  }

  return(invisible(output))
}

# 1.2 print.da ----
#' print function for da objects
#' @param x A S3 obj of class "da", output from \code{pvda::da()}.
#' @param n Control the number of rows to print.
#' @inheritParams summary.da
#' @return Nothing, but prints the tibble da_df in the da object.
#' @export
#' @examples
#'
#' da_1 <-
#' tiny_dataset |>
#' da()
#' print(da_1)
#' @importFrom purrr flatten
#' @importFrom stringr str_detect
print.da <- function(x, n = 10, ...) {
  # x <- drug_event_df |> da()
  # Prevent pillar package from printing negative numbers in red
  # and set the number of digits to the da-specified integer.
  old_neg_option <- getOption("pillar.neg")
  old_sigfig_option <- getOption("pillar.sigfig")
  options(pillar.sigfig = x$input_params$number_of_digits)
  options(pillar.neg = FALSE)
  on.exit({options("pillar.neg" = old_neg_option)
          options("pillar.sigfig" = old_sigfig_option)})

  # The following code could benefit from using the OO of the pillar package, but
  # this is not priority right now.
  quantile_prob_list <- x$input_params$conf_lvl |>
    conf_lvl_to_quantile_prob()

  column_group <- list()
  da_estimators <- x$input_params$da_estimators
  exp_estimators <-
    da_estimators |> stringr::str_replace("ic", "rrr")

  for (i in seq_along(da_estimators)) {
    column_group[[i]] <-
      build_colnames_da(quantile_prob_list, da_name = da_estimators[i])
  }

  # This should only be done when the last column to colour is present, otherwise
  # glue_data will throw an error. An interactive version provided for debugging.
  if(!interactive()){
  printed_by_default_tbl <-
    utils::capture.output(print(x$da_df, n = n, ...))
  } else {
  printed_by_default_tbl <-
  utils::capture.output(print(x$da_df, n = n))
  }

  column_names <- column_group |> purrr::flatten()
  end_column_names <- column_names[c(2, 4, 6)] |> as.character()

  all_end_column_names_present <-
    all(stringr::str_detect(printed_by_default_tbl[[2]], end_column_names))
  cg <- list()
  start_colour_here <- list()

  # This code could probably be compressed
  if (all_end_column_names_present) {
    for (i in seq_along(column_group)) {
      # i = 1
      cg[[i]] <- column_group[[i]]
      col <- c("red", "green", "magenta")[i]

      start_colour_here[seq_along(column_group)] <-
        suppressWarnings(stringr::fixed(paste0("exp_", exp_estimators)))
      start_with_colour_inserted <-
        suppressWarnings(stringr::fixed(paste0("{", col, " {start_colour_here[[", i, "]]}")))

      # Open with a colour {
      printed_by_default_tbl[2] <- stringr::str_replace(printed_by_default_tbl[2],
                                                        start_colour_here[[i]],
                                                        start_with_colour_inserted)

      # Close the colour }
      printed_by_default_tbl[2] <- stringr::str_replace(
        printed_by_default_tbl[2],
        suppressWarnings(stringr::fixed(cg[[i]]$upper)),
        stringr::fixed(paste0("{cg[[", i, "]]$upper}}"))
      )

      # The insertion of the color codes shifts the da estimator column header
      #  slightly, so we adjust:
      add_this_number_of_spaces <-
        c(0, rep(1, length(da_estimators) - 1))

      printed_by_default_tbl[2] <- stringr::str_replace(
        printed_by_default_tbl[2],
        stringr::fixed(
          paste0(da_estimators[i], add_this_number_of_spaces[i], collapse = "")
        ),
        stringr::fixed(da_estimators[i])
      )
    }
  } else {
    message(
      "Your console width is too narrow to support coloured printouts, reverting to non-coloured output."
    )
  }

  # Second and last two rows should be in grey
  last_two_rows <- length(printed_by_default_tbl) - 1

  for (i in seq_along(printed_by_default_tbl)) {
    # Make first row blue
    if (i %in% 1) {
      message(cli::col_blue(printed_by_default_tbl[i]))
    } else {
      print(glue::glue_col(printed_by_default_tbl[i]))
    }
  }

  # Print a helpful message about the sorting at the end
  sorted_by_arg <- x$input_params$sort_by
  sorted_by_message <- paste0("(Drug-event-combinations sorted according to lower bound of ",
                              sorted_by_arg,
                              ")")

  message(cli::col_grey(paste0(
    sorted_by_message, " \n", "# Printing x[['da_df']]"
  )))
}