R/prevalence.R

Defines functions nordcanstat_year_based_prevalent_patient_count

Documented in nordcanstat_year_based_prevalent_patient_count

#' @title Prevalence
#' @description
#' Compute numbers of people alive at specific time points who have had
#' a cancer diagnosis in their past.
#' @template param_x
#' @template param_by
#' @template param_entities
#' @param observation_years `[NULL, integer]` (optional, default `NULL`)
#'
#' - `NULL`: NORDCAN default set of years is used
#' - `integer`: only observe prevalence at the ends of these years; see
#'   [basicepistats::stat_year_based_prevalent_subject_count] for more
#'   information
#' @param subset see [basicepistats::stat_year_based_prevalent_subject_count]
#' @param subset_style see [basicepistats::stat_year_based_prevalent_subject_count]
#' @export
#' @family nordcanstat
nordcanstat_year_based_prevalent_patient_count <- function(
  x,
  by = NULL,
  entities = NULL,
  observation_years = NULL,
  subset = NULL,
  subset_style = "zeros"
) {
  # just to ensure settings are set
  nordcancore::get_global_nordcan_settings()

  settings <- nordcanstat_settings("nordcanstat_prevalent_subject_count")
  col_nms <- unlist(settings[
    c("entry_year_col_nm", "exit_year_col_nm", "subject_id_col_nm")
  ])
  col_nms <- union("vit_sta", col_nms)
  dbc::assert_prod_input_is_data.table_with_required_names(
    x,
    required_names = col_nms
  )
  dbc::assert_prod_input_is_one_of(
    x = observation_years,
    funs = list(dbc::report_is_NULL, 
                dbc::report_is_integer_nonNA_vector)
  )
  arg_list <- c(mget(c("x", "by", "subset", "subset_style")), settings)
  if (!is.null(observation_years)) {
    arg_list[["observation_years"]] <- observation_years
  }

  # ensure that prevalence is calculated correctly for the last year used in
  # NORDCAN --- having the exit year of those who were in follow-up at the end
  # of the study period to be larger than last year of observation ensures
  # that they count towards prevalent subjects in the last year of observation.
  # see ?basicepistats::stat_year_based_prevalent_subject_count.
  # the way this is done here avoids taking a copy of any other data, only the
  # year of exit is copied.
  exit_year_col_nm <- settings[["exit_year_col_nm"]]
  use_x <- data.table::setDT(lapply(names(x), function(col_nm) {
    col <- x[[col_nm]]
    if (col_nm == exit_year_col_nm) {
      col <- col + 0L
      last_year <- nordcancore::get_global_nordcan_settings()[["last_year_incidence"]]
      survived_last_year <- x[["vit_sta"]] == 1L & col == last_year
      col[survived_last_year] <- max(last_year) + 1L
    }
    col
  }))
  data.table::setnames(use_x, names(use_x), names(x))
  arg_list[["x"]] <- use_x

  dt <- nordcanstat_by_entity(
    entities = entities,
    arg_list = arg_list,
    basicepistats_fun = basicepistats::stat_year_based_prevalent_subject_count,
    loop_over = "entity_numbers"
  )

  info <- nordcancore::nordcan_metadata_participant_info()
  if (is.character(by) && "region" %in% by && info[["has_sub_regions"]]) {
    arg_list[["by"]] <- setdiff(arg_list[["by"]], "region")
    if (length(arg_list[["by"]]) == 0L) {
      arg_list[["by"]] <- NULL
    }
    dt_topregion <- nordcanstat_by_entity(
      entities = entities,
      arg_list = arg_list,
      basicepistats_fun = basicepistats::stat_year_based_prevalent_subject_count,
      loop_over = "entity_numbers"
    )
    topregion_number <- info[["topregion_number"]]
    dt_topregion[, "region" := topregion_number]
    data.table::setcolorder(dt_topregion, names(dt))
    ## remove the count for top region in dt.
    dt <- dt[dt$region != topregion_number, ]
    dt <- rbind(dt, dt_topregion)
    data.table::setcolorder(dt, by)
    data.table::setkeyv(dt, by)
  }

  dt <- remove_regional_counts_before_start_year(
    dt, year_col_nm = "observation_year", prevalence = TRUE
  )

  if (is.character(by)) {
    data.table::setcolorder(dt, by)
    data.table::setkeyv(dt, by)
  } else {
    data.table::setkeyv(dt, setdiff(names(dt), "N"))
  }
  data.table::setnames(dt, "N", "prevalent_patient_count")
  return(dt[])
}
CancerRegistryOfNorway/nordcanepistats documentation built on June 3, 2024, 5:41 p.m.