R/lookup_table.R

Defines functions lookup_table

#' @param icd_codes Vector of ICD codes
#' @param ref_df Data.frame with use to look up
#' @param data_col Single string matching the name of the column in
#'   `ref_df` that should contain the vector of codes `icd_codes`
#' @param one_to_one Defaults to `TRUE`, where it assumes for each
#'   code it is given, one row should be returned
#'
#' @importFrom rlang !!
#' @importFrom rlang :=
#' @keywords internal
lookup_table <- function(icd_codes, ref_df, data_col, one_to_one=TRUE){

  ### Verify the types are correct
  stopifnot(rlang::inherits_any(ref_df, "data.frame"))
  stopifnot(data_col %in% names(ref_df))

  if(is.factor(icd_codes)) {
    rlang::warn("ICD codes should be characters, not factors")
    icd_codes <- as.character(icd_codes)
  }

  ## If all given values are NA, warn and don't look anything up
  if(all(is.na(icd_codes))){
    msg <- glue::glue("All of the values in ",
                      "{rlang::expr_label(base::substitute(icd_codes))} ",
                      "were `NA`. Returning NA")
    rlang::warn(msg)
    return(icd_codes)
  }


  # ## If was given a single value, use a filter, otherwise,
  # ## look up using left-join
  # if(length(icd_codes)==1) {
  #   # not sure if this actually helps with speed...
  #   df <- ref_df[ref_df[[data_col]]==icd_codes,]
  #
  # } else({
  #   d <- data.frame(x = icd_codes)
  #   names(d)[1] <- data_col
  #   df <- dplyr::left_join(d, ref_df, by = data_col)
  # })

  ## Use left-join to preserve the vector of ICD codes in
  ## the same order as the original, even if nothing shows
  ## up
  d <- data.frame(x = icd_codes)
  names(d)[1] <- data_col
  df <- dplyr::left_join(d, ref_df, by = data_col)

  if(one_to_one){
    # Use waldo to check that new df matches old
    comp <- waldo::compare(icd_codes, df[[data_col]],
                           x_arg = "ICD codes provided",
                           y_arg = "Matches in lookup table")

    # warn if not matching
    if(length(comp)>0){
      msg <- glue::glue("The look up table didn't come back right ",
                        "when they were cross-referenced to the ",
                        "{rlang::expr_label(base::substitute(data_col))} ",
                        "column of the ",
                        "{rlang::expr_label(base::substitute(ref_df))}\n\n",
                        '{paste0(comp, collapse = "\n\n")}')
      rlang::warn(msg)
    }
  }

  df
}
HunterRatliff1/hcup documentation built on Aug. 6, 2023, 6:10 p.m.