R/utils.R

Defines functions find_diag

Documented in find_diag

# make the icd10cm regex

make_regex <- purrr::compose(
  function(x) gsub("X", "^X", x),
  function(x) paste(x, collapse = "|"),
  function(x) gsub("(?<!^)x", ".", x, ignore.case = T, perl = T)
)

# cleaning the names for mechanism

clean_mech_names <- purrr::compose(
  # remove repeat "_" and extreme "_"
  function(x) gsub("(_)(?=_*\\1)|^_|_$", "", x, perl = T),
  # not [A-Za-z0-9_] and replace with "_"
  function(x) gsub("\\W", "_", x),
  # parenthesis and its contents
  function(x) gsub("\\(.+\\)", "", x)
)

# finding matches in multiple fields

#' Create a new variable based on pattern in the argument expr
#'
#' @param data input data
#' @param expr regular expression describing the pattern of interest
#' @param colvec indices of variables of interest
#' @param ignore.case logical
#' @param perl logical
#'
#' @return new variable matching the pattern described in the regular expression
#' @export
#' @importFrom purrr flatten_dbl map_dfr
#'
#' @examples
#'
#' library(dplyr)
#' library(purrr)
#' icd10cm_data150 %>%
#'   mutate(hero = find_diag(., expr = "T401.[1-4]", colvec = c(2:6))) %>%
#'   count(hero)
find_diag <- function(data, expr, colvec, ignore.case = T, perl = T) {

  requireNamespace("dplyr", quietly = T)
  # assign '1' if the regular expression matched
  f1 <- function(x) grepl(expr, x, ignore.case = ignore.case, perl = perl)
  # any 1 in the diagnosis field suffices
  f2 <- function(x) {
    sign(rowSums(x, na.rm = TRUE))
  }

  data %>% as_tibble() %>%
    select({{colvec}}) %>%
    mutate_all(as.character) %>%
    purrr::map_dfr(f1) %>%
    transmute(new_diag = f2(.)) %>%
    flatten_dbl()
}


# a row operation that will form a vector of the first match of a pattern.

#' a row operation that will form a vector of the first match of a pattern.
#'
#'
#' @param data input data
#' @param colvec selected columns to match
#' @param pattern the pattern to match

#' @return return the vector of the matched characters with NA for a no match
#' @export
#' @importFrom purrr transpose detect map map_if
#' @examples
#' dat <- data.frame(x1 = letters[1:3], x2 = c("d", "a", "e"))
#' library(dplyr)
#' library(purrr)
#' dat %>% mutate(x3 = first_valid_regex(., colvec = c(1:2), pattern = "a"))
#'
first_valid_regex <- function(data, colvec, pattern) {

  requireNamespace("dplyr", quietly = T)
  requireNamespace("purrr", quietly = T)

  f0 <- function(x) grepl(pattern = pattern, x, ignore.case = T, perl = T)
  f1 <- function(x) detect(x, f0)
  data %>%
    select({{colvec}}) %>%
    map_dfr(as.character) %>%
    transpose() %>%
    map(f1) %>%
    map_if(is.null, ~NA_character_) %>%
    unlist()
}

# a row operation that will form an index vector of the first match of a pattern

first_valid_index <- function(data, colvec, pattern) {

  requireNamespace("dplyr", quietly = T)
  requireNamespace("purrr", quietly = T)

  f0 <- function(x) grepl(pattern = pattern, x, ignore.case = T, perl = T)
  f1 <- function(x) purrr::detect_index(x, f0)
  data %>%
    select({{colvec}}) %>%
    purrr::map_dfr(as.character) %>%
    purrr::transpose() %>%
    purrr::map_int(f1)
}

# valid external cause
icd10cm__external_cause_ <- "(^[VWX]\\d....|(?!(Y0[79]))Y[0-3]....|Y07.{1,3}|Y09|(T3[679]9|T414|T427|T4[3579]9)[1-4].|(?!(T3[679]9|T414|T427|T4[3579]9))(T3[6-9]|T4[0-9]|T50)..[1-4]|T1491.{0,1}|(T1[5-9]|T5[1-9]|T6[0-5]|T7[1346])...|T75[0-3]..)(A|$)"

#' Find records with valid external causes of injury icd-10-cm.
#'
#' @param data input data
#' @param diag_ecode_col column indices
#'
#' @return valid_external, a binary variable indicating whether the record has (value = 1) a valid external cause of injury icd-10-cm code
#' @export
#'
#' @examples
#'
#' library(dplyr)
#' library(purrr)
#' set.seed(5)
#' icd10cm_data150 %>%
#' matrix_valid_external(diag_ecode_col = c(2:6)) %>%
#' sample_n(10)
matrix_valid_external <- function(data, diag_ecode_col) {

requireNamespace("dplyr", quietly = T)

icd10cm__external_cause_ <- "(^[VWX]\\d....|(?!(Y0[79]))Y[0-3]....|Y07.{1,3}|Y09|(T3[679]9|T414|T427|T4[3579]9)[1-4].|(?!(T3[679]9|T414|T427|T4[3579]9))(T3[6-9]|T4[0-9]|T50)..[1-4]|T1491.{0,1}|(T1[5-9]|T5[1-9]|T6[0-5]|T7[1346])...|T75[0-3]..)(A|$)"

data %>%
  mutate(valid_external = find_diag(.,
                                       expr = icd10cm__external_cause_,
                                       colvec = diag_ecode_col))
}

# first_match ---------------------------------------------------------

#' a row operation that will form a vector of the first match of a pattern.
#'
#'
#' @param data input data
#' @param new_name proposed name for the new variable
#' @param colvec selected columns to match
#' @param pattern the pattern to match

#' @return return the vector of the matched characters with NA for a no match
#'
#' @export
#' @importFrom purrr detect
#'
#' @examples
#'
#' dat <- data.frame(x1 = letters[1:3], x2 = c("d", "a", "e"))
#' library(dplyr)
#' library(purrr)
#' dat %>% first_match(new_name = "x3", colvec = c(1:2), pattern = "a")
#'
first_match <- function(data, new_name, colvec, pattern) {

  requireNamespace("dplyr", quietly = T)
  requireNamespace("purrr", quietly = T)

  # colvec <- enquo(colvec)
  f0 <- function(x) grepl(pattern = pattern, x, ignore.case = T, perl = T)
  f1 <- function(x) detect(x, f0, .default = NA_character_)
  data %>%
    rowwise() %>%
    mutate({{new_name}} := f1(c_across({{colvec}})))
}


# first_match_index ---------------------------------------------------

#' Row operation that creates a vector of indices of the first match of a pattern
#'
#' @param data input data
#'
#' @param colvec selected columns to match
#' @param pattern the pattern to match
#' @param new_name proposed name for the new variable
#' @return return the vector of the indices of the matches with 0 for no match
#' @export
#' @importFrom purrr detect_index
#'
#' @examples
#'
#' dat <- data.frame(x1 = letters[1:3], x2 = c("d", "a", "e"))
#' library(dplyr)
#' library(purrr)
#' dat %>% first_match_index(new_name = "x3", colvec = c(1:2), pattern = "a")
#'
first_match_index <- function(data, new_name, colvec, pattern) {

  requireNamespace("dplyr", quietly = T)
  requireNamespace("purrr", quietly = T)

  f0 <- function(x) grepl(pattern = pattern, x, ignore.case = T, perl = T)
  f1 <- function(x) detect_index(x, f0)
  data %>%
    rowwise() %>%
    mutate({{new_name}} := f1(c_across({{colvec}})))
}



# create_indicator ----------------------------------------------------

#' Create a new indicator based on pattern in the argument expr
#'
#' @param data input data
#' @param new_name proposed name for the indicator
#' @param expr regular expression describing the pattern of interest
#' @param colvec indices or names of variables (where are the pattern) without quotes
#' @param ignore.case logical
#' @param perl logical
#'
#' @return new indicator matching the pattern described in the regular expression
#' @export
#' @importFrom dplyr if_any
#'
#' @examples
#'
#' library(dplyr)
#' library(injurymatrix)
#' icd10cm_data150 %>%
#'   create_indicator(new_name = "hero", expr = "T401.[1-4]", colvec = c(2:6))) %>%
#'   count(hero)
#'
create_indicator <- function(data, new_name, expr,
                                 colvec, ignore.case = T, perl = T) {
  requireNamespace("dplyr", quietly = T)

  data %>%
    mutate({{new_name}} := case_when(
      if_any({{colvec}},
             function(x) grepl(expr, x, ignore.case = ignore.case, perl = perl)) ~ 1,
      TRUE ~ 0))
}
epinotes/injurymatrix documentation built on July 28, 2022, 12:34 a.m.