R/identify_icd.R

Defines functions identify_icd_multiple identify_icd

Documented in identify_icd identify_icd_multiple

#' Find diagnosis using ICD-codes
#'
#' This function allows you to find a diagnosis based on ICD-codes. Creates a new column based on the ICD-codes.
#' @param .data A tibble with data in QTF-format.
#' @param icd A character vector (or scalar) with ICD-codes that should be identified in the data.
#' @param new_variable A character string with the column name for the new variable.
#' @param diag_level A character string to decide the diagnosis level. Options are "primary", "secondary" or "both". Defaults to "both".
#' @param register A character string to decide which NPR register to be used. Options are "inpatient", "outpatient", or "both. Defaults to "both".
#' @param simple Only the new variable column will be returned. Support function. Defaults to FALSE.
#' @param patient_inclusion Returns a vector with unique patient id:s that can be used to filter data at a later stage. Defaults to FALSE.
#'
#'@examples
#'data(data_icd)
#'data_icd %>% combine_sdia() %>% identify_icd("L405", "psa")
#'
#' @keywords ICD-codes, Quantify
#' @export
#' identify_icd

identify_icd <- function(.data, icd, new_variable = "new_variable", diag_level = "both", register = "both", simple = FALSE, patient_inclusion = FALSE)
  {

  if(diag_level == "primary") {

    primary <- TRUE
    secondary <- FALSE

  } else if (diag_level == "secondary") {

    primary <- FALSE
    secondary <- TRUE

  } else if (diag_level == "both") {

    primary <- TRUE
    secondary <- TRUE

  } else {

    stop("'diag_level' needs to be specified with either: 'primary', 'secondary', or 'both'")

  }

  if(register == "inpatient") {

    inpatient <- TRUE
    outpatient <- FALSE

  } else if (register == "outpatient") {

    inpatient <- FALSE
    outpatient <- TRUE

  } else if (register == "both") {

    inpatient <- TRUE
    outpatient <- TRUE

  } else {

    stop("'register' needs to be specified with either: 'inpatient', 'outpatient', or 'both'")

  }


  if (is.character(new_variable) == FALSE) {
    stop("'new_variable' must be a character variable")
  }

  if (primary == FALSE & secondary == FALSE) {
    stop("'primary' and 'secondary' can't be false at the same time")
  }

  if (outpatient == FALSE & inpatient == FALSE) {
    stop("'outpatient' and 'inpatient' can't be false at the same time")
  }

      if (primary == FALSE) {

        if(outpatient == FALSE) {

          .data <- .data %>% mutate(!!new_variable := case_when(
            str_detect(sdia, paste0(icd, collapse = "|")) & source == "inpatient" ~ TRUE,
            TRUE ~ FALSE))

        } else if (inpatient == FALSE) {

          .data <- .data %>% mutate(!!new_variable := case_when(
            str_detect(sdia, paste0(icd, collapse = "|")) & source == "outpatient" ~ TRUE,
            TRUE ~ FALSE))

        } else if (outpatient == TRUE & inpatient == TRUE) {

          .data <- .data %>% mutate(!!new_variable := case_when(
            str_detect(sdia, paste0(icd, collapse = "|")) ~ TRUE,
            TRUE ~ FALSE))

        }

    } else if (secondary == FALSE) {

      if (outpatient == FALSE) {

        .data <- .data %>% mutate(!!new_variable := case_when(
          str_detect(pdia, paste0("^", icd, collapse = "|")) & source == "inpatient" ~ TRUE,
          TRUE ~ FALSE))

      } else if (inpatient == FALSE) {

        .data <- .data %>% mutate(!!new_variable := case_when(
          str_detect(pdia, paste0("^", icd, collapse = "|")) & source == "outpatient" ~ TRUE,
          TRUE ~ FALSE))

      } else if (outpatient == TRUE & inpatient == TRUE) {

        .data <- .data %>% mutate(!!new_variable := case_when(
          str_detect(pdia, paste0("^", icd, collapse = "|")) ~ TRUE,
          TRUE ~ FALSE))

      }

    } else {

      if (outpatient == FALSE) {

        .data <- .data %>% mutate(!!new_variable := case_when(
          str_detect(pdia, paste0("^", icd, collapse = "|")) & source == "inpatient" ~ TRUE,
          str_detect(sdia, paste0(icd, collapse = "|")) & source == "inpatient" ~ TRUE,
          TRUE ~ FALSE))

      } else if (inpatient == FALSE) {

        .data <- .data %>% mutate(!!new_variable := case_when(
          str_detect(pdia, paste0("^", icd, collapse = "|")) & source == "outpatient" ~ TRUE,
          str_detect(sdia, paste0(icd, collapse = "|")) & source == "outpatient" ~ TRUE,
          TRUE ~ FALSE))

      } else {

        .data <- .data %>% mutate(!!new_variable := case_when(
          str_detect(pdia, paste0("^", icd, collapse = "|")) ~ TRUE,
          str_detect(sdia, paste0(icd, collapse = "|")) ~ TRUE,
          TRUE ~ FALSE))

      }

    }

    if (simple == TRUE) {
      .data <- .data %>%
        select(!!new_variable)
    }

    if (patient_inclusion == TRUE & simple == FALSE) {
      x <- sym(new_variable) # To get a string into a object name
      x <- enquo(x) # To evaluate the object

      .data_temp <- .data %>%
        group_by(patid) %>%
        summarise(sum_var = sum(!!x), .groups = "drop") %>%
        mutate(!!x := case_when(
          sum_var > 0 ~ TRUE,
          sum_var == 0 ~ FALSE))

      .data <- .data_temp %>%
        filter(!!x == TRUE) %>%
        select(patid) %>%
        pull()
    }

  return(.data)

}

#' Find multiple diagnoses using ICD-codes
#'
#' This function allows you to find multiple diagnoses based on ICD-codes. Creates new columns based on the ICD-codes.
#' @param .data A tibble with data in QTF-format
#' @param icd_list A list object with two or more elements. Each element should be a character vector or a scalar, with ICD-codes that should be identified in the data.
#' @param names A list object with two or more elements. Each element should be a character string with the column name for the new variable. Should correspond to the elements in icd_list.
#'
#'@examples
#'icd_list <- list("L405", c("Z879", "M403")
#'names <- list("psa", "other")
#'data(data_icd)
#'data_icd %>% combine_sdia() %>% identify_icd(icd, names)
#'
#' @keywords ICD-codes, Quantify
#' @export
#' identify_icd_multiple


identify_icd_multiple <- function(.data, icd_list, names) {
  list_of_icd_vec <- map2(icd_list, names, function(.x, .y) identify_icd.f(.data, .x, .y, simple = TRUE))
  # Check that the vectors in the list are the same length
  if (!all(sapply(list_of_icd_vec, length) == length(list_of_icd_vec[[1]]))) {
    stop("Vectors are not the same length")
  }
  # Bind all separate vectors in the list into a tibble
  new_.data <- bind_cols(list_of_icd_vec)
  # Bind the original data with the new tibble
  bind_cols(.data, new_.data)
}
liljamathias/QRfunctions documentation built on Jan. 20, 2021, 1:32 a.m.