R/LoT_main.R

Defines functions create_LoT

Documented in create_LoT

#' Create LoT table
#'
#' Create_lot executes LoT business rules to create the Line of Therapy table. The function
#' loops through each unique patients in the input claims dataframe to produce line data for
#' those patients based on the business rules.\cr
#' LoT Steps:\cr
#' \enumerate{
#'   \item Create unique patient list to loop through
#'   \item Loop through each patient in the patient list
#'   \item For each patient, create a copy of the claims data filtered for only that patient
#'   \item For that patient's claims data, order the data by ascending med start and grab line data on a step-wise line by line basis\
#'   \item Add the output to the LoT table
#'   \item After grabbing each line data, cut the claims data to snip out the line information we already extracted so that the first line of the post-snipped data is the start of the new line
#'   \item Repeat 3-5 until all lines for that patient is gathered. Then move to the next patient in the patient list until all patients are processed
#'   }
#'
#' @param df claims dataframe or the drug episode table
#' @param input_indication indication of interest as a string
#' @param input_r_window a number representing the threshold number of days for the regimen defining window to detect combination drugs
#' @param input_l_disgap a number representing the threshold number of days of gap in administration before advancing the line
#' @param input_drug_switch_ignore TRUE or FALSE flag to see if a drug is administered during the r_window period, but never administered again after, then ignore it from the regimen
#' @param input_combo_dropped_line_advance TRUE or FALSE flag to see if a combination drug is dropped, whether or not it triggers an advance in line number
#' @param patientid_column The name of the column in \emph{df} containing the patient IDs (as a string)
#' @param drugname_columm The name of the column in \emph{df} containing the drug names (as a string)
#' @param medstart_column The name of the column in \emph{df} containing the drug start date (as a string)
#' @param medend_column The name of the column in \emph{df} containing the drug end date (as a string)
#' @return list of dataframes: \cr
#' \describe{
#'   \item{output_lot}{A line of therapy dataframe containing patient id, line number, line Name, line start date, line end date, line type, and line end reason}
#'   \item{output_doses}{A dataframe with patient dosage information w/ line information}
#'   \item{input_data}{Input dataframe for used in generating the LoT table}
#' }
#'
#' @importFrom tibble is_tibble
#' @importFrom stringr str_detect


#' @export
create_LoT <- function(df,
                       input_indication = "NSCLC",
                       input_r_window = 28,
                       input_l_disgap = 120,
                       input_drug_switch_ignore = FALSE,
                       input_combo_dropped_line_advance = FALSE,
                       patientid_column = "patient_id",
                       drugname_column = "drug_name",
                       medstart_column = "med_start",
                       medend_column = "med_end") {
  patientid <- rlang::sym(patientid_column)
  drugname <- rlang::sym(drugname_column)
  medstart <- rlang::sym(medstart_column)
  medend <- rlang::sym(medend_column)

  if (is_tibble(df)) {
    df <- as.data.frame(df)
  }

  input_data <-
    df %>% select(!!patientid, !!drugname, !!medstart, !!medend)
  colnames(input_data) <-
    c("PATIENT_ID", "MED_NAME", "MED_START", "MED_END")

  input_unique_patients <- unique(input_data$PATIENT_ID)

  input_data$MED_START <-
    as.Date(input_data$MED_START,
      tryFormats = c("%Y-%m-%d", "%m/%d/%y")
    )
  input_data$MED_END <-
    as.Date(input_data$MED_END, tryFormats = c("%Y-%m-%d", "%m/%d/%y"))
  input_data$MED_NAME <- tolower(input_data$MED_NAME)

  output_lot <- data.frame(
    PATIENT_ID = character(),
    LINE_NUMBER = character(),
    LINE_NAME = character(),
    START_DATE = as.Date(character()),
    END_DATE = as.Date(character()),
    LINE_TYPE = character(),
    IS_MAINTENANCE = logical(),
    ADD_EXEMPTION = logical(),
    SUB_EXEMPTION = logical(),
    GAP_EXEMPTION = logical(),
    NAME_EXEMPTION = logical(),
    LINE_END_REASON = character(),
    ENHANCED_COHORT = character(),
    INDEX_DATE = as.Date(character()),
    stringsAsFactors = FALSE
  )

  output_doses <- data.frame(
    PATIENT_ID = character(),
    MED_START = as.Date(character()),
    MED_END = as.Date(character()),
    MED_NAME = as.Date(character()),
    LINE_NUMBER = character(),
    LINE_NAME = character(),
    stringsAsFactors = FALSE
  )



  for (i in 1:length(input_unique_patients)) {
    # Get data for that patient
    tmp_data <-
      input_data %>% filter(PATIENT_ID == input_unique_patients[i])

    tmp_data <- tmp_data %>% arrange(MED_START)
    input_index_date <- tmp_data[1, "MED_START"]


    # Scan patient claims data to acquire line information on a step-wise line by line basis

    # Initialize the line number and other parameters to their initial values
    tmp_line_number <- 0
    tmp_previous_line <- NULL
    tmp_is_next_maintenance <- FALSE

    # Scan through the patient's data until it is all through
    while (nrow(tmp_data) > 0) {
      # Get Regimen and Line Start Information
      tmp_regimen <- get_regimen(
        df = tmp_data,
        r_window = input_r_window
      )

      # Acquire rest of line data
      tmp_line_data <- get_line_data(
        df = tmp_data,
        r_regimen = tmp_regimen,
        l_disgap = input_l_disgap,
        l_line_number = tmp_line_number,
        l_is_next_maintenance  = tmp_is_next_maintenance,
        input_r_window,
        input_drug_switch_ignore,
        input_combo_dropped_line_advance,
        input_indication
      )

      tmp_line_name <- tmp_line_data$line_name
      tmp_line_type <- tmp_line_data$line_type
      tmp_line_start <- tmp_line_data$line_start
      tmp_line_end <- tmp_line_data$line_end
      tmp_line_next_start <- tmp_line_data$line_next_start
      tmp_line_end_reason <- tmp_line_data$line_end_reason
      tmp_line_number <- tmp_line_data$line_number
      tmp_line_is_maintenance <-
        tmp_line_data$line_is_maintenance
      tmp_is_next_maintenance <-
        tmp_line_data$line_is_next_maintenance
      tmp_line_add_exemption <-
        tmp_line_data$line_add_exemption
      tmp_line_sub_exemption <-
        tmp_line_data$line_sub_exemption
      tmp_line_gap_exemption <-
        tmp_line_data$line_gap_exemption
      tmp_line_name_exemption <-
        tmp_line_data$line_name_exemption


      # Acquire dosage information associated with this line
      if (is.null(tmp_line_next_start) ||
        is.na(tmp_line_next_start)) {
        tmp_output_doses <- tmp_data
      }
      else {
        tmp_output_doses <-
          tmp_data %>% filter(MED_START < tmp_line_next_start)
      }

      # Append line data to final output
      tmp_output_lot <- data.frame(
        "PATIENT_ID" = input_unique_patients[[i]],
        "LINE_NUMBER" = tmp_line_number,
        "LINE_NAME" = tmp_line_name,
        "START_DATE" = tmp_line_start,
        "END_DATE" = tmp_line_end,
        "LINE_TYPE" = tmp_line_type,
        "IS_MAINTENANCE" = tmp_line_is_maintenance,
        "ADD_EXEMPTION" = tmp_line_add_exemption,
        "SUB_EXEMPTION" = tmp_line_sub_exemption,
        "GAP_EXEMPTION" = tmp_line_gap_exemption,
        "NAME_EXEMPTION" = tmp_line_name_exemption,
        "LINE_END_REASON" = tmp_line_end_reason,
        "ENHANCED_COHORT" = input_indication,
        "INDEX_DATE" = input_index_date
      )

      output_lot <- rbind(output_lot, tmp_output_lot)

      # Append patient dosage information w/ line information
      tmp_output_doses <-
        tmp_output_doses %>% select(PATIENT_ID = PATIENT_ID, MED_START, MED_END, MED_NAME)
      tmp_output_doses$LINE_NUMBER <- tmp_line_number
      tmp_output_doses$LINE_NAME <- tmp_line_name
      tmp_output_doses$MED_NAME <-
        sapply(tmp_output_doses$MED_NAME, capitalize)

      output_doses <- rbind(output_doses, tmp_output_doses)

      tmp_previous_line <- tmp_line_number

      # Cut the data to the next line
      if (is.null(tmp_line_next_start) ||
        is.na(tmp_line_next_start)) {
        break
      }
      tmp_cut <- snip_dataframe(tmp_data, tmp_line_next_start)
      tmp_data <- tmp_cut$after
    }
  }


  outfile <-
    list(
      "output_lot" = output_lot,
      "output_doses" = output_doses,
      "input_data" = input_data
    )
  invisible(outfile)
}
sutsabs/rwLoT documentation built on Feb. 11, 2022, 12:20 a.m.