R/get_analysis.R

Defines functions get_analysis

Documented in get_analysis

#' Real World Time on Treatment Estimation Analysis
#'
#' This function runs ToT estimation and returns rwToT statistics for additional
#' analysis that includes Restricted mean (RMST) and Treatment rates.
#' @param cohort_name Cohort Name
#' @param df_cohort A dataframe. Minimum required columns:
#' \itemize{
#' \item{`STATUS` : Censor status (0 = censored, 1 = event)}
#' \item{`DD` : Number of days between first and last dose}
#' \item{`START_DATE` : First dose date}
#' \item{`LAST_ACTIVITY_DATE` : Last Activity date}
#' }
#' @param database_cutoff_date Database Cutoff Date
#' @param days_in_yr Number of days in year used for days to month conversions.
#' Default value is 365.
#' @return A list of 2
#' \itemize{
#' \item{output_table  :A dataframe with rwToT Statistics table}
#' \item{parametric_km :A dataframe with rwToT KM curve data upto 48 months}
#' }
#' @seealso \code{\link{get_restricted_mean}}, \code{\link{get_treatment_rate}}
#' @examples
#' #To compute rwToT estimation
#' tot.estimation <- get_analysis(df_cohort = rwToT_test_file_A,
#'                                cohort_name = "Cohort A",
#'                                database_cutoff_date = "2021-04-21")
#'
#'
#' # ToT estimation analysis table
#' tot.estimation$output_table
#'
#' # ToT estimation parametric data
#' tot.estimation$parametric_km
#' @export
get_analysis <-  function(df_cohort,
                          cohort_name,
                          database_cutoff_date,
                          days_in_yr = 365) {


  #dataframe validation to check the column requirement-------------------------
  validate_df_columns(df_cohort)

  database_cutoff_date    <- as.Date(database_cutoff_date)

  #data selection---------------------------------------------------------------
  df_cohort  <- df_cohort %>%
    #convert Start_date and Last_activity to Date format
    mutate(START_DATE = as.Date(START_DATE, format = "%Y-%m-%d"),
           LAST_ACTIVITY_DATE = as.Date(LAST_ACTIVITY_DATE,
                                        format = "%Y-%m-%d")) %>%
    #convert DD days to Months
    mutate(DD = (DD / days_in_yr) * 12) %>%
    #select only required cols - STATUS, DD, START_DATE, LAST_ACTIVITY_DATE
    select(STATUS, DD, START_DATE, LAST_ACTIVITY_DATE)

  #create survival time object and add to dataframe
  df_cohort  <- df_cohort %>%
    mutate(SurvObj = Surv(DD, STATUS)) #CNSR <- STATUS 1- event 0- censor

  #create survfit object for survival curves
  fit_km     <- survfit(SurvObj ~ 1, data = df_cohort, conf.type = "log-log")


  #parameters used in output table----------------------------------------------
  output_npatients <-  length(df_cohort$DD)
  table_censor_counts <- count_proportion(df_cohort)
  names(table_censor_counts) <- c("censor", "discontinue")
  output_discontinued <- table_censor_counts["discontinue"]
  output_censored <- table_censor_counts["censor"]



  #get Median (Range) Database Followup data------------------------------------
  calc_median_database_followup <- round(12 * median(database_cutoff_date - df_cohort$START_DATE) / days_in_yr, 1)
  calc_range_database_followup <- round(12 * range(database_cutoff_date - df_cohort$START_DATE) / days_in_yr, 1)

  if (calc_range_database_followup[1] == 0) {
    calc_range_database_followup[1] <- "1 day"
    }

  output_median_database_followup <-  paste(calc_median_database_followup, " (",
                                            calc_range_database_followup[1], " - ",
                                            calc_range_database_followup[2], ")", sep = "")


  #get Median (Range) Patient Followup data-------------------------------------
  calc_median_patient_followup <- round(12 * median(df_cohort$LAST_ACTIVITY_DATE - df_cohort$START_DATE) / days_in_yr, 1)
  calc_range_patient_followup  <- round(12 * range(df_cohort$LAST_ACTIVITY_DATE - df_cohort$START_DATE) / days_in_yr, 1)

  if (calc_range_patient_followup[1] == 0) {
    calc_range_patient_followup[1] <- "1 day"
    }

  output_median_patient_followup <-  paste(calc_median_patient_followup, " (",
                                           calc_range_patient_followup[1], " - ",
                                           calc_range_patient_followup[2], ")", sep = "")



  #get Observed Median (Range)--------------------------------------------------
  calc_range_observed_median <- round(range(df_cohort$DD), 1)

  if (calc_range_observed_median[1] == 0) {
    calc_range_observed_median[1] <- "1 day"
    }

  output_observed_median <- paste(round(median(df_cohort$DD), 1), " (",
                                 calc_range_observed_median[1], " - ",
                                 calc_range_observed_median[2], ")", sep = "")

  #get KM Median (Range)--------------------------------------------------------
  calc_km_median <- fit_km %>%
                    survival:::survmean(rmean = max(df_cohort$DD))

  output_km_median <- paste(round(calc_km_median$matrix["median"], 1), " (",
                           round(calc_km_median$matrix["0.95LCL"], 1), " - ",
                           round(calc_km_median$matrix["0.95UCL"], 1), ")", sep = "")

  #rmeans-----------------------------------------------------------------------
  output_rmean_09mon <- get_restricted_mean(df_cohort, 9)
  output_rmean_12mon <- get_restricted_mean(df_cohort, 12)
  output_rmean_18mon <- get_restricted_mean(df_cohort, 18)
  output_rmean_24mon <- get_restricted_mean(df_cohort, 24)
  output_rmean_30mon <- get_restricted_mean(df_cohort, 30)
  output_rmean_36mon <- get_restricted_mean(df_cohort, 36)
  output_rmean_48mon <- get_restricted_mean(df_cohort, 48)

  #treatment Rate outputs-------------------------------------------------------
  output_incidence_06mon <- get_treatment_rate(fit_km, 6)
  output_incidence_12mon <- get_treatment_rate(fit_km, 12)
  output_incidence_18mon <- get_treatment_rate(fit_km, 18)
  output_incidence_24mon <- get_treatment_rate(fit_km, 24)
  output_incidence_27mon <- get_treatment_rate(fit_km, 27)
  output_incidence_30mon <- get_treatment_rate(fit_km, 30)
  output_incidence_33mon <- get_treatment_rate(fit_km, 33)
  output_incidence_36mon <- get_treatment_rate(fit_km, 36)

  #output Table Values----------------------------------------------------------
  table_values <- as.data.frame(c(output_npatients,
                                  output_discontinued,
                                  output_censored,
                                  output_median_database_followup,
                                  output_median_patient_followup,
                                  output_observed_median,
                                  output_km_median,
                                  output_rmean_09mon$mean,
                                  output_rmean_12mon$mean,
                                  output_rmean_18mon$mean,
                                  output_rmean_24mon$mean,
                                  output_rmean_30mon$mean,
                                  output_rmean_36mon$mean,
                                  output_rmean_48mon$mean,
                                  output_incidence_06mon,
                                  output_incidence_12mon,
                                  output_incidence_18mon,
                                  output_incidence_24mon,
                                  output_incidence_27mon,
                                  output_incidence_30mon,
                                  output_incidence_33mon,
                                  output_incidence_36mon,
                                  as.character(database_cutoff_date)))

  #output Table Description-----------------------------------------------------
  table_names <- c("No. patient",
                   "N discontinued (%)",
                   "N censored (%)",
                   "Median (range) database follow-up months",
                   "Median (range) patient follow-up months",
                   "Observed Median rwToT (range)",
                   "KM Median rwToT (95% CI)",
                   "Restricted mean rwToT @ 09 months (95% CI)",
                   "Restricted mean rwToT @ 12 months (95% CI)",
                   "Restricted mean rwToT @ 18 months (95% CI)",
                   "Restricted mean rwToT @ 24 months (95% CI)",
                   "Restricted mean rwToT @ 30 months (95% CI)",
                   "Restricted mean rwToT @ 36 months (95% CI)",
                   "Restricted mean rwToT @ 48 months (95% CI)",
                   "6 months on treatment rate in % (95% CI)",
                   "12 months on treatment rate in % (95% CI)",
                   "18 months on treatment rate in % (95% CI)",
                   "24 months on treatment rate in % (95% CI)",
                   "27 months on treatment rate in % (95% CI)",
                   "30 months on treatment rate in % (95% CI)",
                   "33 months on treatment rate in % (95% CI)",
                   "36 months on treatment rate in % (95% CI)",
                   "Database cutoff")

  #output table-----------------------------------------------------------------
  output_table <- as.data.frame(cbind(table_names,
                                      table_values),
                                stringsAsFactors = FALSE)

  names(output_table) <- c("Description", cohort_name)

  #output return----------------------------------------------------------------
  return(list("output_table"  = output_table,
              "parametric_km" = output_rmean_48mon$parametric_km))
}
sutsabs/rwToT2 documentation built on Feb. 18, 2022, 2:30 a.m.