data-raw/testdata-tb_monthly_report.R

# tables of data to create

# total number of patients
n_patients <- 1000
# number of days in current month
days_current_month <- lubridate::day(Sys.Date()) - 1

# proportion of patients in current month
p_month <- 0.2
# proportion of patients in current year
p_year <- 0.3
# proportion of patients for DS-outcome reporting
p_ds_outcome <- 0.25
# proportion of patients for DR-outcome reporting
p_dr_outcome <- 0.25

# =============
set.seed(9911)

data_tb_monthly <- list()


data_tb_monthly$"2_treatment_initiation_début_de_traitement" <- data.frame(
  patient_id = 1:n_patients,
  tb_treatment_start_date = c(Sys.Date() - sample(0:days_current_month, n_patients * p_month, replace = TRUE),
                              Sys.Date() - sample((days_current_month+1):365, n_patients * p_year, replace = TRUE),
                              Sys.Date() - months(15) + sample(-45:45, n_patients * p_ds_outcome, replace = TRUE),
                              Sys.Date() - months(30) + sample(-90:90, n_patients * p_ds_outcome, replace = TRUE)),
  date_initiation_consulations = lubridate::dmy("01/12/2019"), 
  type_of_tb_regimen = sample(
    c(
      "Only 1st line drugs",
      "Regimen including 2nd line drugs",
      "Not defined",
      NA_character_
    ), n_patients, replace = TRUE
  ),
  start_tb_treatment = sample(c("Yes", "No", NA_character_), n_patients, replace = TRUE, 
                              prob = c(0.8, 0.15, 0.05)),
  stringsAsFactors = FALSE
)


  
data_tb_monthly$"1_history_tb_antécédents_tb" <- data.frame(
        patient_id = 1:n_patients,
        obs_datetime = Sys.time(),
        encounter_id = sample(1:20, n_patients, replace = TRUE),
        date_tb_history_review = data_tb_monthly$"2_treatment_initiation_début_de_traitement"$tb_treatment_start_date - 
          sample(90:-10, n_patients, replace = TRUE),
        who_registration_group = sample(
          c(
            "New",
            "Relapse",
            "Treatment after loss to followup",
            "Treatment After Failure",
            "Other previously treated patients",
            NA_character_
          ), n_patients, replace = TRUE, prob = c(rep(0.19, 5), 0.05)
        ),
        previously_treated_group = sample(
          c(
            "Previously treated only with first line drugs",
            "Previously treated with second line drugs",
            "History unclear/unknown",
            NA_character_
          ), n_patients, replace = TRUE, prob = c(rep(0.32, 3), 0.04)
        ),
        mtb_confirmation = sample(
          c(
            "Bacteriologically Confirmed",
            "Non-confirmed, clinically diagnosed",
            NA_character_
          ), n_patients, replace = TRUE, prob = c(rep(0.49, 2), 0.02)
        ),
        drug_resistance_profile = sample(
          c(
            "Profile unconfirmed",
            "Confirmed drug susceptible",
            "Confirmed drug resistant TB",
            "Unknown",
            NA_character_
          ), n_patients, replace = TRUE, prob = c(rep(0.24, 4), 0.04)
        ),
        sub_class_of_drug_resistance_profile = sample(c(
          "H(S) resistance",
          "HE(S) resistance",
          "R resistance with H susceptibility",
          "GeneXpert RIF resistance only",
          "Confirmed MDR",
          "Confirmed pre-XDR (FQ)",
          "Confirmed pre-XDR (Inj)",
          "Confirmed XDR",
          "Other",
          NA_character_),n_patients, replace = TRUE, prob = c(rep(0.11, 9), 0.01)
        ), 
        mdr_tb_diagnosis_date = data_tb_monthly$"2_treatment_initiation_début_de_traitement"$tb_treatment_start_date - 
          sample(20:60, n_patients, replace = TRUE),
        stringsAsFactors = FALSE
      )
  
data_tb_monthly$"disease_site_s" <- data.frame(
      patient_id = 1:n_patients,
      obs_datetime = data_tb_monthly$"1_history_tb_antécédents_tb"$obs_datetime,
      encounter_id = data_tb_monthly$"1_history_tb_antécédents_tb"$encounter_id,
      disease_site_s = sample(c(
        "Pulmonary",
        "Extrapulmonary",
        NA_character_
      ), n_patients, replace = TRUE, prob = c(rep(0.49, 2), 0.02)),
      stringsAsFactors = FALSE
    )

# Form 4 Weight and height ----
data_tb_monthly$"4_weight_assessment_évaluation_du_poids" <- data.frame(
  patient_id = 1:n_patients, 
  assessment_date = data_tb_monthly$"2_treatment_initiation_début_de_traitement"$tb_treatment_start_date - 
    floor(rbeta(n_patients, 2, 5) * 15), 
  weight = abs(rnorm(n_patients, mean = 80, sd = 30)), 
  height = abs(rnorm(n_patients, mean = 170, sd = 70)),
  stringsAsFactors = FALSE
)


data_tb_monthly$"person_details_default" <- data.frame(
        person_id = 1:n_patients,
        gender = sample(c("M", "F", "U", NA_character_), n_patients, replace = TRUE,
                        prob = c(0.48, 0.48, 0.01, 0.01)),
        age = as.character(sample(1:99, n_patients, replace = TRUE)),
        stringsAsFactors = FALSE
      )
  
data_tb_monthly$"6_treatment_outcome_tb_issue_du_traitement_tb" <- data.frame(
      patient_id = 1:n_patients,
      end_of_treatment_date = data_tb_monthly$"2_treatment_initiation_début_de_traitement"$tb_treatment_start_date +
                                  c(rep(lubridate::as.period(NA), n_patients * (p_month + p_year)),
                                  rep(months(15), p_ds_outcome * n_patients), 
                                    rep(months(30), p_dr_outcome * n_patients)),
      type_of_tb_treatment_finished = c(rep(NA_character_, n_patients * (p_month + p_year)),
                                            rep("DS-TB", p_ds_outcome * n_patients), 
                                            rep("DR-TB", p_dr_outcome * n_patients)),
      outcome = c(rep(NA_character_, n_patients * (p_month + p_year)), 
                  sample(c(
        "Cured",
        "Completed",
        "Died",
        "Failed",
        "LTFU",
        "Not Evaluated",
        "Treatment adapted",
        NA_character_), n_patients * (p_ds_outcome + p_dr_outcome),
        replace = TRUE, prob = c(0.3, 0.4, 0.03, 0.1, 0.1, 0.03, 0.03, 0.01)
      )),
      stringsAsFactors = FALSE
    )


data_tb_monthly$"person_attributes" <-
  data.frame(
    person_id = 1:n_patients,
    Admission_date_Date_d_admission = 
      format(data_tb_monthly$"2_treatment_initiation_début_de_traitement"$tb_treatment_start_date
                                             - lubridate::days(sample(
      700:-7, n_patients, replace = TRUE)),
    format = "%Y-%m-%dT%H:%M:%OS3%z"),
    stringsAsFactors = FALSE
  )

# save tb_month raw data for SQL testing db
saveRDS(data_tb_monthly, "inst/testdata/data_tb_monthly.rds", version = 2)
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.