data-raw/testdata-hcv_monthly_report.R

# generate test data for HCV monthly report
set.seed(9912)

# number of patients
n_patients <- 1000

# number of days in current month
days_current_month <- lubridate::day(Sys.Date()) - 1
# momth proportion
p_month <- 0.3
# year proportion
p_year <- 0.3

# current Sys.time()
ctime <- Sys.time()


# initiate data list
data_hcv_monthly <- list()

data_hcv_monthly$"2_treatment_initiation_début_de_traitement" <- data.frame(
  patient_id = 1:n_patients,
  start_hepc_treatment = sample(c("Yes", "No", NA_character_), n_patients, 
                                replace = TRUE, 
                                prob = c(0.90, 0.05, 0.05)),
  hepc_treatment_start_date = c(Sys.Date() - sample(0:days_current_month, n_patients * p_month, replace = TRUE),
                                Sys.Date() - sample((days_current_month+1):365, n_patients * p_year, replace = TRUE),
                                Sys.Date() - months(12) + sample(-45:45, n_patients - (n_patients * p_month) - (n_patients * p_year), 
                                                                 replace = TRUE)),
  date_initiation_consulations = rep(Sys.Date(), n_patients),
  stringsAsFactors = FALSE
)


data_hcv_monthly$"person_details_default" <- data.frame(
  person_id = 1:n_patients,
  gender = sample(c("M", "F", "U", NA_character_), n_patients, replace = TRUE,
                  prob = c(0.48, 0.48, 0.01, 0.01)),
  age = as.character(sample(1:99, n_patients, replace = TRUE)),
  stringsAsFactors = FALSE
)

data_hcv_monthly$"person_attributes" <-
  data.frame(
    person_id = 1:n_patients,
    Admission_date_Date_d_admission = 
      format(data_hcv_monthly$"2_treatment_initiation_début_de_traitement"$hepc_treatment_start_date
             - lubridate::days(sample(
               700:-7, n_patients, replace = TRUE)),
             format = "%Y-%m-%dT%H:%M:%OS3%z"),
    stringsAsFactors = FALSE
  )

# Form 4 Weight and height ----
data_hcv_monthly$"4_weight_assessment_évaluation_du_poids" <- data.frame(
  patient_id = 1:n_patients, 
  assessment_date = data_hcv_monthly$"2_treatment_initiation_début_de_traitement"$hepc_treatment_start_date - 
    floor(rbeta(n_patients, 2, 5) * 15), 
  weight = abs(rnorm(n_patients, mean = 80, sd = 30)), 
  height = abs(rnorm(n_patients, mean = 170, sd = 70)),
  stringsAsFactors = FALSE
)


data_hcv_monthly$"4_clin_assess_treat_hcv_éval_clin_trait_vhc" <- 
  data.frame(
    patient_id = 1:n_patients,
    obs_datetime = ctime,
    visit_date = c(data_hcv_monthly$"2_treatment_initiation_début_de_traitement"$hepc_treatment_start_date + 
                     sample(-5:30, n_patients, replace = TRUE)),
    treatment_status_today = sample(c("Not assessed / Pending Results",
                                      "Eligible / Already under treatment",
                                      "Not eligible",
                                      "Follow up after treatment",
                                      "Not specified",
                                      NA_character_), n_patients, replace = TRUE,
                                    prob = c(0.05, 0.80, 0.03, 0.10, 0.01, 0.01)),
    decision_on_treatment_care = sample(c("Waiting List",
                                          "Initiation/ Re-initiation",
                                          "Continuation same dose",
                                          "Modification due to adverse event",
                                          "Modification due to other reasons",
                                          "Discontinuation due to adverse event",
                                          "Discontinuation (Stop) for other reasons",
                                          "Treatment finished",
                                          "Not specified", 
                                          NA_character_), n_patients, replace = TRUE,
                                        prob = c(0.05, 0.60, 0.10, 0.10, 0.05, 0.01, 0.01, 0.07, 0.01, 0)),
    stringsAsFactors = FALSE
  )

data_hcv_monthly$"4_performance_status_statut_de_performance" <- 
  data.frame(
    patient_id = 1:n_patients, 
    assessment_date = c(data_hcv_monthly$"2_treatment_initiation_début_de_traitement"$hepc_treatment_start_date - 
                          sample(0:90, n_patients, replace = TRUE)),
    performance_status_ecog = sample(c("0=Fully active",
                                       "1=Ambulatory",
                                       "2=Capable of self care",
                                       "3=Limited self care",
                                       "4=Completely disabled",
                                       NA_character_), n_patients, replace = TRUE, 
                                     prob = c(rep(0.19, 5), 0.05)),
    stringsAsFactors = FALSE
  )

data_hcv_monthly$"3_lab_virology_lab_virologie" <- 
  data.frame(
    patient_id = 1:n_patients, 
    date_sample_examination = c(data_hcv_monthly$"2_treatment_initiation_début_de_traitement"$hepc_treatment_start_date - 
                                  sample(0:90, n_patients, replace = TRUE)), 
    hcv_genotype_done = sample(c("Yes",
                                 "No",
                                 NA_character_), n_patients, replace = TRUE, 
                               prob = c(0.9, 0.08, 0.02)),
    fibroscan_staging_category = sample(c("F0 no fibrosis",
                                          "F1 mild fibrosis",
                                          "F2 moderate fibrosis",
                                          "F3 severe fibrosis",
                                          "F4 cirrhosis",
                                          "Not specified",
                                          NA_character_), n_patients, replace = TRUE,
                                        prob = c(rep(0.18, 5), 0.07, 0.03)),
    child_pugh_score = sample(5:15, n_patients, replace = TRUE), 
    stringsAsFactors = FALSE
  )

data_hcv_monthly$hcv_genotype <- 
  data.frame(
    patient_id = 1:n_patients,
    hcv_genotype = sample(c("Genotype 1",
                            "Genotype 2",
                            "Genotype 3",
                            "Genotype 4",
                            "Genotype 5",
                            "Genotype 6",
                            "Indeterminate",
                            "Not specified",
                            NA_character_), n_patients, replace = TRUE,
                          prob = c(0.7, 0.01, 0.2, 0.01, 0.01, 0.07, 0, 0, 0)),
    stringsAsFactors = FALSE
  )

data_hcv_monthly$hcv_genotype_sub_type <- 
  data.frame(
    patient_id = 1:n_patients,
    hcv_genotype_sub_type = sample(c("Subtype a",
                                     "Subtype b",
                                     "Subtype c",
                                     "Subtype d",
                                     "Subtype e",
                                     "Subtype other",
                                     "Not specified",
                                     NA_character_), n_patients, replace = TRUE, 
                                   prob = c(0.7, 0.1, 0.1, 0.05, 0.05, 0, 0, 0)),
    stringsAsFactors = FALSE
  )

data_hcv_monthly$hcv_treatment_prescribed <- 
  data.frame(
    patient_id = 1:n_patients,
    obs_datetime = ctime,
    hcv_treatment_prescribed = sample(c("Sofosbuvir (SOF)",
                                        "Daclatasvir (DCV)",
                                        "Ribavirin (RBV)",
                                        "LDP/SOF (FDC)",
                                        "SOF/VEL (FDC)",
                                        "SOF/VEL/VOX (FDC)",
                                        "SOF/DCV (FDC)",
                                        "GLE/PIB (FDC)",
                                        "Other",
                                        NA_character_), n_patients, replace = TRUE,
                                      prob = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.07, 0.8, 0.1, 0.06, 0)),
    stringsAsFactors = FALSE
  )

data_hcv_monthly$"6_treatment_outcome_hcv_issue_du_traitement_vhc" <- 
  data.frame(
    patient_id = 1:n_patients,
    end_of_treatment_date = data_hcv_monthly$"2_treatment_initiation_début_de_traitement"$hepc_treatment_start_date + 
      c(floor(rnorm(n_patients * 0.75, 180, 20)),
        floor(rnorm(n_patients * 0.25, 270, 30))),
		outcome_this_course = sample(c("Cured",
		                               "Failed",
		                               "Died",
		                               "LTFU during treatment",
		                               "Completed, post treatment VL not done",
		                               "Other",
		                               NA_character_), n_patients, replace = TRUE,
		                             prob = c(0.87, 0.03, 0.03, 0.05, 0.02, 0.01, 0)),
		final_end_of_followup = sample(c("Yes",
		                                 "No",
		                                 "Unknown",
		                                 NA_character_), n_patients, replace = TRUE,
		                               prob = c(0.85, 0.10, 0.04, 0.01)),
		reason_end_of_followup = sample(c("Cured",
		                                  "Death",
		                                  "Transfer out",
		                                  "Loss of Follow up",
		                                  "Medical decision to stop HCV Follow up",
		                                  NA_character_), n_patients, replace = TRUE,
		                                prob = c(0.75, 0.08, 0.01, 0.14, 0.02, 0)),
    stringsAsFactors = FALSE
  )

# save tb_month raw data for SQL testing db
saveRDS(data_hcv_monthly, "inst/testdata/data_hcv_monthly.rds", version = 2)
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.