data-raw/testdata-hiv_monthly_report.R

# generate test data for HIV monthly report
set.seed(9912)

# Parameters ----

# number of patients
n_patients <- 1000

# number of days in current month
days_current_month <- lubridate::day(Sys.Date()) - 1

# proportion for month
p_month <- 0.2

# proportion for year
p_year <- 0.4

# Initiate data list ----
dl <- list()

# Form 2 Treatment initiation ----
dl$"2_treatment_initiation_début_de_traitement" <- data.frame(
  patient_id = 1:n_patients,
  start_or_switch_hiv_treatment = sample(c("Yes", "No", NA_character_), n_patients, 
                                replace = TRUE, 
                                prob = c(0.90, 0.05, 0.05)),
  hiv_treatment_start_date = c(Sys.Date() - sample(0:days_current_month, n_patients * p_month, replace = TRUE),
                               Sys.Date() - sample((days_current_month+1):365, n_patients * p_year, replace = TRUE),
                               Sys.Date() - sample(1500:-14, n_patients - (n_patients * p_month) - (n_patients * p_year), replace = TRUE)),
  type_of_hiv_regimen = sample(c("HIV 1st line",
                                 "HIV 2nd line", 
                                 "HIV 3rd line", 
                                 "Not defined", 
                                 NA_character_), n_patients, replace = TRUE, 
                               prob = c(0.7, 0.2, 0.05, 0.03, 0.02)),
  
  date_initiation_consulations = rep(Sys.Date(), n_patients),
  
  stringsAsFactors = FALSE
)

# Personal details ----
dl$"person_details_default" <- data.frame(
  person_id = 1:n_patients,
  gender = sample(c("M", "F", "U", NA_character_), n_patients, replace = TRUE,
                  prob = c(0.48, 0.48, 0.01, 0.01)),
  age = as.character(sample(1:99, n_patients, replace = TRUE)),
  stringsAsFactors = FALSE
)

# Personal attributes ----
dl$"person_attributes" <-
  data.frame(
    person_id = 1:n_patients,
    Admission_date_Date_d_admission = 
      format(dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date
             - lubridate::days(sample(
               700:-7, n_patients, replace = TRUE)),
             format = "%Y-%m-%dT%H:%M:%OS3%z"),
    stringsAsFactors = FALSE
  )


# Form 6 Treatment status changes ----
dl$"6_status_change_hiv_changement_du_statut_vih" <- data.frame(
  patient_id = 1:n_patients,
  date_of_status_change = dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date + 
    floor(rbeta(n_patients, 2, 5) * 350), 
  type_of_programmatic_status_change = sample(c("Death", 
                                                "Transfer out",
                                                "Declared lost to follow-up",
                                                "Returned to care after previous exit",
                                                NA_character_),
                                              n_patients, replace = TRUE,
                                              prob = c(0.2, 0.10, 0.5, 0.1, 0.1)),
  stringsAsFactors = FALSE
)

# Form 4 Weight and height ----
dl$"4_weight_assessment_évaluation_du_poids" <- data.frame(
  patient_id = 1:n_patients, 
  assessment_date = dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date - 
    floor(rbeta(n_patients, 2, 5) * 15), 
  weight = abs(rnorm(n_patients, mean = 80, sd = 30)), 
  height = abs(rnorm(n_patients, mean = 170, sd = 70)),
 stringsAsFactors = FALSE
)

# Form 4 WHO stage ----
dl$"4_clin_assess_treat_hiv_éval_clin_trait_vih" <- data.frame(
  patient_id = rep(1:n_patients, 2), 
  obs_datetime = Sys.Date(),
  visit_date = c(dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date - 
    floor(rbeta(n_patients, 2, 5) * 30),
    dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date + 
      floor(rbeta(n_patients, 2, 5) * 30)), 
  who_stage = sample(c("WHO stage 1", 
                       "WHO stage 2",
                       "WHO stage 3",
                       "WHO stage 4",
                       "Unknown", 
                       NA_character_), 2 * n_patients, replace = TRUE,
                     prob = c(rep(0.22, 4), 0.1, 0.02)),
  stringsAsFactors = FALSE
                     
  )

# Form 3 CD4 & VL results ----
 pre <- data.frame(
  patient_id = 1:n_patients,
  date_sample_examination = dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date - 
    floor(rbeta(n_patients, 2, 5) * 150),
  lymphocytes_cd4_count = floor(abs(rnorm(n_patients, 170, 80))),
  hiv_viral_load_detectable = sample(c("Detectable", "Undetectable", NA_character_),
                                     n_patients, replace = TRUE, prob = c(0.3, 0.68, 0.02)),
  hiv_viral_load_result = floor(rlnorm(n_patients, 9, 0.7) * 100),
  stringsAsFactors = FALSE
)

post <- data.frame(
  patient_id = 1:n_patients,
  date_sample_examination = dl$"2_treatment_initiation_début_de_traitement"$hiv_treatment_start_date + 
    floor(rbeta(n_patients, 2, 5) * 450),
  lymphocytes_cd4_count = floor(abs(rnorm(n_patients, 170, 80))),
  hiv_viral_load_detectable = sample(c("Detectable", "Undetectable", NA_character_),
                                     n_patients, replace = TRUE, prob = c(0.3, 0.68, 0.02)),
  hiv_viral_load_result = floor(rlnorm(n_patients, 9, 0.7) * 100),
  stringsAsFactors = FALSE
)

dl$"3_lab_virology_lab_virologie" <- rbind(pre, post)

# Form 4 HIV drugs ----
dl$"4_clin_assess_treat_hiv_éval_clin_trait_vih_arvs_prescribed" <- data.frame(
  patient_id = 1:n_patients,
  obs_datetime = Sys.Date(),
  decision_on_hiv_arv_treatment =  sample(c("Initiation/ Re-initiation",
                                     "Continuation same dose",
                                     "Modification of regimen",
                                     "Stop all ARVs",
                                     "Not specified",
                                     NA_character_),
                                     n_patients, replace = TRUE,
                                     prob = c(0.8, 0.05, 0.05, 0.05, 0.03, 0.02)),
  arv_treatment_prescribed = sample(c("TDF/3TC/DTG (FDC)",
                                      "TDF/3TC/EFV (FDC)",
                                      "AZT/3TC/NVP (FDC)", 
                                      "DRV", 
                                      NA_character_), n_patients, replace = TRUE,
                                    prob = c(0.7, 0.2, 0.05, 0.048, 0.002)),
  stringsAsFactors = FALSE
)


# Save ----
saveRDS(dl, "inst/testdata/data_hiv_monthly.rds", version = 2)
JayAchar/hisreportr documentation built on March 18, 2020, 5:57 a.m.