# Progress message goes to stderr rather than into the knitted output.
write("Export SPA sick child observation protocol data and run corresponding quality checks", stderr())

# Database label assigned for the report text.
db_name <- "SPA sick child observation"

# Keep only the records dated on or before the lock date.
locked_spa_sco_data <- dplyr::filter(
  main_spa_sco_data,
  date <= as.Date(lock_date, "%Y-%m-%d")
)

# Record count taken after the lock-date filter (named "raw" in the report).
n_raw_spa_sco_data <- nrow(locked_spa_sco_data)

There are `r n_raw_spa_sco_data` records between `r spa_start_date` and `r lock_date` in the raw `r db_name` database.

`r if (is_kenya) { '### Non-valid provider IDs\n\n' }`

# Run the healthcare provider ID correction. The helper returns a list:
# element 1 becomes the working dataset, element 2 is the table shown below.
out <- timci::correct_spa_sco_hcp_ids(locked_spa_sco_data)
spa_sco_data_hcp_edits <- out[[2]]
locked_spa_sco_data <- out[[1]]

# Render the edits as a captioned table in the report.
knitr::kable(spa_sco_data_hcp_edits,
             caption = "Healthcare provider ID to be corrected")

Child ID from Day 0

# Apply three successive rounds of child ID corrections to the locked dataset.
# Each round calls the same helper with a different `csv_prefix` and feeds the
# output of the previous round into the next one, so the order matters.
# For every round the helper returns a list of three elements:
#   [[1]] the dataset carried forward as `locked_spa_sco_data`,
#   [[2]] stored as `edited_recordsN`,
#   [[3]] stored as `discarded_editsN`.
# NOTE(review): `csv_prefix` presumably selects the correction file to apply -
# confirm against timci::edit_day0_child_ids_spa_sco.

# Round 1
write("   o Child ID correction 1", stderr())
out <- timci::edit_day0_child_ids_spa_sco(locked_spa_sco_data,
                                          csv_prefix = "day0_childID_correction_inconsistent_facility_check1")
locked_spa_sco_data <- out[[1]]
edited_records1 <- out[[2]]
discarded_edits1 <- out[[3]]
# Round 2 (operates on the output of round 1)
write("   o Child ID correction 2", stderr())
out <- timci::edit_day0_child_ids_spa_sco(locked_spa_sco_data,
                                          csv_prefix = "day0_childID_correction_inconsistent_facility_check2")

locked_spa_sco_data <- out[[1]]
edited_records2 <- out[[2]]
discarded_edits2 <- out[[3]]
# Round 3 (operates on the output of round 2)
write("   o Child ID correction 3", stderr())
out <- timci::edit_day0_child_ids_spa_sco(locked_spa_sco_data,
                                          csv_prefix = "day0_duplicate_correction")

locked_spa_sco_data <- out[[1]]
edited_records3 <- out[[2]]
discarded_edits3 <- out[[3]]

Non-valid facility IDs [compliance check `r qc_spa_sco_nonvalid_fids`]

# Facility ID consistency check followed by manual corrections.
# The variables below are not passed as arguments: the child documents are
# knitted with `envir = environment()`, so they read (and write) these names
# directly. They must therefore be set before each `knit_child()` call.
write("   o Non-valid facility IDs", stderr())
# Reset the result slots so values from an earlier check cannot leak through.
cleaned_df <- NULL
qc_df <- NULL
qc_description <- "Facility ID does not refer to a SPA facility and is not consistent with the facility code in the child ID. Check the facility ID from the first 5 characters of the child ID versus from the research assistant's manual entry."
qc_rule <- action_alert_no_modification
qc_type <- "spa_sco_inconsistent_facility_info"
df <- locked_spa_sco_data
qc_text <- "inconsistent facility info"
# Suffix "a": check run before the manual corrections.
qc_idx <- paste0(qc_spa_sco_nonvalid_fids, "a")
qc_export_label <- "inconsistent_facility"
qc_export_description <- "facility information between the child ID and the research assistant's entry is inconsistent"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# Parameters for the manual-correction child document.
mc_description <- "Correct SPA sick child observation facility ID"
to_correct_df <- locked_spa_sco_data
correction_type <- "correct_spa_sco_inconsistent_facilities"

# Parameters for the quality check following manual corrections
# (suffix "b"; the export label is the same as for the check above).
qc_idx <- paste0(qc_spa_sco_nonvalid_fids, "b")
qc_export_label <- "inconsistent_facility"

cat(knitr::knit_child('database_export_sub_corrections.Rmd',
                      envir = environment(),
                      quiet = TRUE))

# NOTE(review): `n_mc` and `corrected_df` are not defined in this section;
# presumably they are set by the corrections child document - confirm.
n_edit_inconsistent_fid <- n_mc
locked_spa_sco_data <- corrected_df

Non-valid participant IDs [compliance check `r qc_spa_sco_nonvalid_ids`]

# Participant ID check: compare the child IDs of the SPA observations with the
# reference dataset. The variables below are read by the child document, which
# is knitted in this environment, so they must be set before `knit_child()`.
write("   o Non-valid participant IDs", stderr())
# Reset the result slot so a value from an earlier check cannot leak through.
cleaned_df <- NULL
qc_description <- "Children can be part of the SPA sick child observation protocol even if they have not been enrolled in the RCT/LS and are not on their Day 0 visit."
qc_rule <- "Flag with value 1 in column *matched* the IDs of children who are found in the locked Day 0 database."
qc_type <- "spatf_nonvalid_ids"
df <- locked_spa_sco_data
# ID column in the checked dataset / reference dataset / ID column in the reference.
idcol1 <- "child_identification-pid"
refdf <- facility_data
idcol2 <- "child_id"
qc_text <- "participant ID not valid"
qc_idx <- qc_spa_sco_nonvalid_ids
qc_export_label <- "unknown_ids"
qc_export_description <- "child IDs are not found in the Day 0 dataset"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# `cleaned_df` was reset to NULL above. Only adopt it when the check actually
# produced a dataset; otherwise the working dataset would be replaced by NULL
# and every later step would silently run on nothing.
if (is.null(cleaned_df)) {
  write("   ! Non-valid participant ID check returned no dataset; locked data left unchanged", stderr())
} else {
  locked_spa_sco_data <- cleaned_df
}

Duplicate management [compliance check `r qc_spa_sco_duplicates`]

# Duplicate check on the participant ID. The rule is "alert, no modification"
# and `cleaning` is "none". The variables below are read by the child
# document, which is knitted in this environment.
# Reset the result slot so a value from an earlier check cannot leak through.
cleaned_df <- NULL
qc_description <- "Keep duplicated records and investigate the reason for duplicates."
qc_rule <- action_alert_no_modification
qc_type <- "duplicates"
df <- locked_spa_sco_data
# Column holding the ID to test for duplicates, and the date column used with it.
col_id <- "child_identification-pid"
col_date <- "start"
cleaning <- "none"
qc_text <- "duplicated IDs"
qc_idx <- qc_spa_sco_duplicates
qc_export_label <- "duplicated_spa_sco"
qc_export_description <- "SPA sick child observations are duplicated"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# `cleaned_df` was reset to NULL above. Only adopt it when the check actually
# produced a dataset; otherwise the working dataset would be replaced by NULL
# even though this check is not meant to modify the data.
if (is.null(cleaned_df)) {
  write("   ! Duplicate check returned no dataset; locked data left unchanged", stderr())
} else {
  locked_spa_sco_data <- cleaned_df
}

Incomplete observations [compliance check `r qc_spa_sco_incomplete`]

# Incomplete-observation check (alert only). The variables below are read by
# the child document, which is knitted in this environment. Nothing is
# assigned back to `locked_spa_sco_data` after this check.
# NOTE(review): unlike the other checks, `cleaned_df` is not reset here, so it
# still holds the value left by the previous check - confirm this is harmless.
qc_description <- "Observation of children back from the laboratory should not be a stand-alone observation, but should complement the observation of the initial consultation (before the child was sent to the lab)."
qc_rule <- action_alert_no_modification
qc_type <- "incomplete_spa_observation"
df <- locked_spa_sco_data
qc_text <- "incomplete SPA observations"
qc_idx <- qc_spa_sco_incomplete
qc_export_label <- "incomplete_spa_sco"
qc_export_description <- "SPA sick child observations are incomplete (observation back from lab available, but main observation missing)"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))

Data cleaning summary

# De-identified copy of the locked dataset; this is the object that is
# counted, summarised and exported as the cleaned dataset below.
locked_deid_spa_sco_data <- timci::deidentify_spa_data(locked_spa_sco_data)

There are `r nrow(locked_deid_spa_sco_data)` records in the locked `r db_name` database.

Data overview

# Print a summary overview of the de-identified locked dataset in the report.
skimr::skim(locked_deid_spa_sco_data)

Data export

# Export the raw main dataset to the SPA directory.
# `spa_sco_data` is indexed as a list elsewhere in this report (element 2 is
# used as the treatment table and the last element as the audit log, each with
# its own export), so the main table is passed explicitly here instead of the
# whole list.
# NOTE(review): assumes the first list element is the main submission table -
# confirm against the code that builds `spa_sco_data`.
timci::dataset_export(spa_sco_data[[1]],
                      "14",
                      "timci_spa_consultation_obs_data",
                      params$spa_dir,
                      "Raw SPA sick child observation data")
# Export the cleaned, de-identified dataset to the locked database directory
# under the same index and file label.
timci::dataset_export(locked_deid_spa_sco_data,
                      "14",
                      "timci_spa_consultation_obs_data",
                      locked_db_dir,
                      "Cleaned SPA sick child observation data")

Treatments

# Switch the database label used in the report text to the treatment table.
db_name <- "SPA sick child observation treatments"
# The treatment table is taken from the second element of `spa_sco_data`.
sco_spa_data_rx <- spa_sco_data[[2]]
n_raw_sco_spa_data_rx <- nrow(sco_spa_data_rx)

There are `r n_raw_sco_spa_data_rx` records in the raw `r db_name` database.

Non-valid parent IDs [compliance check `r qc_spa_sco_rx_nonvalid_ids`]

# Parent ID check for the treatment table: `PARENT_KEY` in the treatment
# records is compared with `KEY` in the locked main dataset. The variables
# below are read by the child document, which is knitted in this environment.
write("   o Non-valid treatment parent IDs", stderr())
# Reset the result slot so a value from an earlier check cannot leak through.
cleaned_df <- NULL
qc_description <- "Treatments need to be associated with a parent ID in the main SPA observation dataset"
qc_rule <- "Delete non-valid parent IDs."
qc_type <- "nonvalid_ids"
df <- sco_spa_data_rx
idcol1 <- "PARENT_KEY"
refdf <- locked_spa_sco_data
idcol2 <- "KEY"
qc_text <- "parent ID not valid"
qc_idx <- qc_spa_sco_rx_nonvalid_ids
qc_export_label <- "unknown_treatment_parent_ids"
qc_export_description <- "parent IDs are not found in the SPA observation dataset"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# NOTE(review): if the child document leaves `cleaned_df` as NULL, the locked
# treatment table becomes NULL here - confirm the child always sets it.
locked_sco_spa_data_rx <- cleaned_df

Export

# Export the treatment table twice: the unfiltered table to the SPA directory
# and the locked, de-identified table to the locked database directory.
write("Export SPA sick child observation treatment data", stderr())
# Overwrite the locked treatment table with its de-identified version.
locked_sco_spa_data_rx <- timci::deidentify_spa_rx_data(locked_sco_spa_data_rx)
timci::dataset_export(sco_spa_data_rx,
                      "14b",
                      "timci_spa_consultation_obs_rx",
                      params$spa_dir,
                      "SPA sick child observation treatment data")
# NOTE(review): both exports use the same description string, whereas the main
# dataset exports distinguish "Raw" from "Cleaned" - confirm this is intended.
timci::dataset_export(locked_sco_spa_data_rx,
                      "14b",
                      "timci_spa_consultation_obs_rx",
                      locked_db_dir,
                      "SPA sick child observation treatment data")

Audit data

# Index of the last element of `spa_sco_data`, which is exported below as the
# audit log.
i <- length(spa_sco_data)

Data export

# Export the audit log (element `i` of `spa_sco_data`) to the SPA directory.
# No locked-directory export is made for the audit data in this section.
write("Export SPA sick child observation audit data", stderr())
timci::dataset_export(spa_sco_data[[i]],
                      "14z",
                      "timci_spa_sco_audit",
                      params$spa_dir,
                      "SPA sick child observation audit log data")


Thaliehln/timci documentation built on April 8, 2024, 3:38 p.m.