\newpage

# Progress message on stderr while the Day 0 section is processed
write("Export Day 0 data and run corresponding quality checks", stderr())
###########################
# Day 0 quality check IDs #
###########################
# Identifiers referenced by the section titles and by the quality checks below
qc_true_duplicates <- "DQC_00_2ED_05"
qc_duplicate_id <- "DQC_00_2ED_06"
qc_ambiguous_enrolment_date1 <- "DQC_00_2ED_07"
qc_ambiguous_enrolment_date2 <- "DQC_00_2ED_08"
qc_multiple_enrolment_id <- "DQC_00_2ED_09"
qc_multiple_enrolment_sex_discrepancy_id <- "DQC_00_2ED_10"
qc_negative_nb_days_id <- "DQC_00_2ED_11"
qc_missing_cp_id <- "DQC_00_2ED_12"
qc_missing_diagnosis_id <- "DQC_00_2ED_13"
qc_missing_referral_id <- "DQC_00_2ED_14"
qc_missing_treatment_id <- "DQC_00_2ED_15"
qc_free_text_rx_id <- "DQC_00_2ED_16"
qc_missing_structured_entry_amox <- "DQC_00_2ED_17"
qc_missing_structured_entry_ctx <- "DQC_00_2ED_18"
qc_pox_ <- "DQC_00_2ED_19"
qc_summary_id <- "DQC_00_2ED_20"

##############################
# Day 0 quality check counts #
##############################
# All counters start at 0 and are overwritten by the corresponding check.
n_true_duplicate_detected <- 0
n_dropped_true_duplicate_records <- 0
n_duplicate_records <- 0
n_duplicate_edits <- 0
n_dropped_duplicate_records <- 0
n_nonvalid_fid_records <- 0
n_incorrect_enroldate_records <- 0
n_missing_cp <- 0
n_missing_diagnosis <- 0
n_drug_edits <- 0
n_spo2_meas1_edits <- 0
n_spo2_meas2_edits <- 0
# These counters are passed to the final flowchart call (or set by a check
# below); giving them a default keeps that call valid if a check is not run.
n_negative_illness_onset <- 0
n_missing_referral_cg <- 0
n_missing_treatment <- 0

Day 0 data quality checks and cleaning

# Label of the database processed in this section
db_name <- "Day 0"

# Day 0 records are the facility records flagged as enrolled (enrolled == 1)
day0_data <- dplyr::filter(facility_data, enrolled == 1)
n_raw_day0_records <- nrow(day0_data)

# Availability flags for the Day 0 dataset
day0_is_not_empty <- timci::is_not_empty(day0_data)
day0_is_not_null <- !is.null(day0_data)

# Defaults presumably used by the drug data cleaning part of the report
drug_edits_is_not_empty <- FALSE
print_clean_drug_data_title <- ""

Among the `r n_cleaned_screening_records` cleaned screening record(s), there are `r n_raw_day0_records` record(s) corresponding to new Day 0 enrolment(s).

Participant identification

True duplicated records of same enrolment [Compliance check `r qc_true_duplicates`]

# Progress message on stderr
write(x = " o Management of true duplicates (same ID, same day, same name)",
      file = stderr())

`r if ( !is_pilot ) { paste0('::: {custom-style="redparagraph"}\n\nInitial check ', qc_true_duplicates, 'a\n\n:::')}`

# --- Initial check ("a"): detection of true duplicates ----------------------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
col_id <- "child_id"
col_date <- "start"
qc_type <- "true_duplicates"
qc_idx <- paste0(qc_true_duplicates, "a")
cleaning <- "none"
qc_text <- "duplicated IDs"
qc_description <- "Participants who have been entered twice as new enrolments with the same ID on the same day, which creates duplicates in the database."
qc_rule <- "True duplicates are detected and corrected where possible."
qc_export_label <- "timci_true_duplicates"
qc_export_description <- "the data was entered in two different records on the same day"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))
# n_detected is presumably set by the child document knitted above
n_true_duplicate_detected <- n_detected

# --- Manual corrections, followed by check "b" -------------------------------
to_correct_df <- day0_data
correction_type <- "delete_day0_records"
mc_description <- "Unless advised otherwise by the in-country data manager, the oldest record is deleted. This is assuming the research assistant created a new record because they felt a specific information was missing."

# Parameters for the quality check following manual corrections
cleaning <- "none"
qc_rule <- "Remaining duplicated child IDs are deleted from the database."
qc_idx <- paste0(qc_true_duplicates, "b")

cat(knitr::knit_child("database_export_sub_corrections.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# corrected_df and n_mc are presumably set by the corrections child document
n_dropped_true_duplicate_records <- n_mc
day0_data <- corrected_df

Duplicated child IDs [Compliance check `r qc_duplicate_id`]

# Progress message on stderr
write(x = " o Management of duplicated child IDs",
      file = stderr())

`r if ( !is_pilot ) { paste0('::: {custom-style="redparagraph"}\n\nInitial check ', qc_duplicate_id, 'a\n\n:::')}`

# --- Initial check ("a"): detection of duplicated child IDs -----------------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
ref_df <- raw_successday7fu_data
col_id <- "child_id"
col_date <- "start"
qc_type <- "day0_duplicates"
qc_idx <- paste0(qc_duplicate_id, "a")
cleaning <- "none"
qc_text <- "duplicated IDs"
qc_description <- "All participants should have a distinct ID. Duplicated IDs may generate errors for the conduct of follow-ups in the field and results in ambiguous outcomes for duplicated participants once the database is deidentified."
qc_rule <- "Duplicated child IDs are detected and corrected proactively where possible."
qc_export_label <- "timci_day0_data_id_duplicates"
qc_export_description <- "the submission was allocated the same ID that has been used for another participant in the databse"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# n_detected and qc_df are presumably set by the child document knitted above
n_duplicate_records <- n_detected

# --- Bar plot of the duplicated IDs per facility and week -------------------
fig_caption <- "Duplicated IDs"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
comparison <- "type"
fill_col <- ""
# week: start of the week containing date_2; fid: characters 3 to 7 of the ID
fig_df <- dplyr::mutate(
  qc_df,
  week = as.Date(
    lubridate::floor_date(
      as.Date(date_2),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  ),
  fid = substr(id, 3, 7)
)

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# IDs detected by the initial check, each flagged with is_duplicate = 1
initial_duplicates_df <- dplyr::mutate(dplyr::select(qc_df, id),
                                       is_duplicate = 1)

# --- Manual corrections, followed by check "b" -------------------------------
to_correct_df <- day0_data
correction_type <- "edit_day0_child_ids_to_correct_duplicates"
mc_description <- ""

# Parameters for the quality check following manual corrections
cleaning <- "drop_all"
qc_rule <- "Remaining duplicated child IDs are deleted from the database."
qc_idx <- paste0(qc_duplicate_id, "b")

cat(knitr::knit_child("database_export_sub_corrections.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# n_mc, cleaned_df and n_detected are presumably set by the corrections child
n_duplicate_edits <- n_mc
n_dropped_duplicate_records <- n_detected
day0_data <- cleaned_df

Dates and times

Ambiguities about the enrolment date [Context check `r qc_ambiguous_enrolment_date1`]

# Progress message on stderr
write(" o Ambiguities about the enrolment date", stderr())

# --- Check: form start date vs contact details entry date -------------------
# Compares the "start" and "contact_start" columns of the Day 0 data through
# the quality-check child document, which is knitted in this environment.
qc_df <- NULL
qc_description <- "The enrolment date is defined as the creation (start) date of the form. However if research assistants reuse a form that already exist on their device to enrol a new participant, it is possible that the date on which the name of the participant is entered in the form is not on the same day it was started. As a consequence the enrolment date may be shifted and follow-up may not be triggered at the right time."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy"
df <- day0_data
col_date1 <- "start"
col_date2 <- "contact_start"
cleaning <- "replace_by_end_date"
qc_text <- "an incorrect date of enrolment"
qc_idx <- qc_ambiguous_enrolment_date1
qc_export_label <- "ambiguous_enrolment_date"
qc_export_description <- "the enrolment date is not correct"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# n_detected and qc_df are presumably set by the child document knitted above
n_incorrect_enroldate_records <- n_detected
# Bin the date difference; a difference of exactly 3 days belongs to the
# "2-3 days" bin, so the upper bound is inclusive.
fig_df <- qc_df %>%
  dplyr::mutate(week = as.Date(lubridate::floor_date(as.Date(start), "week", week_start = getOption("lubridate.week.start", 1)))) %>%
  dplyr::mutate(Difference = ifelse(diff <= 1,
                                    "1 day", ifelse(diff <= 3,
                                                       "2-3 days",
                                                       "Above 3 days")))

fig_caption <- "Number of days between submission start and entry of participant contact details"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Difference"
comparison <- "type"

cat(knitr::knit_child('database_export_sub_facet_bar_plot.Rmd',
                      envir = environment(),
                      quiet = TRUE))

# --- Check: "sd_start" vs "start" -------------------------------------------
# NOTE(review): this check reuses qc_ambiguous_enrolment_date1 as identifier,
# like the check above; confirm this is intended.
qc_description <- "The enrolment date is defined as the creation (start) date of the form. However if research assistants reuse a form that already exist on their device to enrol a new participant, it is possible that the date on which the name of the participant is entered in the form is not on the same day it was started. As a consequence the enrolment date may be shifted and follow-up may not be triggered at the right time."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy"
df <- day0_data
col_date1 <- "sd_start"
col_date2 <- "start"
cleaning <- "replace_by_end_date"
qc_text <- "an incorrect date of enrolment"
qc_idx <- qc_ambiguous_enrolment_date1
qc_export_label <- "ambiguous_enrolment_date1"
qc_export_description <- "the enrolment date is not correct"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
n_incorrect_enroldate_records <- n_detected

# --- Check: "start" vs "sd_start" -------------------------------------------
qc_description <- "The enrolment date is defined as the creation (start) date of the form. However if research assistants reuse a form that already exist on their device to enrol a new participant, it is possible that the date on which the name of the participant is entered in the form is not on the same day it was started. As a consequence the enrolment date may be shifted and follow-up may not be triggered at the right time."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy"
df <- day0_data
col_date1 <- "start"
col_date2 <- "sd_start"
cleaning <- "replace_by_end_date"
qc_text <- "an incorrect date of enrolment"
qc_idx <- qc_ambiguous_enrolment_date2
qc_export_label <- "ambiguous_enrolment_date2"
qc_export_description <- "the enrolment date is not correct"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
n_incorrect_enroldate_records <- n_detected
# Same binning as above (upper bound of the "2-3 days" bin is inclusive)
fig_df <- qc_df %>%
  dplyr::mutate(week = as.Date(lubridate::floor_date(as.Date(start), "week", week_start = getOption("lubridate.week.start", 1)))) %>%
  dplyr::mutate(Difference = ifelse(diff <= 1,
                                    "1 day", ifelse(diff <= 3,
                                                       "2-3 days",
                                                       "Above 3 days")))

fig_caption <- "Number of days between submission start and entry of participant contact details"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Difference"
comparison <- "type"

cat(knitr::knit_child('database_export_sub_facet_bar_plot.Rmd',
                      envir = environment(),
                      quiet = TRUE))

Clinical presentation

Missing clinical presentation [Mandatory check `r qc_missing_cp_id`]

# Progress message on stderr
write(x = " o Missing clinical presentation", file = stderr())

# --- Check: missing clinical presentation -----------------------------------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
qc_type <- "missing_clinical_presentation"
qc_idx <- qc_missing_cp_id
qc_rule <- action_alert_no_modification
qc_text <- "missing clinical presentation"
qc_description <- "All participants should have complete information about their clinical presentation, i.e. a participant cannot have simultaneously:

* No convulsions (*sx_convulsions* $\\neq$ 1)
* No lethargy (*sx_lethargy* $\\neq$ 1)
* No vomiting (*sx_vomit* = 0 or *sx_vomit* = 98)
* No feeding less than usual (*sx_less_feed* = 0 or *sx_less_feed* = 98)
* No cough (*sx_cough* = 0 or *sx_cough* = 98)
* No difficulty breathing (*sx_difficulty_breath* = 0 or *sx_difficulty_breath* = 98)
* No diarrhoea (*sx_diarrhoea* = 0 or *sx_diarrhoea* = 98)
* No fever (*sx_fever* = 0 or *sx_fever* = 98)
* No other complaint (*sx_var* = 96)"
qc_export_label <- "missing_clinical_presentation"
qc_export_description <- "the clinical presentation is missing"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))
# n_detected and qc_df are presumably set by the child document knitted above
n_missing_cp <- n_detected

# --- Bar plot of the flagged records, split by antibiotic status ------------
# NOTE(review): antibio_has_been_recorded is used as a logical without
# "== 1", unlike antibio_has_been_prescribed; confirm its type.
fig_df <- dplyr::mutate(
  qc_df,
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  ),
  Antibiotics = ifelse(
    antibio_has_been_prescribed == 1 | antibio_has_been_recorded,
    "Yes",
    "No"
  )
)

fig_caption <- "Missing clinical presentation"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
comparison <- "type"
fill_col <- "Antibiotics"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Negative illness onset [Consistency check `r qc_negative_nb_days_id`]

# Progress message on stderr
write(x = " o Negative illness onset", file = stderr())

# --- Check: negative values in the "sick_duration" column -------------------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
col_value <- "sick_duration"
qc_type <- "negative_value"
qc_idx <- qc_negative_nb_days_id
qc_rule <- action_alert_no_modification
qc_text <- "negative illness onset"
qc_description <- "Check for spurious values/outliers in illness onset: illness onset should be positive values (number of days) only."
qc_export_label <- "negative_illness_onset"
qc_export_description <- "illness onset is negative"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))
# n_detected is presumably set by the child document knitted above
n_negative_illness_onset <- n_detected

Convulsion reporting over time

# Label the convulsion answers (1 / 0 / 98) and attach the start of the week
# of each record; any other value is left as NA by case_when().
fig_df <- dplyr::mutate(
  day0_data,
  Convulsions = dplyr::case_when(
    sx_convulsions == 1  ~ "a) Yes",
    sx_convulsions == 0  ~ "b) No",
    sx_convulsions == 98 ~ "c) Not sure"
  ),
  week = as.Date(
    lubridate::floor_date(
      as.Date(start),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )
)

# Settings for the bar-plot child document
fig_caption <- "Convulsion reporting at clinical presentation over time"
fill_col <- "Convulsions"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Lethargy reporting over time

# Label the lethargy answers (1 / 0 / 98) and attach the start of the week
# of each record; any other value is left as NA by case_when().
fig_df <- dplyr::mutate(
  day0_data,
  Lethargy = dplyr::case_when(
    sx_lethargy == 1  ~ "a) Yes",
    sx_lethargy == 0  ~ "b) No",
    sx_lethargy == 98 ~ "c) Not sure"
  ),
  week = as.Date(
    lubridate::floor_date(
      as.Date(start),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )
)

# Settings for the bar-plot child document
fig_caption <- "Lethargy reporting at clinical presentation over time"
fill_col <- "Lethargy"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Vomit everything reporting over time

# Label the "vomit everything" answers and attach the start of the week of
# each record; a missing answer is counted as "b) No".
fig_df <- dplyr::mutate(
  day0_data,
  "Vomit everything" = dplyr::case_when(
    sx_vomit_evthing == 1                            ~ "a) Yes",
    is.na(sx_vomit_evthing) | sx_vomit_evthing == 0  ~ "b) No",
    sx_vomit_evthing == 98                           ~ "c) Not sure"
  ),
  week = as.Date(
    lubridate::floor_date(
      as.Date(start),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )
)

# Settings for the bar-plot child document
fig_caption <- "Vomit everything reporting at clinical presentation over time"
fill_col <- "Vomit everything"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Unable to feed reporting over time

# Label the "unable to feed" answers and attach the start of the week of
# each record; a missing answer is counted as "b) No".
fig_df <- dplyr::mutate(
  day0_data,
  "Unable to feed" = dplyr::case_when(
    sx_unable_feed == 1                          ~ "a) Yes",
    is.na(sx_unable_feed) | sx_unable_feed == 0  ~ "b) No",
    sx_unable_feed == 98                         ~ "c) Not sure"
  ),
  week = as.Date(
    lubridate::floor_date(
      as.Date(start),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )
)

# Settings for the bar-plot child document
fig_caption <- "Unable to feed reporting at clinical presentation over time"
fill_col <- "Unable to feed"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Diagnoses

Missing diagnosis [Mandatory check `r qc_missing_diagnosis_id`]

# Progress message on stderr
write(x = " o Missing diagnosis", file = stderr())

# --- Check: missing diagnosis -----------------------------------------------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
qc_type <- "missing_diagnosis"
qc_idx <- qc_missing_diagnosis_id
qc_rule <- action_alert_no_modification
qc_text <- "missing diagnosis information"
qc_description <- "All participants should have complete information about their diagnosis for their Day 0 visit, i.e. a participant cannot have simultaneously:

* No danger signs (*dx_severe* = 0)
* No pneumonia (*dx_pneumonia* = 0)
* No diarrhoea (*dx_diarrhoea* = 0)
* No dehydration (*dx_dehydration* = 0)
* No malaria (*dx_malaria* = 0)
* No ear infection (*dx_ear_infection* = 0)
* No malnutrition (*dx_malnutrition* = 0)
* No anaemia (*dx_anaemia == 0*)
* No other diagnoses (*sx_var* = 96 and *dx_oth_yn* = 1)"
qc_export_label <- "missing_diagnosis"
qc_export_description <- "the diagnosis is missing"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))
# n_detected and qc_df are presumably set by the child document knitted above
n_missing_diagnosis <- n_detected

# --- Bar plot of the flagged records per facility and week ------------------
fig_df <- dplyr::mutate(
  qc_df,
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

fig_caption <- "Missing diagnosis"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- ""

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# --- Bar plot of the availability of diagnosis free text --------------------
write(x = "   o Free text entries", file = stderr())
# A record has free text available when dx_oth is not missing
fig_df <- dplyr::mutate(
  day0_data,
  "Free text available" = dplyr::case_when(
    !is.na(dx_oth) ~ "a) Yes",
    .default       = "b) No"
  ),
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

fig_caption <- "Diagnosis free text entries"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Free text available"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Referrals

Missing referral information [Mandatory check `r qc_missing_referral_id`]

# Progress message on stderr
write(x = " o Missing referral", file = stderr())

# --- Check: missing referral information reported by the caregiver ----------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
qc_type <- "missing_referral"
qc_idx <- qc_missing_referral_id
qc_rule <- action_alert_no_modification
qc_text <- "missing referral information (source: caregiver at the exit of the consultation)"
qc_description <- "All participants should have complete information about the management at the facility for their Day 0 visit. This information is collected from 2 soures: the caregiver at the exit of the consultation and the facility registries/consultation notes. However, caregivers may be lost at the exit of the consultation, so that the referral information from this source would be missing (*referral_cg* = 'NA')."
qc_export_label <- "missing_referral_cg"
qc_export_description <- "the referral information that should have been obtained from the caregiver at the exit of the consultation is missing"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))
# n_detected is presumably set by the child document knitted above
n_missing_referral_cg <- n_detected

# --- Bar plot: referrals as reported by the caregiver (referral_cg) ---------
fig_df <- dplyr::mutate(
  day0_data,
  week = as.Date(
    lubridate::floor_date(
      as.Date(start),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  ),
  Referral = dplyr::case_when(
    referral_cg == 1 ~ "a) Referred",
    referral_cg == 0 ~ "b) Not referred",
    referral_cg == 98 ~ "c) Not sure",
    referral_cg == 97 ~ "d) Declined",
    is.na(referral_cg) ~ "e) Missing"
  )
)

fig_caption <- "Spatiotemporal pattern of referrals reported by the caregiver"
fill_col <- "Referral"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# --- Bar plot: referrals recorded at the facility (referral_hf) -------------
# Values of referral_hf other than 1 and 0 are left as NA by case_when()
fig_df <- dplyr::mutate(
  day0_data,
  week = as.Date(
    lubridate::floor_date(
      as.Date(start),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  ),
  Referral = dplyr::case_when(
    referral_hf == 1 ~ "Referred",
    referral_hf == 0 ~ "Not referred"
  )
)

fig_caption <- "Spatiotemporal pattern of referrals recorded in the facility registries/notes"
fill_col <- "Referral"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Treatments

Missing treatment [Mandatory check `r qc_missing_treatment_id`]

# Progress message on stderr
write(x = " o Missing treatment", file = stderr())

# --- Check: missing treatment information -----------------------------------
# The variables below are set for the quality-check child document, which is
# knitted in the current environment.
df <- day0_data
qc_type <- "missing_treatment"
qc_idx <- paste0(qc_missing_treatment_id, "a")
qc_rule <- action_alert_no_modification
qc_text <- "missing treatment information (source: caregiver at the exit of the consultation)"
qc_description <- "All participants should have complete information about the treatment at the facility for their Day 0 visit."
qc_export_label <- "missing_treatment"
qc_export_description <- "the treatment information is missing"
cat(knitr::knit_child("database_export_sub_quality_check.Rmd",
                      quiet = TRUE,
                      envir = environment()))
# n_detected is presumably set by the child document knitted above
n_missing_treatment <- n_detected

Free text entries

# Progress message on stderr
write(x = " o Treatment free text entries", file = stderr())

# NOTE(review): fig_df is defined three times in a row, each definition
# replacing the previous one. These are presumably alternative variants that
# are evaluated conditionally in the original report; confirm which applies.

# Variant 1: free text in either rx_misc_oth or rx_misc_oth_hf
fig_df <- dplyr::mutate(
  day0_data,
  "Free text available" = dplyr::case_when(
    !is.na(rx_misc_oth) | !is.na(rx_misc_oth_hf) ~ "a) Yes",
    .default                                     = "b) No"
  ),
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

# Variant 2: free text in rx_misc_oth only
fig_df <- dplyr::mutate(
  day0_data,
  "Free text available" = dplyr::case_when(
    !is.na(rx_misc_oth) ~ "a) Yes",
    .default            = "b) No"
  ),
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

# Variant 3: free text in rx_misc_oth_hf only
fig_df <- dplyr::mutate(
  day0_data,
  "Free text available" = dplyr::case_when(
    !is.na(rx_misc_oth_hf) ~ "a) Yes",
    .default               = "b) No"
  ),
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

# Settings for the bar-plot child document
fig_caption <- "Treatment free text entries"
fill_col <- "Free text available"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"

cat(knitr::knit_child("database_export_sub_facet_bar_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

drug_enable <- FALSE

Clinical measurements

# Convert the clinical measurement columns to numeric before plotting them.
# across() is namespaced like every other dplyr call in this file, so the
# code does not depend on dplyr being attached.
# The three conversions are kept as separate statements (presumably separate,
# conditionally evaluated chunks in the original report).
day0_data <- day0_data %>%
  dplyr::mutate(dplyr::across(c(spo2_meas1,
                                rr_meas),
                              as.numeric))
day0_data <- day0_data %>%
  dplyr::mutate(dplyr::across(c(temp_meas),
                              as.numeric))
day0_data <- day0_data %>%
  dplyr::mutate(dplyr::across(c(temp_meas_farenheit),
                              as.numeric))

Temperature [Reasonability checks]

# Progress message on stderr
write(" o Temperature", stderr())

# Density histogram of temp_meas with a dashed reference line at 37.5.
# aes() is namespaced (ggplot2::aes) in geom_vline like everywhere else in
# this file, so the plot does not depend on ggplot2 being attached.
day0_data %>%
  ggplot2::ggplot(ggplot2::aes(x = temp_meas)) +
  ggplot2::geom_histogram(ggplot2::aes(y = ..density..),
                          colour = "black",
                          fill = "white",
                          binwidth = 0.2) +
  ggplot2::geom_density(alpha = 0.2,
                        fill = "#FF6666") +
  ggplot2::geom_vline(ggplot2::aes(xintercept = 37.5),
                      color = "blue",
                      linetype = "dashed",
                      size = 1) +
  ggplot2::coord_cartesian(xlim = c(30, 43), ylim = c(0, 1))
dataMaid::identifyOutliers(day0_data$temp_meas,
                           nMax = 50)

# Same plot for temp_meas_farenheit, with the reference line at 99.5
day0_data %>%
  ggplot2::ggplot(ggplot2::aes(x = temp_meas_farenheit)) +
  ggplot2::geom_histogram(ggplot2::aes(y = ..density..),
                          colour = "black",
                          fill = "white",
                          binwidth = 0.2) +
  ggplot2::geom_density(alpha = 0.2,
                        fill = "#FF6666") +
  ggplot2::geom_vline(ggplot2::aes(xintercept = 99.5),
                      color = "blue",
                      linetype = "dashed",
                      size = 1) +
  ggplot2::coord_cartesian(xlim = c(86, 110), ylim = c(0, 1))
dataMaid::identifyOutliers(day0_data$temp_meas_farenheit,
                           nMax = 50)

# Scatter plot of the temperature values per facility and week
fig_df <- day0_data %>%
  dplyr::mutate(week = lubridate::floor_date(as.Date(end),
                                             "week",
                                             week_start = getOption("lubridate.week.start", 1)))
fig_caption <- "Spatiotemporal pattern of temperature values"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_is_time <- FALSE
comparison <- "type"
# NOTE(review): the y-axis settings are assigned twice; as written, the
# second set replaces the first. These are presumably alternative,
# conditionally evaluated variants in the original report.
y_col <- "temp_meas"
y_lbl <- "Temperature (in °C)"
y_min <- 25
y_max <- 45
y_col <- "temp_meas_farenheit"
y_lbl <- "Temperature (in °F)"
y_min <- 90
y_max <- NA
cat(knitr::knit_child('database_export_sub_facet_scatter_plot.Rmd',
                      envir = environment(),
                      quiet = TRUE))

Pulse oximetry [Reasonability checks]

# Progress message on stderr
write(x = " o Pulse oximetry", file = stderr())

# Density histogram of the first SpO2 measurement (spo2_meas1), with the
# view limited to x in [75, 100] and y in [0, 0.6]
ggplot2::ggplot(day0_data, ggplot2::aes(x = spo2_meas1)) +
  ggplot2::geom_histogram(
    ggplot2::aes(y = ..density..),
    binwidth = 1,
    colour = "black",
    fill = "white"
  ) +
  ggplot2::geom_density(fill = "#FF6666", alpha = 0.2) +
  ggplot2::coord_cartesian(xlim = c(75, 100), ylim = c(0, 0.6))

spo2_meas

# Outliers in the first SpO2 measurement
dataMaid::identifyOutliers(day0_data$spo2_meas1, nMax = 100)

# --- Scatter plot of the SpO2 values per facility and week ------------------
fig_df <- dplyr::mutate(
  day0_data,
  week = lubridate::floor_date(
    as.Date(end),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)
fig_caption <- "Spatiotemporal pattern of oxygen saturation values"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_col <- "spo2_meas1"
y_lbl <- "Oxygen saturation (in %)"
y_min <- 0
y_max <- 100
y_is_time <- FALSE
cat(knitr::knit_child("database_export_sub_facet_scatter_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# --- Manual corrections of SpO2 measure 1 -----------------------------------
to_correct_df <- day0_data
correction_type <- "correct_spo2_meas1"
mc_description <- "Correct SpO2 measure 1 values based on source data"

cat(knitr::knit_child("database_export_sub_corrections.Rmd",
                      quiet = TRUE,
                      envir = environment()))

# corrected_df and n_mc are presumably set by the corrections child document
n_spo2_meas1_edits <- n_mc
day0_data <- corrected_df

# --- Manual corrections of SpO2 measure 2 -----------------------------------
to_correct_df <- day0_data
correction_type <- "correct_spo2_meas2"
mc_description <- "Correct SpO2 measure 2 values based on source data"

cat(knitr::knit_child("database_export_sub_corrections.Rmd",
                      quiet = TRUE,
                      envir = environment()))

n_spo2_meas2_edits <- n_mc
day0_data <- corrected_df

Respiratory rate [Reasonability checks]

# Progress message on stderr
write(x = " o Respiratory rate", file = stderr())

# Density histogram of rr_meas, with the view limited to x in [0, 150] and
# y in [0, 0.12]
ggplot2::ggplot(day0_data, ggplot2::aes(x = rr_meas)) +
  ggplot2::geom_histogram(
    ggplot2::aes(y = ..density..),
    binwidth = 1,
    colour = "black",
    fill = "white"
  ) +
  ggplot2::geom_density(fill = "#FF6666", alpha = 0.2) +
  ggplot2::coord_cartesian(xlim = c(0, 150), ylim = c(0, 0.12))

# Outliers in the respiratory rate
dataMaid::identifyOutliers(day0_data$rr_meas, nMax = 150)

# --- Scatter plot of the respiratory rate per facility and week -------------
fig_df <- dplyr::mutate(
  day0_data,
  week = lubridate::floor_date(
    as.Date(end),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)
fig_caption <- "Spatiotemporal pattern of respiratory rate values"
facility_col <- "fid"
comparison <- "type"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_col <- "rr_meas"
y_lbl <- "Respiratory rate (in breaths/min)"
y_min <- 0
y_max <- 150
y_is_time <- FALSE
cat(knitr::knit_child("database_export_sub_facet_scatter_plot.Rmd",
                      quiet = TRUE,
                      envir = environment()))

Pseudonymisation

# Progress message on stderr
write(x = " o Pseudonymisation", file = stderr())

# Keep a copy of the Day 0 data as it is before pseudonymisation
allday0_data <- day0_data

# The first element returned by timci::extract_pii() is used as the dataset
# that gets pseudonymised next.
out <- timci::extract_pii(day0_data, is_pilot)
day0_data_nopseudo <- out[[1]]

Pseudonymisation is performed using a cryptographic hash function (md5) that takes strings as input and produces a random-like fixed-length output.

# Replace the identifying columns by their hash, one row at a time, using the
# algorithm named in crypto_algo; empty strings are left empty.
# digest() is namespaced like the other package calls in this file, so the
# code does not depend on the digest package being attached.
day0_data <- day0_data_nopseudo %>%
  dplyr::rowwise() %>%
  dplyr::mutate(uuid = ifelse(uuid != "", digest::digest(uuid, algo = crypto_algo), ""),
                child_id = ifelse(child_id != "", digest::digest(child_id, algo = crypto_algo), ""),
                prev_id = ifelse(prev_id != "", digest::digest(prev_id, algo = crypto_algo), ""),
                device_id = ifelse(device_id != "", digest::digest(device_id, algo = crypto_algo), "")) %>%
  dplyr::ungroup()
# Number of records left after cleaning and pseudonymisation
n_cleaned_day0_records <- nrow(day0_data)

Data cleaning summary

# Progress message on stderr
write(" o Data cleaning summary", stderr())
# Summarise the Day 0 cleaning steps in a flowchart from the counters
# collected above. All arguments are passed by position, so their order must
# match the signature of timci::create_day0_qc_flowchart() (not visible here).
timci::create_day0_qc_flowchart(n_raw_day0_records,
                                n_nonvalid_fid_records,
                                n_incorrect_enroldate_records,
                                n_dropped_true_duplicate_records,
                                n_true_duplicate_detected,
                                n_duplicate_records,
                                n_dropped_duplicate_records,
                                n_duplicate_edits,
                                n_drug_edits,
                                n_spo2_meas1_edits,
                                n_spo2_meas2_edits,
                                n_negative_illness_onset,
                                n_missing_cp,
                                n_missing_diagnosis,
                                n_missing_referral_cg,
                                n_cleaned_day0_records)

Data export

# Progress message on stderr
write(" o Data export", stderr())

# Raw Day 0 data (raw_day0_data is not defined in this section)
timci::dataset_export(raw_day0_data,
                      "02",
                      "timci_day0_data",
                      rctls_dir,
                      "Raw Day 0 data")
# Cleaned Day 0 data before pseudonymisation
timci::dataset_export(day0_data_nopseudo,
                      "02",
                      "timci_cleaned_day0_data_without_pseudonymisation",
                      rctls_dir,
                      "Cleaned Day 0 data")
# Cleaned and pseudonymised Day 0 data, exported to the locked database folder
timci::dataset_export(day0_data,
                      "02",
                      "timci_day0_data",
                      locked_db_dir,
                      "Cleaned Day 0 data")

# Contact data are written to an Excel file in the temporary directory and
# then zipped with the password read from the TIMCI_PII_PW environment
# variable.
ts <- timci::export_df2xlsx(raw_pii,
                            tempdir(),
                            "timci_contact_data")
pii_pwd <- Sys.getenv("TIMCI_PII_PW")
# The password is shell-quoted so that spaces or shell metacharacters in it
# can neither break nor alter the zip command line.
# NOTE(review): the password is still passed on the command line, where it may
# be visible to other processes; consider a safer mechanism.
zip(params$participant_zip,
    files = file.path(tempdir(), "timci_contact_data.xlsx"),
    flags = paste("-r9Xj --password", shQuote(pii_pwd)))

Data summary statistics

# Prepare the cleaned Day 0 data for the summary table: categorical columns
# are converted to factors and date columns are formatted as text.
# across() is namespaced like every other dplyr call in this file, so the
# code does not depend on dplyr being attached.
# NOTE(review): "start" goes through as.Date() before being formatted with
# "%H:%M:%S", which presumably drops the time of day; confirm this is intended.
day0_data <- day0_data %>%
  dplyr::mutate(dplyr::across(c(form_version,
                                fid,
                                who_age_ctg,
                                district,
                                facility,
                                dob_knwn,
                                age_mo_knwn,
                                enrolled,
                                mother_education),
                              factor)) %>%
  dplyr::mutate(dplyr::across(c(date_visit), ~format(as.Date(.), "%Y-%m-%d"))) %>%
  dplyr::mutate(dplyr::across(c(start), ~format(as.Date(.), "%Y-%m-%d %H:%M:%S")))
day0_data <- day0_data %>%
  dplyr::mutate(dplyr::across(c(living_with_cg),
                              factor))
day0_data <- day0_data %>%
  dplyr::mutate(dplyr::across(c(location_lvl1,
                                location_lvl2,
                                location_lvl3,
                                location_lvl4),
                              factor))
# Summary statistics of all columns
skimr::skim(day0_data)
# Drop the datasets that are no longer needed and trigger garbage collection
rm(raw_day0_data)
rm(day0_data_nopseudo)
gc()


Thaliehln/timci documentation built on April 8, 2024, 3:38 p.m.