\newpage
# Progress message sent to the standard error stream.
write(
  x = "Export Day 0 data and run corresponding quality checks",
  file = stderr()
)
###########################
# Day 0 quality check IDs #
###########################

# Identifiers of the Day 0 quality checks. Each assignment sits on its own
# line so that it is executed rather than swallowed by the banner comment.
qc_true_duplicates <- "DQC_00_2ED_05"
qc_duplicate_id <- "DQC_00_2ED_06"
qc_ambiguous_enrolment_date1 <- "DQC_00_2ED_07"
qc_ambiguous_enrolment_date2 <- "DQC_00_2ED_08"
qc_multiple_enrolment_id <- "DQC_00_2ED_09"
qc_multiple_enrolment_sex_discrepancy_id <- "DQC_00_2ED_10"
qc_negative_nb_days_id <- "DQC_00_2ED_11"
qc_missing_cp_id <- "DQC_00_2ED_12"
qc_missing_diagnosis_id <- "DQC_00_2ED_13"
qc_missing_referral_id <- "DQC_00_2ED_14"
qc_missing_treatment_id <- "DQC_00_2ED_15"
qc_free_text_rx_id <- "DQC_00_2ED_16"
qc_missing_structured_entry_amox <- "DQC_00_2ED_17"
qc_missing_structured_entry_ctx <- "DQC_00_2ED_18"
qc_pox_ <- "DQC_00_2ED_19"
qc_summary_id <- "DQC_00_2ED_20"
# Counters reported in the data cleaning summary. They all start at zero so
# that the summary can still be built when a quality check is not run.
n_true_duplicate_detected <- 0
n_dropped_true_duplicate_records <- 0
n_duplicate_records <- 0
n_duplicate_edits <- 0
n_dropped_duplicate_records <- 0
n_nonvalid_fid_records <- 0
n_incorrect_enroldate_records <- 0
n_missing_cp <- 0
n_missing_diagnosis <- 0
n_drug_edits <- 0
n_spo2_meas1_edits <- 0
n_spo2_meas2_edits <- 0
# These counters are otherwise only assigned inside their quality check;
# the first two are read later by the flowchart call.
n_negative_illness_onset <- 0
n_missing_referral_cg <- 0
n_missing_treatment <- 0
# Label of the database handled in this section.
db_name <- "Day 0"

# Day 0 records are the facility records flagged as enrolled.
day0_data <- dplyr::filter(facility_data, enrolled == 1)
n_raw_day0_records <- nrow(day0_data)

# Availability flags for the Day 0 dataset.
day0_is_not_null <- !is.null(day0_data)
day0_is_not_empty <- timci::is_not_empty(day0_data)

# Defaults related to the drug data cleaning.
print_clean_drug_data_title <- ""
drug_edits_is_not_empty <- FALSE
Among the `r n_cleaned_screening_records`
cleaned screening record(s), there are `r n_raw_day0_records`
record(s) corresponding to new Day 0 enrolment(s).
[`r qc_true_duplicates`]

write(" o Management of true duplicates (same ID, same day, same name)", stderr())
`r if ( !is_pilot ) { paste0('::: {custom-style="redparagraph"}\n\nInitial check ', qc_true_duplicates, 'a\n\n:::')}`
# Initial check (suffix "a") for true duplicates in the Day 0 data.
# The variables below are the inputs read by the generic quality check
# child document, which is knitted in the current environment.
qc_idx <- paste0(qc_true_duplicates, "a")
qc_type <- "true_duplicates"
qc_description <- "Participants who have been entered twice as new enrolments with the same ID on the same day, which creates duplicates in the database."
qc_rule <- "True duplicates are detected and corrected where possible."
qc_text <- "duplicated IDs"
qc_export_label <- "timci_true_duplicates"
qc_export_description <- "the data was entered in two different records on the same day"

df <- day0_data
col_id <- "child_id"
col_date <- "start"
cleaning <- "none"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_true_duplicate_detected <- n_detected
# Manual corrections for true duplicates, followed by check "b".
to_correct_df <- day0_data
correction_type <- "delete_day0_records"
mc_description <- "Unless advised otherwise by the in-country data manager, the oldest record is deleted. This is assuming the research assistant created a new record because they felt a specific information was missing."

# Parameters for the quality check following manual corrections
qc_idx <- paste0(qc_true_duplicates, "b")
qc_rule <- "Remaining duplicated child IDs are deleted from the database."
cleaning <- "none"

cat(
  knitr::knit_child(
    "database_export_sub_corrections.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# corrected_df and n_mc are not assigned in this chunk; presumably set by
# the corrections child document - TODO confirm.
day0_data <- corrected_df
n_dropped_true_duplicate_records <- n_mc
[`r qc_duplicate_id`]

write(" o Management of duplicated child IDs", stderr())
`r if ( !is_pilot ) { paste0('::: {custom-style="redparagraph"}\n\nInitial check ', qc_duplicate_id, 'a\n\n:::')}`
# Initial check (suffix "a") for child IDs used by more than one Day 0
# record. The variables below are the inputs read by the generic quality
# check child document, which is knitted in the current environment.
qc_description <- "All participants should have a distinct ID. Duplicated IDs may generate errors for the conduct of follow-ups in the field and results in ambiguous outcomes for duplicated participants once the database is deidentified."
qc_rule <- "Duplicated child IDs are detected and corrected proactively where possible."
qc_type <- "day0_duplicates"
df <- day0_data
# Reference dataset handed to the check alongside the Day 0 data.
ref_df <- raw_successday7fu_data
col_id <- "child_id"
col_date <- "start"
cleaning <- "none"
qc_text <- "duplicated IDs"
qc_idx <- paste0(qc_duplicate_id, "a")
qc_export_label <- "timci_day0_data_id_duplicates"
# Typo fixed in the report text ("databse" -> "database").
qc_export_description <- "the submission was allocated the same ID that has been used for another participant in the database"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_duplicate_records <- n_detected
# Facet bar plot of the records flagged with duplicated IDs.
fig_caption <- "Duplicated IDs"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- ""
comparison <- "type"

# Bin the flagged records by week (computed from date_2) and take the
# facility ID from characters 3 to 7 of the record ID.
fig_df <- qc_df %>%
  dplyr::mutate(
    week = as.Date(
      lubridate::floor_date(
        as.Date(date_2),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    ),
    fid = substr(id, 3, 7)
  )

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# Keep the IDs initially flagged as duplicates, with a marker column.
initial_duplicates_df <- qc_df %>%
  dplyr::select(id) %>%
  dplyr::mutate(is_duplicate = 1)
# Manual edits of duplicated child IDs, followed by check "b".
to_correct_df <- day0_data
correction_type <- "edit_day0_child_ids_to_correct_duplicates"
mc_description <- ""

# Parameters for the quality check following manual corrections
qc_idx <- paste0(qc_duplicate_id, "b")
qc_rule <- "Remaining duplicated child IDs are deleted from the database."
cleaning <- "drop_all"

cat(
  knitr::knit_child(
    "database_export_sub_corrections.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_mc, cleaned_df and n_detected are not assigned in this chunk;
# presumably set by the corrections child document - TODO confirm.
n_duplicate_edits <- n_mc
day0_data <- cleaned_df
n_dropped_duplicate_records <- n_detected
[`r qc_ambiguous_enrolment_date1`]

write(" o Ambiguities about the enrolment date", stderr())
# Reset the quality check output before running the check.
qc_df <- NULL

# Date discrepancy check comparing the "start" and "contact_start" columns.
qc_idx <- qc_ambiguous_enrolment_date1
qc_type <- "date_discrepancy"
qc_description <- "The enrolment date is defined as the creation (start) date of the form. However if research assistants reuse a form that already exist on their device to enrol a new participant, it is possible that the date on which the name of the participant is entered in the form is not on the same day it was started. As a consequence the enrolment date may be shifted and follow-up may not be triggered at the right time."
qc_rule <- action_alert_no_modification
qc_text <- "an incorrect date of enrolment"
qc_export_label <- "ambiguous_enrolment_date"
qc_export_description <- "the enrolment date is not correct"

df <- day0_data
col_date1 <- "start"
col_date2 <- "contact_start"
cleaning <- "replace_by_end_date"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_incorrect_enroldate_records <- n_detected
# Facet bar plot of the flagged records, filled by the size of the date
# difference (column diff of qc_df).
fig_df <- qc_df %>%
  dplyr::mutate(
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    )
  ) %>%
  # A difference of exactly 3 belongs to "2-3 days": the upper bound is
  # inclusive so that the category matches its label.
  dplyr::mutate(
    Difference = ifelse(
      diff <= 1,
      "1 day",
      ifelse(diff <= 3, "2-3 days", "Above 3 days")
    )
  )

fig_caption <- "Number of days between submission start and entry of participant contact details"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Difference"
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
# Date discrepancy check comparing "sd_start" (first) and "start" (second).
# NOTE(review): this reuses qc_ambiguous_enrolment_date1 and overwrites
# n_incorrect_enroldate_records from the previous check; presumably only one
# of these variants is evaluated per run - confirm against chunk options.
qc_idx <- qc_ambiguous_enrolment_date1
qc_type <- "date_discrepancy"
qc_description <- "The enrolment date is defined as the creation (start) date of the form. However if research assistants reuse a form that already exist on their device to enrol a new participant, it is possible that the date on which the name of the participant is entered in the form is not on the same day it was started. As a consequence the enrolment date may be shifted and follow-up may not be triggered at the right time."
qc_rule <- action_alert_no_modification
qc_text <- "an incorrect date of enrolment"
qc_export_label <- "ambiguous_enrolment_date1"
qc_export_description <- "the enrolment date is not correct"

df <- day0_data
col_date1 <- "sd_start"
col_date2 <- "start"
cleaning <- "replace_by_end_date"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

n_incorrect_enroldate_records <- n_detected
# Date discrepancy check comparing "start" (first) and "sd_start" (second),
# reported under the second ambiguous enrolment date identifier.
qc_idx <- qc_ambiguous_enrolment_date2
qc_type <- "date_discrepancy"
qc_description <- "The enrolment date is defined as the creation (start) date of the form. However if research assistants reuse a form that already exist on their device to enrol a new participant, it is possible that the date on which the name of the participant is entered in the form is not on the same day it was started. As a consequence the enrolment date may be shifted and follow-up may not be triggered at the right time."
qc_rule <- action_alert_no_modification
qc_text <- "an incorrect date of enrolment"
qc_export_label <- "ambiguous_enrolment_date2"
qc_export_description <- "the enrolment date is not correct"

df <- day0_data
col_date1 <- "start"
col_date2 <- "sd_start"
cleaning <- "replace_by_end_date"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# Overwrites the count stored by any earlier enrolment date check.
n_incorrect_enroldate_records <- n_detected
# Facet bar plot of the flagged records, filled by the size of the date
# difference (column diff of qc_df).
fig_df <- qc_df %>%
  dplyr::mutate(
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    )
  ) %>%
  # A difference of exactly 3 belongs to "2-3 days": the upper bound is
  # inclusive so that the category matches its label.
  dplyr::mutate(
    Difference = ifelse(
      diff <= 1,
      "1 day",
      ifelse(diff <= 3, "2-3 days", "Above 3 days")
    )
  )

fig_caption <- "Number of days between submission start and entry of participant contact details"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Difference"
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
[`r qc_missing_cp_id`]

write(" o Missing clinical presentation", stderr())
# Check for Day 0 records without any clinical presentation.
# NOTE(review): the bullet list inside qc_description appears flattened onto
# one line; it may originally have been a multi-line string - confirm.
qc_idx <- qc_missing_cp_id
qc_type <- "missing_clinical_presentation"
qc_description <- "All participants should have complete information about their clinical presentation, i.e. a participant cannot have simultaneously: * No convulsions (*sx_convulsions* $\\neq$ 1) * No lethargy (*sx_lethargy* $\\neq$ 1) * No vomiting (*sx_vomit* = 0 or *sx_vomit* = 98) * No feeding less than usual (*sx_less_feed* = 0 or *sx_less_feed* = 98) * No cough (*sx_cough* = 0 or *sx_cough* = 98) * No difficulty breathing (*sx_difficulty_breath* = 0 or *sx_difficulty_breath* = 98) * No diarrhoea (*sx_diarrhoea* = 0 or *sx_diarrhoea* = 98) * No fever (*sx_fever* = 0 or *sx_fever* = 98) * No other complaint (*sx_var* = 96)"
qc_rule <- action_alert_no_modification
qc_text <- "missing clinical presentation"
qc_export_label <- "missing_clinical_presentation"
qc_export_description <- "the clinical presentation is missing"

df <- day0_data

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_missing_cp <- n_detected
# Facet bar plot of records with a missing clinical presentation, filled by
# whether an antibiotic was prescribed or recorded.
fig_df <- qc_df %>%
  dplyr::mutate(
    week = lubridate::floor_date(
      as.Date(date_visit),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    ),
    Antibiotics = ifelse(
      antibio_has_been_prescribed == 1 | antibio_has_been_recorded,
      "Yes",
      "No"
    )
  )

fig_caption <- "Missing clinical presentation"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Antibiotics"
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
[`r qc_negative_nb_days_id`]

write(" o Negative illness onset", stderr())
# Negative value check on the "sick_duration" column.
qc_idx <- qc_negative_nb_days_id
qc_type <- "negative_value"
qc_description <- "Check for spurious values/outliers in illness onset: illness onset should be positive values (number of days) only."
qc_rule <- action_alert_no_modification
qc_text <- "negative illness onset"
qc_export_label <- "negative_illness_onset"
qc_export_description <- "illness onset is negative"

df <- day0_data
col_value <- "sick_duration"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_negative_illness_onset <- n_detected
# Four facet bar plots showing how selected symptoms are reported over time.
# Each one labels a symptom column, bins records by week of "start" and
# knits the facet bar plot child document.

# --- Convulsions ---
fig_df <- day0_data %>%
  dplyr::mutate(
    Convulsions = dplyr::case_when(
      sx_convulsions == 1 ~ "a) Yes",
      sx_convulsions == 0 ~ "b) No",
      sx_convulsions == 98 ~ "c) Not sure"
    ),
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    )
  )
fig_caption <- "Convulsion reporting at clinical presentation over time"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Convulsions"
comparison <- "type"
cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# --- Lethargy ---
fig_df <- day0_data %>%
  dplyr::mutate(
    Lethargy = dplyr::case_when(
      sx_lethargy == 1 ~ "a) Yes",
      sx_lethargy == 0 ~ "b) No",
      sx_lethargy == 98 ~ "c) Not sure"
    ),
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    )
  )
fig_caption <- "Lethargy reporting at clinical presentation over time"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Lethargy"
comparison <- "type"
cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# --- Vomit everything (missing values are counted as "No") ---
fig_df <- day0_data %>%
  dplyr::mutate(
    `Vomit everything` = dplyr::case_when(
      sx_vomit_evthing == 1 ~ "a) Yes",
      is.na(sx_vomit_evthing) | sx_vomit_evthing == 0 ~ "b) No",
      sx_vomit_evthing == 98 ~ "c) Not sure"
    ),
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    )
  )
fig_caption <- "Vomit everything reporting at clinical presentation over time"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Vomit everything"
comparison <- "type"
cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# --- Unable to feed (missing values are counted as "No") ---
fig_df <- day0_data %>%
  dplyr::mutate(
    `Unable to feed` = dplyr::case_when(
      sx_unable_feed == 1 ~ "a) Yes",
      is.na(sx_unable_feed) | sx_unable_feed == 0 ~ "b) No",
      sx_unable_feed == 98 ~ "c) Not sure"
    ),
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    )
  )
fig_caption <- "Unable to feed reporting at clinical presentation over time"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Unable to feed"
comparison <- "type"
cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
[`r qc_missing_diagnosis_id`]

write(" o Missing diagnosis", stderr())
# Check for Day 0 records without any diagnosis information.
# NOTE(review): the bullet list inside qc_description appears flattened onto
# one line; it may originally have been a multi-line string - confirm.
qc_idx <- qc_missing_diagnosis_id
qc_type <- "missing_diagnosis"
qc_description <- "All participants should have complete information about their diagnosis for their Day 0 visit, i.e. a participant cannot have simultaneously: * No danger signs (*dx_severe* = 0) * No pneumonia (*dx_pneumonia* = 0) * No diarrhoea (*dx_diarrhoea* = 0) * No dehydration (*dx_dehydration* = 0) * No malaria (*dx_malaria* = 0) * No ear infection (*dx_ear_infection* = 0) * No malnutrition (*dx_malnutrition* = 0) * No anaemia (*dx_anaemia == 0*) * No other diagnoses (*sx_var* = 96 and *dx_oth_yn* = 1)"
qc_rule <- action_alert_no_modification
qc_text <- "missing diagnosis information"
qc_export_label <- "missing_diagnosis"
qc_export_description <- "the diagnosis is missing"

df <- day0_data

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_missing_diagnosis <- n_detected
# Facet bar plot of the records flagged with a missing diagnosis, binned by
# week of the visit date.
fig_df <- dplyr::mutate(
  qc_df,
  week = lubridate::floor_date(
    as.Date(date_visit),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

fig_caption <- "Missing diagnosis"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- ""
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
write(" o Free text entries", stderr())

# Facet bar plot showing whether a free text diagnosis (dx_oth) is
# available, binned by week of the visit date.
fig_df <- day0_data %>%
  dplyr::mutate(
    `Free text available` = dplyr::case_when(
      !is.na(dx_oth) ~ "a) Yes",
      .default = "b) No"
    ),
    week = lubridate::floor_date(
      as.Date(date_visit),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )

fig_caption <- "Diagnosis free text entries"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Free text available"
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
[`r qc_missing_referral_id`]

write(" o Missing referral", stderr())
# Check for Day 0 records where the referral information reported by the
# caregiver is missing.
# Typo fixed in the report text ("soures" -> "sources").
qc_description <- "All participants should have complete information about the management at the facility for their Day 0 visit. This information is collected from 2 sources: the caregiver at the exit of the consultation and the facility registries/consultation notes. However, caregivers may be lost at the exit of the consultation, so that the referral information from this source would be missing (*referral_cg* = 'NA')."
qc_rule <- action_alert_no_modification
qc_type <- "missing_referral"
df <- day0_data
qc_text <- "missing referral information (source: caregiver at the exit of the consultation)"
qc_idx <- qc_missing_referral_id
qc_export_label <- "missing_referral_cg"
qc_export_description <- "the referral information that should have been obtained from the caregiver at the exit of the consultation is missing"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_missing_referral_cg <- n_detected
# Facet bar plot of the referral status reported by the caregiver
# (referral_cg), binned by week of "start".
fig_df <- day0_data %>%
  dplyr::mutate(
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    ),
    Referral = dplyr::case_when(
      referral_cg == 1 ~ "a) Referred",
      referral_cg == 0 ~ "b) Not referred",
      referral_cg == 98 ~ "c) Not sure",
      referral_cg == 97 ~ "d) Declined",
      is.na(referral_cg) ~ "e) Missing"
    )
  )

fig_caption <- "Spatiotemporal pattern of referrals reported by the caregiver"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
comparison <- "type"
fill_col <- "Referral"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
# Facet bar plot of the referral status recorded at the facility
# (referral_hf), binned by week of "start".
fig_caption <- "Spatiotemporal pattern of referrals recorded in the facility registries/notes"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
comparison <- "type"
fill_col <- "Referral"

# Values of referral_hf other than 0 and 1 (including NA) get no label.
fig_df <- day0_data %>%
  dplyr::mutate(
    week = as.Date(
      lubridate::floor_date(
        as.Date(start),
        "week",
        week_start = getOption("lubridate.week.start", 1)
      )
    ),
    Referral = dplyr::case_when(
      referral_hf == 1 ~ "Referred",
      referral_hf == 0 ~ "Not referred"
    )
  )

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
[`r qc_missing_treatment_id`]

write(" o Missing treatment", stderr())
# Check (suffix "a") for Day 0 records without treatment information.
qc_idx <- paste0(qc_missing_treatment_id, "a")
qc_type <- "missing_treatment"
qc_description <- "All participants should have complete information about the treatment at the facility for their Day 0 visit."
qc_rule <- action_alert_no_modification
qc_text <- "missing treatment information (source: caregiver at the exit of the consultation)"
qc_export_label <- "missing_treatment"
qc_export_description <- "the treatment information is missing"

df <- day0_data

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# n_detected is not assigned in this chunk; presumably set by the child
# document - TODO confirm.
n_missing_treatment <- n_detected
write(" o Treatment free text entries", stderr())

# Three definitions of fig_df follow, differing only in which free text
# column(s) are inspected. Run in sequence, the last assignment wins.
# NOTE(review): these look like alternatives selected by chunk options that
# are not visible here - confirm.

# Variant 1: caregiver or facility free text (rx_misc_oth, rx_misc_oth_hf).
fig_df <- day0_data %>%
  dplyr::mutate(
    `Free text available` = dplyr::case_when(
      !is.na(rx_misc_oth) | !is.na(rx_misc_oth_hf) ~ "a) Yes",
      .default = "b) No"
    ),
    week = lubridate::floor_date(
      as.Date(date_visit),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )

# Variant 2: rx_misc_oth only.
fig_df <- day0_data %>%
  dplyr::mutate(
    `Free text available` = dplyr::case_when(
      !is.na(rx_misc_oth) ~ "a) Yes",
      .default = "b) No"
    ),
    week = lubridate::floor_date(
      as.Date(date_visit),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )

# Variant 3: rx_misc_oth_hf only.
fig_df <- day0_data %>%
  dplyr::mutate(
    `Free text available` = dplyr::case_when(
      !is.na(rx_misc_oth_hf) ~ "a) Yes",
      .default = "b) No"
    ),
    week = lubridate::floor_date(
      as.Date(date_visit),
      "week",
      week_start = getOption("lubridate.week.start", 1)
    )
  )

fig_caption <- "Treatment free text entries"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
fill_col <- "Free text available"
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_bar_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
drug_enable <- FALSE

# Convert the clinical measurements to numeric before plotting them.
# The conversions stay separate statements, one per original chunk.
day0_data <- dplyr::mutate(
  day0_data,
  across(c(spo2_meas1, rr_meas), as.numeric)
)
day0_data <- dplyr::mutate(
  day0_data,
  across(c(temp_meas), as.numeric)
)
day0_data <- dplyr::mutate(
  day0_data,
  across(c(temp_meas_farenheit), as.numeric)
)
write(" o Temperature", stderr())

# Density histogram of temp_meas with a dashed reference line at 37.5.
# The reference line is a constant, so it is passed as a parameter instead
# of through an unqualified aes() call. This removes the dependency on
# ggplot2 being attached, in line with the other namespaced ggplot2 calls.
# NOTE(review): `..density..` and `size` for lines are deprecated in recent
# ggplot2 releases; they are kept because the ggplot2 version is unknown.
day0_data %>%
  ggplot2::ggplot(ggplot2::aes(x = temp_meas)) +
  ggplot2::geom_histogram(
    ggplot2::aes(y = ..density..),
    colour = "black",
    fill = "white",
    binwidth = 0.2
  ) +
  ggplot2::geom_density(alpha = 0.2, fill = "#FF6666") +
  ggplot2::geom_vline(
    xintercept = 37.5,
    color = "blue",
    linetype = "dashed",
    size = 1
  ) +
  ggplot2::coord_cartesian(xlim = c(30, 43), ylim = c(0, 1))

# List up to 50 outlying temp_meas values.
dataMaid::identifyOutliers(day0_data$temp_meas, nMax = 50)

# Same plot for temp_meas_farenheit, with the reference line at 99.5.
day0_data %>%
  ggplot2::ggplot(ggplot2::aes(x = temp_meas_farenheit)) +
  ggplot2::geom_histogram(
    ggplot2::aes(y = ..density..),
    colour = "black",
    fill = "white",
    binwidth = 0.2
  ) +
  ggplot2::geom_density(alpha = 0.2, fill = "#FF6666") +
  ggplot2::geom_vline(
    xintercept = 99.5,
    color = "blue",
    linetype = "dashed",
    size = 1
  ) +
  ggplot2::coord_cartesian(xlim = c(86, 110), ylim = c(0, 1))

# List up to 50 outlying temp_meas_farenheit values.
dataMaid::identifyOutliers(day0_data$temp_meas_farenheit, nMax = 50)
# Facet scatter plot of temperature values, binned by week of "end".
fig_df <- dplyr::mutate(
  day0_data,
  week = lubridate::floor_date(
    as.Date(end),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

fig_caption <- "Spatiotemporal pattern of temperature values"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_is_time <- FALSE
comparison <- "type"

# Two settings for the y axis follow; run in sequence, the second wins.
# NOTE(review): these look like alternatives selected by chunk options that
# are not visible here - confirm.
y_col <- "temp_meas"
y_lbl <- "Temperature (in °C)"
y_min <- 25
y_max <- 45

y_col <- "temp_meas_farenheit"
y_lbl <- "Temperature (in °F)"
y_min <- 90
y_max <- NA

cat(
  knitr::knit_child(
    "database_export_sub_facet_scatter_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
write(" o Pulse oximetry", stderr())

# Density histogram of the first SpO2 measurement (spo2_meas1).
# NOTE(review): `..density..` is deprecated in recent ggplot2 releases; it
# is kept because the ggplot2 version is unknown.
spo2_hist_layer <- NULL
day0_data %>%
  ggplot2::ggplot(ggplot2::aes(x = spo2_meas1)) +
  ggplot2::geom_histogram(
    ggplot2::aes(y = ..density..),
    colour = "black",
    fill = "white",
    binwidth = 1
  ) +
  ggplot2::geom_density(alpha = 0.2, fill = "#FF6666") +
  ggplot2::coord_cartesian(xlim = c(75, 100), ylim = c(0, 0.6))
spo2_meas
# List up to 100 outlying spo2_meas1 values.
dataMaid::identifyOutliers(day0_data$spo2_meas1, nMax = 100)

# Facet scatter plot of oxygen saturation values, binned by week of "end".
fig_df <- dplyr::mutate(
  day0_data,
  week = lubridate::floor_date(
    as.Date(end),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

fig_caption <- "Spatiotemporal pattern of oxygen saturation values"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_col <- "spo2_meas1"
y_lbl <- "Oxygen saturation (in %)"
y_is_time <- FALSE
y_min <- 0
y_max <- 100
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_scatter_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
# Manual corrections of the two SpO2 measurements, applied one after the
# other. corrected_df and n_mc are not assigned in this chunk; presumably
# set by the corrections child document - TODO confirm.

# --- SpO2 measure 1 ---
to_correct_df <- day0_data
correction_type <- "correct_spo2_meas1"
mc_description <- "Correct SpO2 measure 1 values based on source data"
cat(
  knitr::knit_child(
    "database_export_sub_corrections.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
day0_data <- corrected_df
n_spo2_meas1_edits <- n_mc

# --- SpO2 measure 2 ---
to_correct_df <- day0_data
correction_type <- "correct_spo2_meas2"
mc_description <- "Correct SpO2 measure 2 values based on source data"
cat(
  knitr::knit_child(
    "database_export_sub_corrections.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
day0_data <- corrected_df
n_spo2_meas2_edits <- n_mc
write(" o Respiratory rate", stderr())

# Density histogram of the respiratory rate (rr_meas).
# NOTE(review): `..density..` is deprecated in recent ggplot2 releases; it
# is kept because the ggplot2 version is unknown.
day0_data %>%
  ggplot2::ggplot(ggplot2::aes(x = rr_meas)) +
  ggplot2::geom_histogram(
    ggplot2::aes(y = ..density..),
    colour = "black",
    fill = "white",
    binwidth = 1
  ) +
  ggplot2::geom_density(alpha = 0.2, fill = "#FF6666") +
  ggplot2::coord_cartesian(xlim = c(0, 150), ylim = c(0, 0.12))

# List up to 150 outlying rr_meas values.
dataMaid::identifyOutliers(day0_data$rr_meas, nMax = 150)

# Facet scatter plot of respiratory rate values, binned by week of "end".
fig_df <- dplyr::mutate(
  day0_data,
  week = lubridate::floor_date(
    as.Date(end),
    "week",
    week_start = getOption("lubridate.week.start", 1)
  )
)

fig_caption <- "Spatiotemporal pattern of respiratory rate values"
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_col <- "rr_meas"
y_lbl <- "Respiratory rate (in breaths/min)"
y_is_time <- FALSE
y_min <- 0
y_max <- 150
comparison <- "type"

cat(
  knitr::knit_child(
    "database_export_sub_facet_scatter_plot.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)
write(" o Pseudonymisation", stderr())

# Keep a copy of the cleaned Day 0 data before personal data is extracted.
allday0_data <- day0_data

# Only the first element returned by extract_pii is used here.
out <- timci::extract_pii(day0_data, is_pilot)
day0_data_nopseudo <- out[[1]]
Pseudonymisation is performed using a cryptographic hash function (md5) that takes strings as input and produces a random-like fixed-length output.
# Replace each non-empty identifier by its hash (algorithm: crypto_algo).
# Empty strings stay empty. The data is processed row by row so that
# digest() hashes one value at a time.
day0_data <- day0_data_nopseudo %>%
  dplyr::rowwise() %>%
  dplyr::mutate(
    dplyr::across(
      c(uuid, child_id, prev_id, device_id),
      ~ ifelse(.x != "", digest(.x, algo = crypto_algo), "")
    )
  ) %>%
  dplyr::ungroup()

# Number of records remaining after cleaning.
n_cleaned_day0_records <- nrow(day0_data)
write(" o Data cleaning summary", stderr())

# Build the Day 0 cleaning flowchart from the counters collected above.
# Arguments are positional, so their order must not change.
timci::create_day0_qc_flowchart(
  n_raw_day0_records,
  n_nonvalid_fid_records,
  n_incorrect_enroldate_records,
  n_dropped_true_duplicate_records,
  n_true_duplicate_detected,
  n_duplicate_records,
  n_dropped_duplicate_records,
  n_duplicate_edits,
  n_drug_edits,
  n_spo2_meas1_edits,
  n_spo2_meas2_edits,
  n_negative_illness_onset,
  n_missing_cp,
  n_missing_diagnosis,
  n_missing_referral_cg,
  n_cleaned_day0_records
)
write(" o Data export", stderr())

# Raw Day 0 data. raw_day0_data is not defined in this section; presumably
# created earlier in the report - TODO confirm.
timci::dataset_export(
  raw_day0_data,
  "02",
  "timci_day0_data",
  rctls_dir,
  "Raw Day 0 data"
)

# Cleaned Day 0 data before pseudonymisation.
timci::dataset_export(
  day0_data_nopseudo,
  "02",
  "timci_cleaned_day0_data_without_pseudonymisation",
  rctls_dir,
  "Cleaned Day 0 data"
)

# Cleaned and pseudonymised Day 0 data, written to the locked database
# directory.
timci::dataset_export(
  day0_data,
  "02",
  "timci_day0_data",
  locked_db_dir,
  "Cleaned Day 0 data"
)
# Write the contact data to an xlsx file in the session temporary directory,
# then store that file in a password-protected zip archive.
# raw_pii is not defined in this section; presumably created earlier in the
# report - TODO confirm.
ts <- timci::export_df2xlsx(raw_pii, tempdir(), "timci_contact_data")

# The archive password is read from the environment.
pii_pwd <- Sys.getenv("TIMCI_PII_PW")

# Without a password the flag string would end with "--password", and the
# next command line argument (the archive path) would be used as password.
if (!nzchar(pii_pwd)) {
  stop(
    "Environment variable TIMCI_PII_PW is not set: ",
    "the contact data archive cannot be password-protected.",
    call. = FALSE
  )
}

# The password is quoted so that spaces or shell metacharacters in it cannot
# break or alter the zip command line.
zip(
  params$participant_zip,
  files = file.path(tempdir(), "timci_contact_data.xlsx"),
  flags = paste("-r9Xj --password", shQuote(pii_pwd))
)
# Prepare the pseudonymised data for the summary table: categorical columns
# become factors and the date columns are formatted as text.
# NOTE(review): "start" goes through as.Date() before being formatted with
# "%H:%M:%S", so the time part is presumably always 00:00:00 - confirm
# whether a date-time conversion was intended.
day0_data <- day0_data %>%
  dplyr::mutate(
    across(
      c(
        form_version,
        fid,
        who_age_ctg,
        district,
        facility,
        dob_knwn,
        age_mo_knwn,
        enrolled,
        mother_education
      ),
      factor
    )
  ) %>%
  dplyr::mutate(
    across(c(date_visit), ~format(as.Date(.), "%Y-%m-%d"))
  ) %>%
  dplyr::mutate(
    across(c(start), ~format(as.Date(.), "%Y-%m-%d %H:%M:%S"))
  )

# Further factor conversions, kept as separate statements, one per original
# chunk.
day0_data <- dplyr::mutate(
  day0_data,
  across(c(living_with_cg), factor)
)
day0_data <- dplyr::mutate(
  day0_data,
  across(
    c(location_lvl1, location_lvl2, location_lvl3, location_lvl4),
    factor
  )
)

# Summary statistics of the cleaned Day 0 data.
skimr::skim(day0_data)

# Free the memory held by datasets that are no longer needed.
rm(raw_day0_data, day0_data_nopseudo)
gc()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.