\newpage

# Progress message for this report section, written to the standard error stream.
write("Export Day 7 follow-up outcome data (successful follow-ups only) and run corresponding quality checks", stderr())

Day 7 follow-up outcome data quality checks

# Counters passed to the cleaning flowchart at the end of this section.
# They start at 0 and are overwritten with `n_detected` after the
# corresponding date checks further down.
n_hospit_prior_day0_day7fu <- 0
n_death_prior_day0_day7fu <- 0
n_death_prior_hospit_day7fu <- 0

This section focuses only on successful follow-ups, i.e. follow-ups where the participant was successfully reached and where follow-up outcomes were collected.

# Restrict all Day 7 follow-up records to those with proceed_day7 == 1
# and record how many rows remain before any cleaning.
day7fu_data <- dplyr::filter(allday7fu_data, proceed_day7 == 1)
n_raw_successday7fu_records <- nrow(day7fu_data)
# Availability flags for the filtered data
day7fu_is_not_empty <- timci::is_not_empty(day7fu_data)
day7fu_is_not_null <- !is.null(day7fu_data)

Among the r n_cleaned_allday7fu_records cleaned r db_name record(s), there are r n_raw_successday7fu_records record(s) corresponding to successful Day 7 follow-up(s).

# Flag (1/0) the records whose `days` value lies between 7 and 10, bounds
# included; a missing `days` value yields NA.
day7fu_data <- dplyr::mutate(
  day7fu_data,
  window = ifelse(days >= 7 & days <= 10, 1, 0)
)

Duplicate management

Successful follow-up duplicates [Context check r qc_duplicated_day7fu]

# Progress message for the duplicate check, written to the standard error stream.
write(" o Successful follow-up duplicates", stderr())

r if (is_kenya) {'##### Initial check'}

# Initial duplicate check on the successful Day 7 follow-ups.
# The variables below are set in the current environment, which is the one
# handed to the child template through `envir = environment()`.
qc_description <- "It is possible to have more than one successful follow-up records available for the same participant."
qc_rule <- "Identify duplicated records that correspond to different participants (participants for whom duplicates were created at enrolment) and reallocate these records to the appropriate participant."
qc_type <- "duplicates_with_names"
df <- day7fu_data
col_id <- "child_id"
col_date <- "start"
# "none" here; the same check is run again further down with "keep_latest".
cleaning <- "none"
qc_text <- "duplicated IDs"
qc_idx <- qc_duplicated_day7fu
qc_export_label <- "duplicated_successful_day7fu"
qc_export_description <- "Day 7 follow-ups are duplicated"
# Knit the child template quietly and emit its output with cat().
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))

r if (is_kenya) {'##### Child ID duplicate manual edits'}

# Apply the manual child ID corrections for duplicated Day 7 follow-ups.
# The first element of the returned list replaces the follow-up data; the
# second one is used as the table of edits and may be NULL.
out <- timci::correct_day7_duplicates(day7fu_data)
day7fu_data <- out[[1]]
duplicate_edits <- out[[2]]
# Display the edits (old ID, record uuid, new ID) only when there are some.
# `select` is namespaced like the other dplyr verbs in this file so that it
# cannot be masked by another attached package.
if (!is.null(duplicate_edits)) {
  duplicate_edits %>%
    dplyr::select(old_child_id, uuid, new_child_id) %>%
    knitr::kable()
}

r if (is_kenya) {'##### Final check after manual edits'}

# Final duplicate check, run on the data returned by the manual edits.
# Same parameters as the initial check except for the description, the rule
# and `cleaning`, which is now "keep_latest".
qc_description <- "It is possible to have more than one successful follow-up records available for the same participant. In this case, following the guidance from the statistical analysis plan, only the most recent successful Day 7 follow-up is kept."
qc_rule <- "Delete all older records and keep only the most recent when more than one successful follow-up is available for the same participant."
qc_type <- "duplicates_with_names"
df <- day7fu_data
col_id <- "child_id"
col_date <- "start"
cleaning <- "keep_latest"
qc_text <- "duplicated IDs"
qc_idx <- qc_duplicated_day7fu
qc_export_label <- "duplicated_successful_day7fu"
qc_export_description <- "Day 7 follow-ups are duplicated"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# NOTE(review): `cleaned_df` is not assigned in this section; it is presumably
# created by the child template in the shared environment - confirm.
# The number of dropped duplicates is the row difference before/after cleaning.
n_dropped_duplicate_day7fu_records <- nrow(day7fu_data) - nrow(cleaned_df)
day7fu_data <- cleaned_df

Dates

Dates of follow-up

# Attach the Day 0 visit date to each follow-up record
# (all.x = TRUE keeps follow-ups that have no matching Day 0 record).
# `df` is left without the plotting columns because it is not reassigned
# before the next quality check is knitted.
df <- day7fu_data %>% 
  merge(allday0_data %>%
          dplyr::select(child_id,
                        date_visit),
        by = "child_id",
        all.x = TRUE)
# Scatter plot, one panel per `fid`: whole days between `date_visit` and
# `date_call` (y) against the week of the call (x).
ggplot2::ggplot(df %>%
                  dplyr::mutate(diff = floor(difftime(date_call, date_visit, units = "days"))) %>% 
                  dplyr::mutate(week = lubridate::floor_date(as.Date(date_call),
                                                             "week",
                                                             week_start = getOption("lubridate.week.start", 1))),
                ggplot2::aes(x = week, y = diff)) +
  ggplot2::geom_point() +
  ggplot2::labs(x = "Weeks",
                y = "Number of days between enrolment and Day 7 follow-up") +
  ggplot2::scale_x_date(date_breaks = dbreak_duration,
                        date_labels = "%b%y") +
  ggplot2::facet_wrap(~ fid,
                      ncol = nfacetperrow) +
  # Theme elements are namespaced like every other ggplot2 call in this file.
  ggplot2::theme(text = ggplot2::element_text(size = tsize),
                 panel.grid.major.x = ggplot2::element_blank(),
                 panel.grid.minor.x = ggplot2::element_blank())

Follow-up anterior to enrolment [Context check r qc_call_before_enrolment_day7fu]

# Check for follow-up calls dated before the enrolment date.
# NOTE(review): `df` is not reassigned here, so the child template presumably
# works on the `df` left by the preceding plot chunk (follow-up data merged
# with `date_visit`) - confirm.
write(" o Check for follow-up date anterior to date of enrolment", stderr())
qc_description <- "Follow-up should have happened 7 days after enrolment on Day 0 and cannot have happened before the date of enrolment."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy"
# Names of the two date columns handed to the check
col_date1 <- "date_call"
col_date2 <- "date_visit"
qc_text <- "a date of call before the enrolment date"
qc_idx <- qc_call_before_enrolment_day7fu
qc_export_label <- "followup_before_enrolment"
qc_export_description <- "the follow-up was conducted before the enrolment date"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))

# NOTE(review): `n_detected` is not assigned in this section; presumably set
# by the child template - confirm. Its value is passed to the flowchart later.
n_fu_prior_day0_day7fu <- n_detected

Missing date of hospitalisation [Mandatory check r qc_missing_hospit_date_day7fu]

# Check for a missing hospitalisation date among participants with a
# hospital visit/hospitalisation.
write(" o Missing date of hospitalisation", stderr())
qc_description <- "All hospitalised participants should have complete information about their hospital visit/hospitalisation."
qc_rule <- action_alert_no_modification
qc_type <- "missing_value"
# Keep the records where status_day7 == 2 or admission == 1. The helper
# column `all_hf_visit_day7` stays in `df`; rows where the condition is NA
# are dropped by the filter.
df <- day7fu_data %>%
  dplyr::mutate(all_hf_visit_day7 = ifelse(status_day7 == 2 | admission == 1,
                                           1,
                                           0)) %>%
  dplyr::filter(all_hf_visit_day7 == 1)
# Name of the column handed to the missing-value check
col_value <- "date_hosp_day7"
qc_text <- "missing hospitalisation date"
qc_idx <- qc_missing_hospit_date_day7fu
qc_export_label <- "missing_hospitalisation_date"
qc_export_description <- "the date of hospital visit/hospitalisation is missing"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# NOTE(review): `n_detected` is presumably set by the child template - confirm.
n_missing_hospit_date <- n_detected

Dates of hospitalisation

write(" o Visualisation of dates of hospitalisation", stderr())
# Attach the Day 0 visit date to each follow-up record
# (all.x = TRUE keeps follow-ups that have no matching Day 0 record).
df <- day7fu_data %>% 
  merge(allday0_data %>%
          dplyr::select(child_id,
                        date_visit),
        by = "child_id",
        all.x = TRUE)
# NOTE(review): the variables below are not used by the plot in this chunk
# and `fig_caption` ends mid-sentence. They look like parameters for a
# plotting child template; they are kept because the environment is shared
# with child documents - confirm whether they can be removed.
fig_caption <- "Spatiotemporal pattern of number of days between "
facility_col <- "fid"
date_col <- "week"
date_lbl <- "Weeks"
date_format <- "%b%y"
y_col <- "start_time1"
y_lbl <- "Start time of record entry"
y_is_time <- TRUE
time_break_str <- "4 hours"
time_format <- "%H:%M"
comparison <- "type"
# Scatter plot, one panel per `fid`: whole days between `date_visit` and
# `date_hosp_day7` (y) against the week of the follow-up call (x).
ggplot2::ggplot(df %>%
                  dplyr::mutate(diff = floor(difftime(date_hosp_day7, date_visit, units = "days"))) %>% 
                  dplyr::mutate(week = lubridate::floor_date(as.Date(date_call),
                                                             "week",
                                                             week_start = getOption("lubridate.week.start", 1))),
                ggplot2::aes(x = week, y = diff)) +
  ggplot2::geom_point() +
  ggplot2::labs(x = "Weeks",
                y = "Number of days between enrolment and hospitalisation") +
  ggplot2::scale_x_date(date_breaks = dbreak_duration,
                        date_labels = "%b%y") +
  ggplot2::facet_wrap(~ fid,
                      ncol = nfacetperrow) +
  # Theme elements are namespaced like every other ggplot2 call in this file.
  ggplot2::theme(text = ggplot2::element_text(size = tsize),
                 panel.grid.major.x = ggplot2::element_blank(),
                 panel.grid.minor.x = ggplot2::element_blank())

Hospitalisations anterior to enrolment [Context check r qc_hospit_before_enrolment_day7fu]

# Check for hospitalisation dates reported before the enrolment date.
write(" o Invalid date of hospitalisation", stderr())
qc_description <- "The hospital visit should have happened after enrolment on Day 0."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy_fu"
# Merge the follow-up records with selected Day 0 columns on child_id;
# all.x = TRUE keeps follow-ups that have no matching Day 0 record.
df <- day7fu_data %>% 
  merge(allday0_data %>%
          dplyr::select(child_id,
                        date_visit,
                        hospit,
                        journey,
                        prev_hf_type,
                        prev_hosp),
        by = "child_id",
        all.x = TRUE)
# Names of the two date columns handed to the check
col_date1 <- "date_hosp_day7"
col_date2 <- "date_visit"
# Day 0 columns merged above, listed by name for the child template
fu_cols <- c("hospit", "journey", "prev_hf_type", "prev_hosp")
qc_text <- "a date of hospitalisation before the enrolment date"
qc_idx <- qc_hospit_before_enrolment_day7fu
qc_export_label <- "hospit_before_enrolment"
qc_export_description <- "the reported date of hospitalisation was before the enrolment date"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# NOTE(review): `n_detected` is presumably set by the child template - confirm.
# Overwrites the 0 assigned at the top of the section.
n_hospit_prior_day0_day7fu <- n_detected

Missing date of death [Mandatory check r qc_missing_death_date_day7fu]

# Check for a missing date of death among records with status_day7 == 3.
write(" o Missing date of death", stderr())
qc_description <- "All deceased participants should have complete information about their death."
qc_rule <- action_alert_no_modification
qc_type <- "missing_value"
# Name of the column handed to the missing-value check
col_value <- "date_death_day7"
df <- day7fu_data %>%
  dplyr::filter(status_day7 == 3)
qc_text <- "missing death date"
qc_idx <- qc_missing_death_date_day7fu
qc_export_label <- "missing_death_date"
qc_export_description <- "the date of death is missing"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# NOTE(review): `n_detected` is presumably set by the child template - confirm.
n_missing_death_date <- n_detected

Dates of death

# Attach the Day 0 visit date to each follow-up record
# (all.x = TRUE keeps follow-ups that have no matching Day 0 record).
# `df` is not reassigned before the two death-date checks that follow.
df <- day7fu_data %>% 
  merge(allday0_data %>%
          dplyr::select(child_id,
                        date_visit),
        by = "child_id",
        all.x = TRUE)
# Plot 1, one panel per `fid`: whole days between `date_visit` and
# `date_death_day7` (y) against the week of the follow-up call (x).
ggplot2::ggplot(df %>%
                  dplyr::mutate(diff = floor(difftime(date_death_day7, date_visit, units = "days"))) %>% 
                  dplyr::mutate(week = lubridate::floor_date(as.Date(date_call),
                                                             "week",
                                                             week_start = getOption("lubridate.week.start", 1))),
                ggplot2::aes(x = week, y = diff)) +
  ggplot2::geom_point() +
  ggplot2::labs(x = "Weeks",
                y = "Number of days between enrolment and death (as reported at Day 7 follow-up)") +
  ggplot2::scale_x_date(date_breaks = dbreak_duration,
                        date_labels = "%b%y") +
  ggplot2::facet_wrap(~ fid,
                      ncol = nfacetperrow) +
  ggplot2::theme(text = ggplot2::element_text(size = tsize),
                 panel.grid.major.x = ggplot2::element_blank(),
                 panel.grid.minor.x = ggplot2::element_blank())
# Plot 2, one panel per `fid`: whole days between `date_hosp_day7` and
# `date_death_day7` (y) against the week of the follow-up call (x).
# The y label now describes this difference; it previously repeated the
# "enrolment and hospitalisation" label of another plot.
ggplot2::ggplot(df %>%
                  dplyr::mutate(diff = floor(difftime(date_death_day7, date_hosp_day7, units = "days"))) %>% 
                  dplyr::mutate(week = lubridate::floor_date(as.Date(date_call),
                                                             "week",
                                                             week_start = getOption("lubridate.week.start", 1))),
                ggplot2::aes(x = week, y = diff)) +
  ggplot2::geom_point() +
  ggplot2::labs(x = "Weeks",
                y = "Number of days between hospitalisation and death (as reported at Day 7 follow-up)") +
  ggplot2::scale_x_date(date_breaks = dbreak_duration,
                        date_labels = "%b%y") +
  ggplot2::facet_wrap(~ fid,
                      ncol = nfacetperrow) +
  ggplot2::theme(text = ggplot2::element_text(size = tsize),
                 panel.grid.major.x = ggplot2::element_blank(),
                 panel.grid.minor.x = ggplot2::element_blank())

Deaths anterior to enrolment [Context check r qc_death_before_enrolment_day7fu]

# Check for dates of death reported before the enrolment date.
# NOTE(review): `df` is not reassigned here, so the child template presumably
# works on the `df` left by the preceding plot chunk (follow-up data merged
# with `date_visit`) - confirm.
write(" o Check for date of death anterior to date of enrolment", stderr())
qc_description <- "Death should have happened after enrolment on Day 0."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy"
# Names of the two date columns handed to the check
col_date1 <- "date_death_day7"
col_date2 <- "date_visit"
qc_text <- "a date of death before the enrolment date"
qc_idx <- qc_death_before_enrolment_day7fu
qc_export_label <- "death_before_enrolment"
qc_export_description <- "the reported date of death was before the enrolment date"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))

# NOTE(review): `n_detected` is presumably set by the child template - confirm.
# Overwrites the 0 assigned at the top of the section.
n_death_prior_day0_day7fu <- n_detected

Deaths anterior to hospitalisation [Context check r qc_death_before_hospit_day7fu]

# Check for dates of death reported before the hospitalisation date.
# NOTE(review): `df` is not reassigned here either; the child template
# presumably reuses the `df` built before the death-date plots - confirm.
write(" o Check for date of death anterior to date of hospitalisation", stderr())
qc_description <- "Death should have happened after hospital visit/hospitalisation."
qc_rule <- action_alert_no_modification
qc_type <- "date_discrepancy"
# Names of the two date columns handed to the check
col_date1 <- "date_death_day7"
col_date2 <- "date_hosp_day7"
qc_text <- "a date of death before the hospital visit/hospitalisation date"
qc_idx <- qc_death_before_hospit_day7fu
qc_export_label <- "death_before_hospit"
qc_export_description <- "the reported date of death was before the hospital visit/hospitalisation date"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))
# NOTE(review): `n_detected` is presumably set by the child template - confirm.
# Overwrites the 0 assigned at the top of the section.
n_death_prior_hospit_day7fu <- n_detected

Pseudonymisation

# Progress message for the pseudonymisation step, written to the standard error stream.
write(" o Pseudonymisation", stderr())

The columns listed in the table below are dropped from the cleaned r db_name database.

# Table of the dropped columns, taken from the `new` column of
# `day7fu_pii_drops`.
knitr::kable(dplyr::select(day7fu_pii_drops, new),
             row.names = FALSE,
             col.names = c("Database reference"),
             caption = "Columns dropped for the cleaned data export")
# Keep only the columns named in the de-identified dictionary; any_of()
# ignores dictionary names that are absent from the data.
day7fu_data_no_pii <- dplyr::select(
  day7fu_data,
  dplyr::any_of(c(day7fu_deidentified_dict$new))
)

Pseudonymisation is performed using a cryptographic hash function (md5) that takes strings as input (variables uuid, child_id, and device_id) and produces a random-like fixed-length output.

# Replace uuid, child_id and device_id by their hash, one row at a time
# (rowwise) because digest() hashes a single object per call.
# Empty strings are kept as empty strings.
# NOTE(review): digest() is called with its default arguments apart from
# `algo`, so it presumably hashes the serialized R string rather than the raw
# characters - confirm before comparing with hashes computed elsewhere.
day7fu_data_no_pii <- day7fu_data_no_pii %>%
  dplyr::rowwise() %>%
  dplyr::mutate(uuid = ifelse(uuid != "", digest(uuid, algo = crypto_algo), ""),
                child_id = ifelse(child_id != "", digest(child_id, algo = crypto_algo), ""),
                device_id = ifelse(device_id != "", digest(device_id, algo = crypto_algo), "")) %>%
  dplyr::ungroup()
# Number of records in the cleaned, pseudonymised dataset
n_cleaned_day7fu_records <- nrow(day7fu_data_no_pii)

Data cleaning summary

write(" o Data cleaning summary", stderr())
# Flowchart built from the counters collected above: raw count, dropped
# duplicates, the four date-discrepancy counts and the final cleaned count.
timci::create_day7fu_outcome_qc_flowchart(n_raw_successday7fu_records,
                                          n_dropped_duplicate_day7fu_records,
                                          n_fu_prior_day0_day7fu,
                                          n_death_prior_day0_day7fu,
                                          n_hospit_prior_day0_day7fu,
                                          n_death_prior_hospit_day7fu,
                                          n_cleaned_day7fu_records)

Data overview

# Prepare column types for the overview: categorical columns become factors,
# call and hospitalisation dates become "YYYY-MM-DD" strings and `start`
# becomes a formatted date-time string.
# `across` is namespaced throughout, consistently with the other dplyr calls.
day7fu_data_no_pii <- day7fu_data_no_pii %>%
  dplyr::mutate(dplyr::across(c(device_id,
                                cg_reached,
                                cg_ok,
                                status_day7,
                                cure_day7,
                                admission),
                              factor)) %>%
  dplyr::mutate(dplyr::across(c(date_call,
                                date_hosp_day7),
                              ~format(as.Date(.), "%Y-%m-%d"))) %>% 
  dplyr::mutate(dplyr::across(c(start),
                              ~format(as.POSIXct(.))))

# `location_death_day7` is converted only when it is present in the data.
if ( "location_death_day7" %in% colnames(day7fu_data_no_pii) ) {
  day7fu_data_no_pii <- day7fu_data_no_pii %>%
    dplyr::mutate(dplyr::across(c(location_death_day7),
                                as.character))
}
day7fu_data_no_pii <- day7fu_data_no_pii %>%
  dplyr::mutate(dplyr::across(c(call_ok),
                              factor))
# Summary statistics of the cleaned dataset
skimr::skim(day7fu_data_no_pii)

Data export

write(" o Data export", stderr())
# Export the raw successful follow-ups to `rctls_dir`.
# NOTE(review): `raw_successday7fu_data` is not created in this section;
# it is presumably defined earlier in the report - confirm.
# The description previously read "Raw successful Day 7 follow-up only)",
# with an unbalanced parenthesis and a missing word.
timci::dataset_export(raw_successday7fu_data,
                      "04b",
                      "timci_followup_successful_day7_data",
                      rctls_dir,
                      "Raw Day 7 follow-up data (successful follow-ups only)")
# Export the cleaned, pseudonymised data to `locked_db_dir` under the same
# index and file label.
timci::dataset_export(day7fu_data_no_pii,
                      "04b",
                      "timci_followup_successful_day7_data",
                      locked_db_dir,
                      "Cleaned successful Day 7 follow-up data")
# Drop the raw data from the environment and trigger garbage collection.
rm(raw_successday7fu_data)
gc()


Thaliehln/timci documentation built on April 8, 2024, 3:38 p.m.