\newpage

# Log the start of this section on the standard error stream.
write(
  x = "Export Day 28 follow-up outcome data (successful follow-ups only) and run corresponding quality checks",
  file = stderr()
)

Day 28 follow-up outcome data quality checks

# Counters later passed to the data cleaning flowchart. They all start at zero
# and are not updated anywhere else in this section.
n_fu_prior_day0_day28fu <-
  n_death_prior_day0_day28fu <-
  n_hospit_prior_day0_day28fu <-
  n_death_prior_hospit_day28fu <- 0

This section focuses only on successful follow-ups, i.e. follow-ups where the participant was successfully reached and where follow-up outcomes were collected.

# Keep only the follow-up records flagged with proceed_day28 == 1.
day28fu_data <- dplyr::filter(allday28fu_data, proceed_day28 == 1)

# Number of successful follow-up records before any cleaning in this section.
n_raw_successday28fu_records <- nrow(day28fu_data)

# Availability flags for the filtered data.
# NOTE(review): these are computed after the filter call, which would already
# have failed on a NULL input - confirm whether the NULL flag is still needed.
day28fu_is_not_empty <- timci::is_not_empty(day28fu_data)
day28fu_is_not_null <- !is.null(day28fu_data)

Among the r n_cleaned_allday28fu_records cleaned r db_name record(s), there are r n_raw_successday28fu_records record(s) corresponding to successful Day 28 follow-up(s).

# Flag follow-ups whose `days` value lies in the closed interval [28, 32]
# (1 = inside, 0 = outside; a missing `days` value gives a missing flag).
day28fu_data <- dplyr::mutate(
  day28fu_data,
  window = ifelse(days >= 28 & days <= 32, 1, 0)
)

Duplicate management

Successful follow-up duplicates [Compliance check r qc_duplicated_day28fu]

# Duplicate check on successful Day 28 follow-ups. The variables below are
# made available to the child quality-check document, which is knitted in the
# current environment.
qc_type <- "duplicates"
qc_idx <- qc_duplicated_day28fu
qc_text <- "duplicated IDs"
qc_description <- "It is possible to have more than one successful follow-up records available for the same participant. In this case, following the guidance from the statistical analysis plan, Only the most recent successful Day 28 follow-up is kept."
qc_rule <- "Delete all older records and keep only the most recent when more than one successful follow-up is available for the same participant."

# Data under review, its ID and date columns, and the cleaning strategy.
df <- day28fu_data
col_id <- "child_id"
col_date <- "start"
cleaning <- "keep_latest"

# Label and description attached to the check (presumably used when the
# detected records are exported - TODO confirm in the child document).
qc_export_label <- "duplicated_successful_day28fu"
qc_export_description <- "Day 28 follow-ups are duplicated"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# `cleaned_df` is not assigned in this section; presumably it is produced by
# the child document knitted above - TODO confirm.
# The dropped-record count must be taken before `day28fu_data` is overwritten.
n_dropped_duplicate_day28fu_records <- nrow(day28fu_data) - nrow(cleaned_df)
day28fu_data <- cleaned_df

Missing date of hospitalisation [Mandatory check r qc_missing_hospit_date_day28fu]

# Progress log only.
# NOTE(review): no quality check is run in this section - confirm whether the
# missing hospitalisation date check is still to be implemented.
write(x = " o Missing date of hospitalisation", file = stderr())

Invalid date of hospitalisation [Context check r qc_hospit_before_enrolment_day28fu]

write(x = " o Invalid date of hospitalisation", file = stderr())

# Context check comparing the Day 28 hospitalisation date with the Day 0
# visit date. The variables below are made available to the child
# quality-check document, which is knitted in the current environment.
qc_type <- "date_discrepancy_fu"
qc_idx <- qc_hospit_before_enrolment_day28fu
qc_text <- "a date of hospitalisation before the enrolment date"
qc_description <- "The reported hospital visit should have happened between enrolment at Day 0 and the Day 28 follow-up."
qc_rule <- action_alert_no_modification

# Left-join the Day 0 context columns, then the Day 7 hospitalisation date,
# onto the Day 28 follow-up records (all Day 28 rows are kept).
# NOTE(review): `date_hosp_day7` is merged in but not referenced by any
# variable set here - confirm that the child document uses it.
df <- merge(
  x = merge(
    x = day28fu_data,
    y = dplyr::select(
      day0_data,
      child_id,
      date_visit,
      hospit,
      journey,
      prev_hf_type,
      prev_hosp
    ),
    by = "child_id",
    all.x = TRUE
  ),
  y = dplyr::select(day7fu_data, child_id, date_hosp_day7),
  by = "child_id",
  all.x = TRUE
)

# Names of the two date columns being compared, and the Day 0 context columns.
col_date1 <- "date_hosp_day28"
col_date2 <- "date_visit"
fu_cols <- c("hospit", "journey", "prev_hf_type", "prev_hosp")

qc_export_label <- "hospit_before_enrolment"
qc_export_description <- "the reported date of hospitalisation was before the enrolment date"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

Missing date of death [Mandatory check r qc_missing_death_date_day28fu]

# Progress log only.
# NOTE(review): no quality check is run in this section - confirm whether the
# missing death date check is still to be implemented.
write(x = " o Missing date of death", file = stderr())

Invalid date of death [Context check r qc_death_before_enrolment_day28fu]

write(x = " o Invalid date of death", file = stderr())

# Context check comparing the Day 28 death date with the Day 0 visit date.
# The variables below are made available to the child quality-check document,
# which is knitted in the current environment.
qc_type <- "date_discrepancy"
qc_idx <- qc_death_before_enrolment_day28fu
qc_text <- "a date of death before the enrolment date"
qc_description <- "The reported death should have happened between enrolment at Day 0 and the Day 28 follow-up."
qc_rule <- action_alert_no_modification

# Left-join the Day 0 visit date onto the Day 28 follow-up records.
df <- merge(
  x = day28fu_data,
  y = dplyr::select(day0_data, child_id, date_visit),
  by = "child_id",
  all.x = TRUE
)

# Names of the two date columns being compared.
col_date1 <- "date_death_day28"
col_date2 <- "date_visit"

qc_export_label <- "death_before_enrolment"
qc_export_description <- "the reported date of death was before the enrolment date"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

Pseudonymisation

# Log the start of the pseudonymisation step on the standard error stream.
write(x = " o Pseudonymisation", file = stderr())

The columns listed in the table below are dropped from the cleaned r db_name database.

# Render the table of dropped columns (the `new` column of
# `day28fu_pii_drops`).
knitr::kable(
  dplyr::select(day28fu_pii_drops, new),
  row.names = FALSE,
  col.names = c("Database reference"),
  caption = "Columns dropped for the cleaned data export"
)

# Keep only the columns listed in the de-identified dictionary; names absent
# from the data are ignored by any_of().
# NOTE(review): the table above is built from `day28fu_pii_drops`, while the
# selection uses `day28fu_deidentified_dict` - confirm the two stay in sync.
day28fu_data_no_pii <- dplyr::select(
  day28fu_data,
  dplyr::any_of(c(day28fu_deidentified_dict$new))
)

Pseudonymisation is performed using a cryptographic hash function (md5) that takes strings as input (variables uuid, child_id, and device_id) and produces a random-like fixed-length output.

# Hash the identifier columns one row at a time; empty strings are left empty.
# The algorithm comes from `crypto_algo`, which is defined outside this
# section.
# NOTE(review): digest() is called with its default serialisation setting -
# confirm this matches how the same identifiers are hashed in other exports.
day28fu_data_no_pii <- dplyr::rowwise(day28fu_data_no_pii)
day28fu_data_no_pii <- dplyr::mutate(
  day28fu_data_no_pii,
  uuid = ifelse(uuid != "", digest(uuid, algo = crypto_algo), ""),
  child_id = ifelse(child_id != "", digest(child_id, algo = crypto_algo), ""),
  device_id = ifelse(device_id != "", digest(device_id, algo = crypto_algo), "")
)
# Remove the row-wise grouping before the data is used further.
day28fu_data_no_pii <- dplyr::ungroup(day28fu_data_no_pii)

# Number of records in the cleaned, pseudonymised dataset.
n_cleaned_day28fu_records <- nrow(day28fu_data_no_pii)

Data cleaning summary

write(x = " o Data cleaning summary", file = stderr())

# Build the cleaning flowchart from the record counts collected above.
# Arguments are passed by position: raw count, dropped duplicates, the four
# counters initialised to zero at the top of this section, and the final
# cleaned count.
timci::create_day28fu_outcome_qc_flowchart(
  n_raw_successday28fu_records,
  n_dropped_duplicate_day28fu_records,
  n_fu_prior_day0_day28fu,
  n_death_prior_day0_day28fu,
  n_hospit_prior_day0_day28fu,
  n_death_prior_hospit_day28fu,
  n_cleaned_day28fu_records
)

Data overview

# Summary statistics of the de-duplicated successful Day 28 follow-up data.
# NOTE(review): this summarises `day28fu_data`, not the pseudonymised
# `day28fu_data_no_pii` - confirm this is the intended dataset for the report.
skimr::skim(day28fu_data)

Data export

write(" o Data export", stderr())

# Export the raw successful Day 28 follow-up data to `rctls_dir`.
# NOTE(review): `raw_successday28fu_data` is not assigned anywhere in this
# section; it must already exist in the knitting environment - confirm, or
# capture it right after the `proceed_day28 == 1` filter.
# The description previously ended with a stray ")" and lacked the word
# "data"; it now mirrors the description of the cleaned export below.
timci::dataset_export(raw_successday28fu_data,
                      "06b",
                      "timci_followup_successful_day28_data",
                      rctls_dir,
                      "Raw successful Day 28 follow-up data")

# Export the cleaned, pseudonymised data to `locked_db_dir`, using the same
# index and file label as the raw export.
timci::dataset_export(day28fu_data_no_pii,
                      "06b",
                      "timci_followup_successful_day28_data",
                      locked_db_dir,
                      "Cleaned successful Day 28 follow-up data")


Thaliehln/timci documentation built on April 8, 2024, 3:38 p.m.