# Default chunk options for this document: do not echo source code and do not
# show warnings or messages in the rendered output.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)

Observations by children

# Progress message written to stderr.
write(" o Observations by children", file = stderr())

# Database name used in the report text.
db_name <- "time-flow main"

# Identifiers of the quality checks run on the main time-flow table.
qc_tf_before_startdate <- "DQC_TF_01"
qc_tf_after_lockdate   <- "DQC_TF_02"
qc_tf_nonvalid_fids    <- "DQC_TF_03"
qc_tf_nonvalid_ids     <- "DQC_TF_04"

# Main table: first element of `tf_data`, passed through the package's
# dictionary-matching helper; its row count is reported as the raw record count.
tf_data_main <- timci::match_from_tf_xls_dict(tf_data[[1]])
n_raw_tf_data_main <- nrow(tf_data_main)

There are `r n_raw_tf_data_main` records in the raw `r db_name` database.

Dates and times

write("   o Date and time checks", file = stderr())

# Start the locked table from the raw main table, with `date_visit` coerced
# through as.Date().
locked_tf_data_main <- dplyr::mutate(
  tf_data_main,
  date_visit = as.Date(date_visit)
)

Study start date [Context check `r qc_tf_before_startdate`]

write("   o Study start date context", stderr())

# Inputs for the quality-check child document. The child is knitted in this
# environment (envir = environment()), so it sees the variables assigned here.
qc_description <- "Time-flow observations are considered valid only from the study start date. Data may have been entered before this date for training purposes."
qc_rule <- paste0("Records entered before the study start date on ", start_date, " are deleted")
qc_type <- "anterior_to_startdate"
# Run the check on `locked_tf_data_main`, like every later check in this file.
# Reading `tf_data_main` here discarded the as.Date() conversion of
# `date_visit` made just before, because `locked_tf_data_main` is overwritten
# with `cleaned_df` at the end of this chunk.
df <- locked_tf_data_main
qc_text <- paste0("an entry date anterior to the study start date on **", start_date, "**")
qc_idx <- qc_tf_before_startdate
qc_export_label <- "anterior_to_startdate"
qc_export_description <- "the entry date is anterior to the study start date"
cat(knitr::knit_child('database_export_sub_quality_check.Rmd',
                      envir = environment(),
                      quiet = TRUE))

# `qc_df`, `n_detected` and `cleaned_df` are not assigned in this file;
# presumably they are set by the child document (detected records, their count,
# and the table after applying the rule) - confirm against the child.
tf_data_before_start <- qc_df %>%
  dplyr::select(date_visit, child_id)
n_before_startdate_records <- n_detected
locked_tf_data_main <- cleaned_df

Study lock date [Context check `r qc_tf_after_lockdate`]

write("   o Study lock date context", file = stderr())

# Table under test and identifiers of the check. These and the variables below
# are read by the child document, which is knitted in this environment.
df <- locked_tf_data_main
qc_idx <- qc_tf_after_lockdate
qc_type <- "posterior_to_lockdate"

# Texts shown in the report.
qc_description <- "Time-flow observations are considered valid only until the date for the lock. Note that follow-up data will be managed differently, since they are considered valid after the lock data as soon as they correspond to a participant enrolled before the date of the lock."
qc_rule <- paste0("Records entered after the lock date on ", lock_date, " are deleted")
qc_text <- paste0("an entry date posterior to the lock date on **", lock_date, "**")

# Label and description used for the export of detected records.
qc_export_label <- "posterior_to_lockdate"
qc_export_description <- "the entry date is posterior to the lock date"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# `qc_df`, `n_detected` and `cleaned_df` are not assigned in this file;
# presumably the child document sets them - confirm against the child.
tf_data_after_lock <- dplyr::select(qc_df, date_visit, child_id)
n_after_lockdate_records <- n_detected
locked_tf_data_main <- cleaned_df

Facilities

Non-valid facility IDs [compliance check `r qc_tf_nonvalid_fids`]

write("   o Non-valid facility IDs", file = stderr())

# Reset the result variables of the previous check before running this one.
qc_df <- NULL
cleaned_df <- NULL

# --- Step a: detect inconsistent facility information ----------------------
df <- locked_tf_data_main
qc_idx <- paste0(qc_tf_nonvalid_fids, "a")
qc_type <- "tf_inconsistent_facility_info"
qc_description <- "Facility ID does not refer to a SPA facility and is not consistent with the facility code in the child ID. Check the facility ID from the first 5 characters of the child ID versus from the research assistant's manual entry."
qc_rule <- action_alert_no_modification
qc_text <- "inconsistent facility info"
qc_export_label <- "tf_data_inconsistent_facility"
qc_export_description <- "facility information between the child ID and the research assistant's entry is inconsistent"
cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# --- Step b: apply manual corrections, then re-check ------------------------
# The corrections child is given the table as it was before step a, since the
# rule of step a is an alert without modification.
to_correct_df <- locked_tf_data_main
mc_description <- "Correct time-flow facility ID"
correction_type <- "correct_tf_inconsistent_facilities"

# Parameters for the quality check following manual corrections
qc_idx <- paste0(qc_tf_nonvalid_fids, "b")
qc_export_label <- "tf_data_inconsistent_facility"

cat(
  knitr::knit_child(
    "database_export_sub_corrections.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# `n_mc` and `corrected_df` are not assigned in this file; presumably the
# corrections child sets them - confirm against the child.
n_edit_inconsistent_fid <- n_mc
locked_tf_data_main <- corrected_df

Participants

Non-valid participant IDs [compliance check `r qc_tf_nonvalid_ids`]

write("   o Non-valid participant IDs", file = stderr())

# Reset the result of the previous check.
cleaned_df <- NULL

# Table under test and the reference table it is compared with, each with the
# name of its ID column.
df <- locked_tf_data_main
idcol1 <- "child_id"
refdf <- facility_data
idcol2 <- "child_id"

# Identifiers of the check.
qc_idx <- qc_tf_nonvalid_ids
qc_type <- "spatf_nonvalid_ids"

# Texts shown in the report.
qc_description <- "Children can be part of the time-flow data even if they have not been enrolled in the RCT/LS and are not on their Day 0 visit."
qc_rule <- "Flag with value 1 in column *matched* the IDs of children who are found in the locked Day 0 database."
qc_text <- "participant ID not valid"

# Label and description used for the export of detected records.
qc_export_label <- "timci_timeflow_data_unknown_ids"
qc_export_description <- "child IDs are not found in the Day 0 dataset"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# `cleaned_df` was reset to NULL above, so the value used here has to come from
# the child document.
locked_tf_data_main <- cleaned_df

Pseudonymisation

# Progress message written to stderr.
write(" o Pseudonymisation", file = stderr())

Pseudonymisation is performed using a cryptographic hash function (md5) that takes strings as input and produces a random-like fixed-length output.

# Replace `child_id` and `device_id` by their digest (algorithm taken from
# `crypto_algo`); empty strings are kept empty. rowwise() makes digest() run
# once per record instead of once on the whole column.
locked_tf_data_main <- locked_tf_data_main %>%
  dplyr::rowwise() %>%
  dplyr::mutate(
    child_id = ifelse(
      child_id != "",
      digest(child_id, algo = crypto_algo),
      ""
    ),
    device_id = ifelse(
      device_id != "",
      digest(device_id, algo = crypto_algo),
      ""
    )
  ) %>%
  dplyr::ungroup()

# Number of records left in the locked main table.
n_cleaned_tf_main_records <- nrow(locked_tf_data_main)

Data cleaning summary

There are `r n_cleaned_tf_main_records` records in the locked `r db_name` database.

Data overview

# Summary of the locked (cleaned and pseudonymised) main time-flow table.
skimr::skim(locked_tf_data_main)

Export

write("Export time-flow main data", stderr())

# Raw main table, exported to the SPA directory given in the report parameters.
timci::dataset_export(tf_data_main,
                      "15a",
                      "timci_timeflow_data",
                      params$spa_dir,
                      "Raw time-flow data")

# Locked main table, exported to the locked database directory. The description
# previously read "Time-flow step data", copied from the steps export, although
# this call exports the main table.
timci::dataset_export(locked_tf_data_main,
                      "15a",
                      "timci_timeflow_data",
                      locked_db_dir,
                      "Time-flow main data")

Steps

# Database name used in the report text, and identifier of the quality check
# run on the step table.
db_name <- "time-flow step"
qc_tf_negative_times <- "DQC_TF_05"

# Step table: second element of `tf_data`.
tf_data_steps <- tf_data[[2]]
n_raw_tf_data_steps <- nrow(tf_data_steps)

# Durations of each step and of its CDSA and pulse-oximetry sub-steps.
# The columns are named `*_dur_sec`, so the unit is forced to seconds: with the
# default `units = "auto"`, difftime() picks secs, mins, hours or days from the
# data, and the stored numbers would not reliably be seconds.
tf_data_steps <- tf_data_steps %>%
  dplyr::mutate(
    step_dur_sec = difftime(strptime(time_end, "%Y-%m-%d %H:%M:%S"),
                            strptime(time_start, "%Y-%m-%d %H:%M:%S"),
                            units = "secs"),
    cdsa_dur_sec = difftime(strptime(`cdsa-time_cdsaend`, "%Y-%m-%d %H:%M:%S"),
                            strptime(`cdsa-time_cdsastart`, "%Y-%m-%d %H:%M:%S"),
                            units = "secs"),
    pox_dur_sec = difftime(strptime(`pox-time_poxend`, "%Y-%m-%d %H:%M:%S"),
                           strptime(`pox-time_poxstart`, "%Y-%m-%d %H:%M:%S"),
                           units = "secs")
  )

There are `r n_raw_tf_data_steps` records in the raw `r db_name` database.

Negative step duration [Logic check `r qc_tf_negative_times`]

# Reset the result of the previous check.
cleaned_df <- NULL

# Table under test and identifiers of the check. These and the variables below
# are read by the child document, which is knitted in this environment.
df <- tf_data_steps
qc_idx <- qc_tf_negative_times
qc_type <- "tf_negative_value"

# Texts shown in the report.
qc_description <- "Detect negative step durations and how frequently it happened for a given time-flow observation to assess the quality of the whole time-flow record."
qc_rule <- "Delete steps where the duration is negative."
qc_text <- "negative duration"

# Label and description used for the export of detected records.
qc_export_label <- "timci_timeflow_negative_durations"
qc_export_description <- "step duration is negative"

cat(
  knitr::knit_child(
    "database_export_sub_quality_check.Rmd",
    envir = environment(),
    quiet = TRUE
  )
)

# `cleaned_df` was reset to NULL above, so the value used here has to come from
# the child document. Step names are re-encoded to UTF-8 in the locked table.
locked_tf_data_steps <- cleaned_df
locked_tf_data_steps <- dplyr::mutate(
  locked_tf_data_steps,
  step_name = enc2utf8(step_name)
)

Export

write("Export time-flow step data", file = stderr())

# Step table with computed durations, before the negative-duration check,
# exported to the SPA directory given in the report parameters.
timci::dataset_export(
  tf_data_steps,
  "15b",
  "timci_timeflow_steps",
  params$spa_dir,
  "Time-flow step data"
)

# Locked step table, exported to the locked database directory.
timci::dataset_export(
  locked_tf_data_steps,
  "15b",
  "timci_timeflow_steps",
  locked_db_dir,
  "Time-flow step data"
)

Audit data

# Index of the last element of `tf_data`, which is exported below as the
# audit log.
i <- length(tf_data)

Export

write("Export time-flow audit data", file = stderr())

# Export the element of `tf_data` at index `i` (set just before this chunk) to
# the SPA directory given in the report parameters.
timci::dataset_export(
  tf_data[[i]],
  "15z",
  "timci_timeflow_audit",
  params$spa_dir,
  "Time-flow audit log data"
)


Thaliehln/timci documentation built on April 8, 2024, 3:38 p.m.