# Chunk defaults for the report: code, warnings and messages are not echoed.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)
write(x = " o Observations by children", file = stderr())

# Display name of the database and identifiers of the quality checks used below.
db_name <- "time-flow main"
qc_tf_before_startdate <- "DQC_TF_01"
qc_tf_after_lockdate <- "DQC_TF_02"
qc_tf_nonvalid_fids <- "DQC_TF_03"
qc_tf_nonvalid_ids <- "DQC_TF_04"

# Raw main data: first element of tf_data, passed through the timci helper.
tf_data_main <- timci::match_from_tf_xls_dict(tf_data[[1]])
n_raw_tf_data_main <- nrow(tf_data_main)
There are `r n_raw_tf_data_main` records in the raw `r db_name` database.
write(x = " o Date and time checks", file = stderr())

# Working copy of the main data with date_visit coerced to Date.
locked_tf_data_main <- dplyr::mutate(
  tf_data_main,
  date_visit = as.Date(date_visit)
)
`r qc_tf_before_startdate`
write(" o Study start date context", stderr())

# Inputs read by the quality-check child document, which is knitted in this
# environment (envir = environment()).
qc_description <- "Time-flow observations are considered valid only from the study start date. Data may have been entered before this date for training purposes."
qc_rule <- paste0("Records entered before the study start date on ", start_date, " are deleted")
qc_type <- "anterior_to_startdate"
# Check the working copy rather than the raw data: locked_tf_data_main is
# overwritten with cleaned_df below, so starting from tf_data_main would
# discard the Date conversion of date_visit applied earlier.
df <- locked_tf_data_main
qc_text <- paste0("an entry date anterior to the study start date on **", start_date, "**")
qc_idx <- qc_tf_before_startdate
qc_export_label <- "anterior_to_startdate"
qc_export_description <- "the entry date is anterior to the study start date"

cat(knitr::knit_child('database_export_sub_quality_check.Rmd', envir = environment(), quiet = TRUE))

# NOTE(review): qc_df, n_detected and cleaned_df are presumably produced by the
# child document above; they are not assigned anywhere else in this section.
tf_data_before_start <- qc_df %>%
  dplyr::select(date_visit, child_id)
n_before_startdate_records <- n_detected

locked_tf_data_main <- cleaned_df
`r qc_tf_after_lockdate`
write(" o Study lock date context", stderr())

# Inputs read by the quality-check child document, which is knitted in this
# environment (envir = environment()).
qc_description <- "Time-flow observations are considered valid only until the date for the lock. Note that follow-up data will be managed differently, since they are considered valid after the lock data as soon as they correspond to a participant enrolled before the date of the lock."
qc_rule <- paste0("Records entered after the lock date on ", lock_date, " are deleted")
qc_type <- "posterior_to_lockdate"
df <- locked_tf_data_main
qc_text <- paste0("an entry date posterior to the lock date on **", lock_date, "**")
qc_idx <- qc_tf_after_lockdate
qc_export_label <- "posterior_to_lockdate"
qc_export_description <- "the entry date is posterior to the lock date"

cat(knitr::knit_child('database_export_sub_quality_check.Rmd', envir = environment(), quiet = TRUE))

# NOTE(review): qc_df, n_detected and cleaned_df are presumably produced by the
# child document above; they are not assigned anywhere else in this section.
tf_data_after_lock <- qc_df %>%
  dplyr::select(date_visit, child_id)
n_after_lockdate_records <- n_detected

locked_tf_data_main <- cleaned_df
`r qc_tf_nonvalid_fids`
write(" o Non-valid facility IDs", stderr())

# Reset the outputs of the previous quality check before running this one.
cleaned_df <- NULL
qc_df <- NULL

# Step "a": detection of inconsistent facility information. Inputs are read by
# the quality-check child document, knitted in this environment.
qc_description <- "Facility ID does not refer to a SPA facility and is not consistent with the facility code in the child ID. Check the facility ID from the first 5 characters of the child ID versus from the research assistant's manual entry."
qc_rule <- action_alert_no_modification
qc_type <- "tf_inconsistent_facility_info"
df <- locked_tf_data_main
qc_text <- "inconsistent facility info"
qc_idx <- paste0(qc_tf_nonvalid_fids, "a")
qc_export_label <- "tf_data_inconsistent_facility"
qc_export_description <- "facility information between the child ID and the research assistant's entry is inconsistent"

cat(knitr::knit_child('database_export_sub_quality_check.Rmd', envir = environment(), quiet = TRUE))

# Step "b": manual corrections, handled by the corrections child document.
mc_description <- "Correct time-flow facility ID"
to_correct_df <- locked_tf_data_main
correction_type <- "correct_tf_inconsistent_facilities"

# Parameters for the quality check following manual corrections
qc_idx <- paste0(qc_tf_nonvalid_fids, "b")
qc_export_label <- "tf_data_inconsistent_facility"

cat(knitr::knit_child('database_export_sub_corrections.Rmd', envir = environment(), quiet = TRUE))

# NOTE(review): n_mc and corrected_df are presumably produced by the corrections
# child document; they are not assigned anywhere else in this section.
n_edit_inconsistent_fid <- n_mc
locked_tf_data_main <- corrected_df
`r qc_tf_nonvalid_ids`
write(" o Non-valid participant IDs", stderr())

# Reset the output of the previous quality check before running this one.
cleaned_df <- NULL

# Inputs read by the quality-check child document, which is knitted in this
# environment (envir = environment()).
qc_description <- "Children can be part of the time-flow data even if they have not been enrolled in the RCT/LS and are not on their Day 0 visit."
qc_rule <- "Flag with value 1 in column *matched* the IDs of children who are found in the locked Day 0 database."
qc_type <- "spatf_nonvalid_ids"
df <- locked_tf_data_main
idcol1 <- "child_id"
# Reference data frame and ID column against which child IDs are compared.
refdf <- facility_data
idcol2 <- "child_id"
qc_text <- "participant ID not valid"
qc_idx <- qc_tf_nonvalid_ids
qc_export_label <- "timci_timeflow_data_unknown_ids"
qc_export_description <- "child IDs are not found in the Day 0 dataset"

cat(knitr::knit_child('database_export_sub_quality_check.Rmd', envir = environment(), quiet = TRUE))

# NOTE(review): cleaned_df is presumably produced by the child document above.
locked_tf_data_main <- cleaned_df
# Progress message on the error stream.
write(x = " o Pseudonymisation", file = stderr())
Pseudonymisation is performed using a cryptographic hash function (md5) that takes strings as input and produces a random-like fixed-length output.
# Replace non-empty child and device IDs by their digest, one row at a time;
# empty strings are kept as empty strings.
locked_tf_data_main <- locked_tf_data_main %>%
  dplyr::rowwise() %>%
  dplyr::mutate(
    child_id = ifelse(
      child_id != "",
      digest(child_id, algo = crypto_algo),
      ""
    ),
    device_id = ifelse(
      device_id != "",
      digest(device_id, algo = crypto_algo),
      ""
    )
  ) %>%
  dplyr::ungroup()

# Number of records left after all checks on the main data.
n_cleaned_tf_main_records <- nrow(locked_tf_data_main)
There are `r n_cleaned_tf_main_records` records in the locked `r db_name` database.
# Summary of the locked main data, shown in the report.
skimr::skim(locked_tf_data_main)

write("Export time-flow main data", stderr())

# Raw main data, exported to the SPA directory.
timci::dataset_export(tf_data_main, "15a", "timci_timeflow_data", params$spa_dir, "Raw time-flow data")

# Locked main data, exported to the locked database directory. The label used
# to read "Time-flow step data", copied from the step export although this is
# the main time-flow dataset.
timci::dataset_export(locked_tf_data_main, "15a", "timci_timeflow_data", locked_db_dir, "Time-flow data")
# Display name of the database and identifier of the quality check used below.
db_name <- "time-flow step"
qc_tf_negative_times <- "DQC_TF_05"

# Raw step data: second element of tf_data.
tf_data_steps <- tf_data[[2]]
n_raw_tf_data_steps <- nrow(tf_data_steps)

# Timestamp layout shared by every start/end column parsed below.
tf_timestamp_format <- "%Y-%m-%d %H:%M:%S"

# Durations are forced to seconds. With the default units = "auto", difftime()
# chooses the unit from the data, so the *_sec columns could silently hold
# minutes, hours or days instead of seconds.
tf_data_steps <- tf_data_steps %>%
  dplyr::mutate(
    step_dur_sec = difftime(
      strptime(time_end, tf_timestamp_format),
      strptime(time_start, tf_timestamp_format),
      units = "secs"
    )
  ) %>%
  dplyr::mutate(
    cdsa_dur_sec = difftime(
      strptime(`cdsa-time_cdsaend`, tf_timestamp_format),
      strptime(`cdsa-time_cdsastart`, tf_timestamp_format),
      units = "secs"
    )
  ) %>%
  dplyr::mutate(
    pox_dur_sec = difftime(
      strptime(`pox-time_poxend`, tf_timestamp_format),
      strptime(`pox-time_poxstart`, tf_timestamp_format),
      units = "secs"
    )
  )
There are `r n_raw_tf_data_steps` records in the raw `r db_name` database.
`r qc_tf_negative_times`
# Reset the output of the previous quality check before running this one.
cleaned_df <- NULL

# Inputs read by the quality-check child document, which is knitted in this
# environment (envir = environment()).
qc_description <- "Detect negative step durations and how frequently it happened for a given time-flow observation to assess the quality of the whole time-flow record."
qc_rule <- "Delete steps where the duration is negative."
qc_type <- "tf_negative_value"
df <- tf_data_steps
qc_text <- "negative duration"
qc_idx <- qc_tf_negative_times
qc_export_label <- "timci_timeflow_negative_durations"
qc_export_description <- "step duration is negative"

cat(knitr::knit_child('database_export_sub_quality_check.Rmd', envir = environment(), quiet = TRUE))

# NOTE(review): cleaned_df is presumably produced by the child document above.
locked_tf_data_steps <- cleaned_df

# Step names are converted to UTF-8 before export.
locked_tf_data_steps <- locked_tf_data_steps %>%
  dplyr::mutate(step_name = enc2utf8(step_name))
write(x = "Export time-flow step data", file = stderr())

# Step data with the computed duration columns, exported to the SPA directory.
timci::dataset_export(
  tf_data_steps,
  "15b",
  "timci_timeflow_steps",
  params$spa_dir,
  "Time-flow step data"
)

# Step data after the quality check, exported to the locked database directory.
timci::dataset_export(
  locked_tf_data_steps,
  "15b",
  "timci_timeflow_steps",
  locked_db_dir,
  "Time-flow step data"
)
# The audit log is taken from the last element of tf_data.
i <- length(tf_data)

write(x = "Export time-flow audit data", file = stderr())
timci::dataset_export(
  tf_data[[i]],
  "15z",
  "timci_timeflow_audit",
  params$spa_dir,
  "Time-flow audit log data"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.