# Chunk defaults for this document: do not echo code, and hide warnings and
# messages in the rendered output.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE
)
[Check description:]{custom-style="underlined"} `r qc_description`
[Action:]{custom-style="underlined"} `r qc_rule`
# Reset the variables shared by the quality-check chunks.
# Each statement sits on its own line so that the inline comment on
# `qc_reuse_df` no longer swallows the assignments that follow it.
qc_df <- NULL
qc_df2 <- NULL
qc_reuse_df <- NULL # reuse non de-identified dataframe for subsequent checks
cleaned_df <- NULL

# Snapshot of the input data frame: column names and number of records.
cols <- colnames(df)
n_df <- nrow(df)

# Counters and status text filled in by later chunks.
n_detected <- 0
n_to_remove <- 0
cleaned_df_status_update <- ""
# Check types that belong to the duplicate-detection family.
duplicate_checks <- c(
  "duplicates",
  "duplicates_with_names",
  "true_duplicates",
  "day0_duplicates",
  "repeat_duplicates"
)

# TRUE when the requested check type is one of the duplicate checks.
is_duplicate_check <- qc_type %in% duplicate_checks
# Delegate to timci::remove_facilities_for_other_studies(); the first element
# of the returned list is kept as the check output and the second as the
# cleaned data frame.
out <- timci::remove_facilities_for_other_studies(
  df,
  all_facilities,
  excluded_facilities
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::allocate_screening_facility2(); the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::allocate_screening_facility2(df, research_facilities)
qc_df <- out[[1]]
cleaned_df <- out[[2]]

# Disabled column restriction, kept for reference (one comment per line so it
# can be re-enabled without untangling a collapsed line).
# outcols <- c("date_visit", "child_id", "fid_from_device", "child_id_manual", "device_id", "uuid")
# qc_df <- qc_df %>%
#   dplyr::select(outcols)
# Detect records whose visit date is earlier than the study start date.
# `days_since_start` is the floored number of days between `date_visit` and
# `start_date`; negative values are reported in `qc_df`, the others are kept in
# `cleaned_df`.
# NOTE(review): dplyr::filter() drops rows where the condition is NA, so rows
# with an unparsable or missing `date_visit` end up in neither output. Confirm
# this is intended.
df <- df %>%
  dplyr::mutate(study_start_date = start_date) %>%
  dplyr::mutate(
    days_since_start = floor(
      difftime(as.Date(date_visit, "%Y-%m-%d"), study_start_date, units = "days")
    )
  )

qc_df <- df %>%
  dplyr::filter(days_since_start < 0) %>%
  dplyr::select(date_visit, child_id, uuid, days_since_start) %>%
  # Order entries by date (arrange() ignores argument names, so none is given)
  dplyr::arrange(as.Date(date_visit, "%Y-%m-%d"))

cleaned_df <- df %>%
  dplyr::filter(days_since_start >= 0) %>%
  # Order entries by date
  dplyr::arrange(as.Date(date_visit, "%Y-%m-%d"))
# Display the facilities that have a known start date.
# `filter` is namespace-qualified so that it cannot resolve to stats::filter()
# when dplyr is not attached.
research_facilities %>%
  dplyr::select(facility_id, facility_start) %>%
  dplyr::filter(!is.na(facility_start)) %>%
  knitr::kable()
# Detect records whose visit date is earlier than the start date of the
# facility they were collected in (matched on `fid_from_device`).
# Records with an unknown `days_since_start` are kept in `cleaned_df`.
# NOTE(review): merge() performs an inner join by default, so records whose
# `fid_from_device` is absent from `research_facilities` are dropped from `df`.
# Confirm this is intended.
df <- df %>%
  merge(
    research_facilities %>%
      dplyr::select(facility_id, facility_start),
    by.x = "fid_from_device",
    by.y = "facility_id"
  ) %>%
  dplyr::mutate(
    days_since_start = floor(
      difftime(as.Date(date_visit, "%Y-%m-%d"), facility_start, units = "days")
    )
  )

qc_df <- df %>%
  dplyr::filter(!is.na(days_since_start) & days_since_start < 0) %>%
  dplyr::select(date_visit, child_id, uuid, days_since_start) %>%
  # Order entries by date (arrange() ignores argument names, so none is given)
  dplyr::arrange(as.Date(date_visit, "%Y-%m-%d"))

cleaned_df <- df %>%
  dplyr::filter(is.na(days_since_start) | (!is.na(days_since_start) & days_since_start >= 0)) %>%
  # Order entries by date
  dplyr::arrange(as.Date(date_visit, "%Y-%m-%d"))
# Detect records whose visit date is later than the lock date.
# `days_to_lock` is the floored number of days between `date_visit` and
# `lock_date`; positive values are reported in `qc_df`, the others are kept in
# `cleaned_df`.
# NOTE(review): dplyr::filter() drops rows where the condition is NA, so rows
# with an unparsable or missing `date_visit` end up in neither output. Confirm
# this is intended.
df <- df %>%
  dplyr::mutate(study_lock_date = lock_date) %>%
  dplyr::mutate(
    days_to_lock = floor(
      difftime(as.Date(date_visit, "%Y-%m-%d"), study_lock_date, units = "days")
    )
  )

qc_df <- df %>%
  dplyr::filter(days_to_lock > 0) %>%
  dplyr::select(date_visit, child_id, uuid, days_to_lock) %>%
  # Order entries by date (arrange() ignores argument names, so none is given)
  dplyr::arrange(as.Date(date_visit, "%Y-%m-%d"))

cleaned_df <- df %>%
  dplyr::filter(days_to_lock <= 0) %>%
  # Order entries by date
  dplyr::arrange(as.Date(date_visit, "%Y-%m-%d"))
# Delegate to timci::identify_ids_outside_lock_range(); only the first element
# of the returned list is used (as the check output).
out <- timci::identify_ids_outside_lock_range(
  df = df,
  col_id = idcol,
  day0_df = day0_data,
  start_date = start_date,
  end_date = lock_date
)
qc_df <- out[[1]]
# Detect records marked as enrolled (enrolled == 1) although the caregiver
# eligibility flag is 0, then reset `consent` and `enrolled` to 0 for these
# records in the cleaned data frame.
# which() discards NA comparisons: indexing rows with a logical vector that
# contains NA (e.g. a missing `cg_eligibility`) would otherwise add phantom
# all-NA rows to `qc_df` and inflate the number of detected records.
qc_df <- df[which(df$enrolled == 1 & df$cg_eligibility == 0), ]

cleaned_df <- df
cleaned_df$consent[cleaned_df$uuid %in% qc_df$uuid] <- 0
cleaned_df$enrolled[cleaned_df$uuid %in% qc_df$uuid] <- 0
# Delegate to timci::identify_nonvalid_ids(); only the first element of the
# returned list is used (as the check output), the data are not cleaned here.
out <- timci::identify_nonvalid_ids(
  df,
  idcol1,
  refdf,
  idcol2
)
qc_df <- out[[1]]
# Delegate to timci::identify_nonvalid_ids(); the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::identify_nonvalid_ids(
  df,
  idcol1,
  refdf,
  idcol2
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::identify_nonvalid_ids2(); the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::identify_nonvalid_ids2(
  df,
  idcol1,
  refdf,
  idcol2
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::identify_nonvalid_ids_flagged(); the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::identify_nonvalid_ids_flagged(
  df,
  idcol1,
  refdf,
  idcol2
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::identify_nonvalid_ids_with_matched_names(); the first
# element of the returned list is kept as the check output and the second as
# the cleaned data frame.
out <- timci::identify_nonvalid_ids_with_matched_names(
  df1 = df,
  col_id1 = idcol1,
  df2 = refdf,
  col_id2 = idcol2,
  col_date1 = datecol,
  ldate_diff = ldate_diff,
  udate_diff = udate_diff,
  matched_names = matched_names,
  cleaning = cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::detect_inconsistent_dates() for the pair of date columns
# `col_date1` / `col_date2`; the first element of the returned list is kept as
# the check output and the second as the cleaned data frame.
out <- timci::detect_inconsistent_dates(
  df,
  col_date_start = col_date1,
  col_date_end = col_date2,
  cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Same date-consistency check as timci::detect_inconsistent_dates() on
# `col_date1` / `col_date2`, additionally passing `fu_cols` as `list_of_cols`.
# The first element of the returned list is kept as the check output and the
# second as the cleaned data frame.
out <- timci::detect_inconsistent_dates(
  df,
  col_date_start = col_date1,
  col_date_end = col_date2,
  cleaning,
  list_of_cols = fu_cols
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::identify_duplicates_by_dates(); the returned list is
# unpacked into the check output (1), the cleaned data frame (2) and a second
# check output (3).
out <- timci::identify_duplicates_by_dates(
  df,
  col_id,
  col_date,
  cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
qc_df2 <- out[[3]]
# Delegate to timci::identify_duplicates_with_names(); the returned list is
# unpacked into the check output (1), the cleaned data frame (2) and a second
# check output (3).
out <- timci::identify_duplicates_with_names(
  df,
  col_id,
  col_date,
  cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
qc_df2 <- out[[3]]
# Delegate to timci::identify_day0_duplicates_and_fu(), using `ref_df` as the
# Day 7 follow-up data; the returned list is unpacked into the check output
# (1), the cleaned data frame (2) and a second check output (3).
out <- timci::identify_day0_duplicates_and_fu(
  df = df,
  day7fu_df = ref_df,
  cleaning = cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
qc_df2 <- out[[3]]
# Delegate to timci::identify_repeat_duplicate(); the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::identify_repeat_duplicate(
  df,
  col_id,
  col_date,
  cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Delegate to timci::identify_true_duplicate(); the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::identify_true_duplicate(
  df,
  col_id,
  col_date,
  cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Detect records whose entered facility ID (`fid`) differs from both the
# facility of the device and the facility of the main device.
qc_df <- df %>%
  dplyr::filter(
    fid != "" & !is.na(fid),
    fid != fid_from_device,
    fid != fid_from_main_device # This row has been added for the Tanzania
  )

# When GPS data are available, add the output of timci::find_closest_facility().
if ("longitude" %in% cols) {
  qc_df <- qc_df %>%
    timci::find_closest_facility(research_facilities)
}

# Build the list of output columns: mandatory columns first, then the optional
# columns that are actually present, in a fixed order.
qccols <- colnames(qc_df)
outcols <- c("date_visit", "child_id", "fid", "district", "fid_from_device", "fid_from_main_device")
if ("facility_id" %in% qccols) {
  outcols <- c(outcols, "facility_id")
}
if ("fid_ra" %in% qccols) {
  # `district_ra` is expected whenever `fid_ra` is present
  outcols <- c(outcols, "fid_ra", "district_ra")
}
outcols <- c(
  outcols,
  intersect(c("child_id_manual", "latitude", "longitude", "gps_accuracy", "dist"), qccols),
  "uuid",
  "device_id"
)

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Detect records whose entered facility ID (`fid`) differs from `fid_ra`,
# ignoring records where either value is blank or missing.
qc_df <- df %>%
  dplyr::filter(
    fid != "" & !is.na(fid),
    fid_ra != "" & !is.na(fid_ra),
    fid != fid_ra
  )

# When GPS data are available, add the output of timci::find_closest_facility()
# and keep only the records for which a `facility_id` is available.
if ("longitude" %in% cols) {
  qc_df <- qc_df %>%
    timci::find_closest_facility(research_facilities) %>%
    dplyr::filter(facility_id != "" & !is.na(facility_id))
}

# Build the list of output columns: mandatory columns first, then the optional
# columns that are actually present, in a fixed order.
qccols <- colnames(qc_df)
outcols <- c(
  "date_visit", "child_id", "fid", "district",
  intersect(
    c(
      "fid_from_device", "fid_from_main_device", "fid_ra", "district_ra",
      "facility_id", "child_id_manual", "latitude", "longitude",
      "gps_accuracy", "dist"
    ),
    qccols
  ),
  "uuid",
  "device_id"
)

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Detect records whose entered facility ID (`fid`) differs from the facility
# of the main device, ignoring records with a blank or missing `fid`.
qc_df <- df %>%
  dplyr::filter(!is.na(fid) & fid != "") %>%
  dplyr::filter(fid != fid_from_main_device)

# When GPS data are available, add the output of timci::find_closest_facility().
if ("longitude" %in% cols) {
  qc_df <- qc_df %>%
    timci::find_closest_facility(research_facilities)
}

# Build the list of output columns: mandatory columns first, then the optional
# columns that are actually present, in a fixed order.
qccols <- colnames(qc_df)
outcols <- c("date_visit", "child_id", "fid", "district", "fid_from_device", "fid_from_main_device")
if ("facility_id" %in% qccols) {
  outcols <- c(outcols, "facility_id")
}
if ("fid_ra" %in% qccols) {
  # `district_ra` is expected whenever `fid_ra` is present
  outcols <- c(outcols, "fid_ra", "district_ra")
}
outcols <- c(
  outcols,
  intersect(c("child_id_manual", "latitude", "longitude", "gps_accuracy", "dist"), qccols),
  "uuid",
  "device_id"
)

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Detect records whose facility ID (`fid`) does not match characters 3 to 7 of
# the child ID, ignoring records where either value is blank or missing.
qc_df <- df %>%
  dplyr::filter(
    fid != "" & !is.na(fid),
    child_id != "" & !is.na(child_id),
    fid != substr(child_id, 3, 7)
  )

# Output columns; `facility_id` is only included when present.
qccols <- colnames(qc_df)
outcols <- c(
  "date_visit", "child_id", "fid", "district",
  intersect("facility_id", qccols),
  "uuid", "device_id"
)

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Same facility / child ID consistency check as on cleaned column names, but
# applied to the raw column names: the facility code must match characters 3
# to 7 of the child ID. Records where either value is blank or missing are
# ignored.
qc_df <- df %>%
  dplyr::filter(
    `facility_identification-fcode` != "" & !is.na(`facility_identification-fcode`),
    `child_identification-pid` != "" & !is.na(`child_identification-pid`),
    `facility_identification-fcode` != substr(`child_identification-pid`, 3, 7)
  )

# Output columns; `facility_id` is only included when present.
qccols <- colnames(qc_df)
outcols <- c(
  "date", "child_identification-pid", "facility_identification-fcode",
  "facility_identification-district",
  intersect("facility_id", qccols),
  "meta-instanceID", "DeviceID"
)

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
# The submission identifier is renamed to `uuid`.
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::rename(uuid = `meta-instanceID`) %>%
  dplyr::arrange(`facility_identification-fcode`)
# Delegate to timci::detect_missing_value() for column `col_value`, then keep
# the identification columns only, ordered by facility.
qc_df <- timci::detect_missing_value(df, col_value)
outcols <- c("child_id", "fid", "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Delegate to timci::detect_blank_value() for column `col_value`, then keep
# the identification columns only, ordered by facility.
qc_df <- timci::detect_blank_value(df, col_value)
outcols <- c("child_id", "fid", "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Delegate to timci::detect_negative_value() for column `col_value`, then keep
# the identification columns only, ordered by facility.
qc_df <- timci::detect_negative_value(df, col_value)
outcols <- c("child_id", "fid", "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Count, per `PARENT_KEY`, the steps with a negative duration, most affected
# keys first.
# `desc` is namespace-qualified for consistency with the other dplyr calls, so
# the chunk does not depend on dplyr being attached.
qc_df <- df %>%
  dplyr::filter(step_dur_sec < 0) %>%
  dplyr::select(step_type, step_name, time_start, time_end, step_dur_sec, PARENT_KEY) %>%
  dplyr::count(PARENT_KEY) %>%
  dplyr::arrange(dplyr::desc(n))

# Cleaning: negative durations are replaced by NA, all records are kept.
cleaned_df <- df %>%
  dplyr::mutate(step_dur_sec = ifelse(step_dur_sec >= 0, step_dur_sec, NA))
# Delegate to timci::detect_missing_clinical_presentation(); its result is
# used as the check output without further column selection.
qc_df <- timci::detect_missing_clinical_presentation(df)
# Delegate to timci::detect_missing_diagnosis(), then keep the identification
# columns only, ordered by facility.
qc_df <- timci::detect_missing_diagnosis(df)
outcols <- c("child_id", "fid", "date_visit", "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Delegate to timci::detect_missing_referral(), then keep the identification
# columns only, ordered by facility.
qc_df <- timci::detect_missing_referral(df)
outcols <- c("child_id", "fid", "date_visit", "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Delegate to timci::detect_missing_treatment(), then keep the identification
# columns only, ordered by facility.
qc_df <- timci::detect_missing_treatment(df)
outcols <- c("child_id", "fid", "date_visit", "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols)) %>%
  dplyr::arrange(fid)
# Keep the records where both amoxicillin fields (`rx_amoxicillin` and
# `rx_amoxicillin_hf`) are missing while both corresponding "other" free-text
# fields (`rx_misc_oth` and `rx_misc_oth_hf`) are filled in.
qc_df <- df[
  is.na(df$rx_amoxicillin) & !is.na(df$rx_misc_oth) &
    is.na(df$rx_amoxicillin_hf) & !is.na(df$rx_misc_oth_hf),
]

# NOTE(review): `outcols` is defined but not applied to `qc_df` in this chunk.
outcols <- c("child_id", "fid", "date_visit", "uuid")
# Collect the free-text entries of the records where the column named by
# `col_id` equals 1 (text taken from `free_text1`) ...
drug_disp_df1 <- df %>%
  dplyr::filter(!!rlang::sym(col_id) == 1) %>%
  dplyr::select(free_text = free_text1, uuid, submission_date)

# ... and of the records where its `_hf` counterpart equals 1 (text taken from
# `free_text2`).
drug_disp_df2 <- df %>%
  dplyr::filter(!!rlang::sym(paste0(col_id, "_hf")) == 1) %>%
  dplyr::select(free_text = free_text2, uuid, submission_date)

# Stack both sources under a common `free_text` column.
qc_df <- rbind(drug_disp_df1, drug_disp_df2)
# Variant of the free-text collection for multiple-choice columns: the column
# named by `col_id` holds ";"-separated codes and a record is selected when
# `value` is one of them.
# `ungroup` is namespace-qualified for consistency with the other dplyr calls,
# so the chunk does not depend on dplyr being attached.

# Records selected through `col_id` (text taken from `free_text1`).
drug_disp_df1 <- df %>%
  dplyr::rowwise() %>%
  dplyr::mutate(
    selected = ifelse(
      value %in% as.integer(unlist(strsplit(!!rlang::sym(col_id), split = ";"))),
      1,
      0
    )
  ) %>%
  dplyr::ungroup() %>%
  dplyr::filter(selected == 1) %>%
  dplyr::select(free_text1, uuid, submission_date) %>%
  dplyr::rename(free_text = free_text1)

# Records selected through the `_hf` counterpart (text taken from `free_text2`).
drug_disp_df2 <- df %>%
  dplyr::rowwise() %>%
  dplyr::mutate(
    selected = ifelse(
      value %in% as.integer(unlist(strsplit(!!rlang::sym(paste0(col_id, "_hf")), split = ";"))),
      1,
      0
    )
  ) %>%
  dplyr::ungroup() %>%
  dplyr::filter(selected == 1) %>%
  dplyr::select(free_text2, uuid, submission_date) %>%
  dplyr::rename(free_text = free_text2)

# Stack both sources under a common `free_text` column.
qc_df <- rbind(drug_disp_df1, drug_disp_df2)
# Delegate to timci::detect_drug(), then keep the identification columns
# together with the drug value and drug free-text columns.
qc_df <- timci::detect_drug(df, drug_val_col, drug_text_col, drug_vec)
outcols <- c("child_id", "fid", "date_visit", drug_val_col, drug_text_col, "uuid")

# all_of() makes the external character vector explicit (a bare vector in
# select() is deprecated and ambiguous if a column is named `outcols`).
qc_df <- qc_df %>%
  dplyr::select(dplyr::all_of(outcols))
# Delegate to timci::detect_inconsistent_names_between_visits(), with `refdf`
# as reference data and `df` as follow-up data; the first element of the
# returned list is kept as the check output and the second as the cleaned
# data frame.
out <- timci::detect_inconsistent_names_between_visits(
  refdf = refdf,
  fudf = df,
  col_date = datecol,
  ldate_diff = ldate_diff,
  udate_diff = udate_diff,
  matched_names = matched_names,
  cleaning = cleaning
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Same call to timci::detect_inconsistent_names_between_visits() with
# `repeats = TRUE`; the first element of the returned list is kept as the
# check output and the second as the cleaned data frame.
out <- timci::detect_inconsistent_names_between_visits(
  refdf = refdf,
  fudf = df,
  col_date = datecol,
  ldate_diff = ldate_diff,
  udate_diff = udate_diff,
  matched_names = matched_names,
  cleaning = cleaning,
  repeats = TRUE
)
qc_df <- out[[1]]
cleaned_df <- out[[2]]
# Keep the records with a strictly positive `edits` value.
qc_df <- dplyr::filter(df, edits > 0)
# Detect "back from lab" submissions (back_from_lab == 1) whose child ID has no
# matching submission with back_from_lab == 0.
# `rename` is namespace-qualified for consistency with the other dplyr calls,
# so the chunk does not depend on dplyr being attached.
df <- df %>%
  dplyr::rename(
    child_id = "child_identification-pid",
    back_from_lab = "child_identification-back_from_lab"
  )

df1 <- df %>%
  dplyr::filter(back_from_lab == 0)
df2 <- df %>%
  dplyr::filter(back_from_lab == 1)

qc_df <- df2[!df2$child_id %in% df1$child_id, ]
# Number of detected records = number of rows of the check output.
n_detected <- nrow(qc_df)
# Alternative count based on date columns: a detection is any non-missing
# value in a column whose name contains "date" but not "_date".
date_cols <- grep("date", names(qc_df))
remove_date_cols <- grep("_date", names(qc_df))
date_cols <- setdiff(date_cols, remove_date_cols)

# Flatten the values of the retained date columns into one unnamed vector.
x <- unname(unlist(qc_df[colnames(qc_df[date_cols])]))

n_detected <- sum(!is.na(x))
# Detections in excess of one per row of the check output.
n_to_remove <- n_detected - nrow(qc_df)
[Check output:]{custom-style="underlined"} The check has detected `r n_detected`
record(s) (i.e. `r sprintf('%.1f', 100 * n_detected / n_df)`
% of all records) with `r qc_text`
in the `r db_name`
database.
# Both display modes are off by default.
partial_disp <- FALSE
full_disp <- FALSE

# Emptiness of the check output, as reported by timci::is_not_empty().
qc_df_is_not_empty <- timci::is_not_empty(qc_df)
# Export the check output through timci::quality_check_export().
timci::quality_check_export(
  df = qc_df,
  idx = qc_idx,
  label = qc_export_label,
  cdir = qc_dir,
  description = qc_export_description
)

# Keep a copy of the check output before the display chunk drops columns
# from `qc_df`.
qc_reuse_df <- qc_df
# Choose the display mode: more than 10 rows -> partial display, otherwise
# full display. `&&` is used because these are scalar conditions; it also
# short-circuits, so nrow() is not evaluated when the output is empty.
partial_disp <- qc_df_is_not_empty && (nrow(qc_df) > 10)
full_disp <- qc_df_is_not_empty && (nrow(qc_df) <= 10)

# Column names before any column is dropped for display.
cols <- colnames(qc_df)

# Drop every column whose name contains "name" from the displayed table.
names_to_remove <- grep("name", colnames(qc_df))
if (length(names_to_remove) > 0) {
  qc_df <- qc_df %>%
    dplyr::select(-dplyr::all_of(names_to_remove))
}

# Wide outputs (more than 10 columns) are reduced to a few key columns, except
# for the "date_discrepancy" check.
reformat_cond <- (length(qc_df) > 10) && (qc_type != "date_discrepancy")
if (reformat_cond) {
  if ("date_visit" %in% cols) {
    kcols <- c("date_visit")

    # Child identifier: `child_id` if available, `prev_id` otherwise.
    if ("child_id" %in% cols) {
      kcols <- c(kcols, "child_id")
    } else if ("prev_id" %in% cols) {
      kcols <- c(kcols, "prev_id")
    }

    # Facility identifier: `fid` if available, `fid_from_device` otherwise.
    if ("fid" %in% cols) {
      kcols <- c(kcols, "fid")
    } else if ("fid_from_device" %in% cols) {
      kcols <- c(kcols, "fid_from_device")
    }

    # all_of() makes the external character vector explicit (a bare vector in
    # select() is deprecated and ambiguous if a column is named `kcols`).
    qc_df <- qc_df %>%
      dplyr::select(dplyr::all_of(kcols))
  }
}

n_cols <- length(qc_df)
# Partial display: first five rows, at most five columns.
# seq_len() avoids the c(1, 0) selection that 1:min(n_cols, 5) would produce
# for a table without columns; `select` is namespace-qualified so the chunk
# does not depend on dplyr being attached.
qc_df %>%
  dplyr::select(dplyr::all_of(seq_len(min(n_cols, 5)))) %>%
  head(5) %>%
  knitr::kable(row.names = FALSE, caption = "Five first rows")
# Partial display: last five rows, at most five columns.
# seq_len() avoids the c(1, 0) selection that 1:min(n_cols, 5) would produce
# for a table without columns; `select` is namespace-qualified so the chunk
# does not depend on dplyr being attached.
qc_df %>%
  dplyr::select(dplyr::all_of(seq_len(min(n_cols, 5)))) %>%
  tail(5) %>%
  knitr::kable(row.names = FALSE, caption = "Five last rows")
# Full display: all rows, at most five columns.
# seq_len() avoids the c(1, 0) selection that 1:min(n_cols, 5) would produce
# for a table without columns; `select` is namespace-qualified so the chunk
# does not depend on dplyr being attached.
qc_df %>%
  dplyr::select(dplyr::all_of(seq_len(min(n_cols, 5)))) %>%
  knitr::kable(row.names = FALSE)
# Number of records left in the cleaned data frame.
n_cleaned_df <- nrow(cleaned_df)
# Status sentence shown after the check: one wording when records were removed
# (fewer records than in the input), another when the count is unchanged.
cleaned_df_status_update <- ifelse(
  n_cleaned_df < n_df,
  paste0(
    'After deletion of all the records detected by this check, there are now **',
    n_cleaned_df,
    '** record(s) in the ',
    db_name,
    ' database.'
  ),
  paste0(
    'There are still **',
    n_cleaned_df,
    '** record(s) in the ',
    db_name,
    ' database.'
  )
)
`r cleaned_df_status_update`
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.