Nothing
#' @title Values Check
#' @description This function checks for potential errors in the VALUES columns by ensuring (1) required format of `VALUE=MEANING` (e.g., 0=Yes or 1=No); (2) no leading/trailing spaces near the equals sign; (3) all variables of TYPE encoded have VALUES entries; and (4) all variables with VALUES entries are listed as TYPE encoded.
#' @param DD.dict Data dictionary.
#' @param verbose When TRUE, the function prints the Message out, as well as a list of variables that fail one of the values checks.
#' @return Tibble, returned invisibly, containing: (1) Time (Time stamp); (2) Name (Name of the function); (3) Status (Passed/Failed); (4) Message (A copy of the message the function printed out); (5) Information (Details of which checks passed/failed for which value=meaning instances).
#' @export
#' @import tidyr
#' @examples
#' # Example 1: Fail check
#' data(ExampleE)
#' values_check(DD.dict.E)
#' print(values_check(DD.dict.E, verbose=FALSE))
#'
#' # Example 2: Pass check
#' data(ExampleA)
#' values_check(DD.dict.A)
#' print(values_check(DD.dict.A, verbose=FALSE))
values_check <- function(DD.dict, verbose=TRUE) {
# Add required_check as pre-check
r <-
values_precheck(
dict = DD.dict
)
if (any(r$Status == "Failed")) {
Time <- Sys.time()
Function <- "values_check"
Status <- "Not attempted"
row <- grep("Failed", r$Status)
Message <- paste0("ERROR: Required pre-check ", r$Function[row], " failed.")
Message2 <- tibble(r$Function, r$Message)
Information <- r$Information[row]
return_to_user <-
lst(Note = "Pre-check failed.",
Message = Message2,
Information = Information)
} else {
chk <- dup_values(DD.dict)
if (chk==TRUE) {
# Check 1: Is an = present in all values columns (VALUE=MEANING)?
# Check 2: Are there leading/trailing spaces near the first equal sign?
# Check 3: Do all variables of TYPE encoded have at least one VALUES entry?
# Check 4: Are all variables with at least one VALUES entry of TYPE encoded?
# Temporarily remove any entries in the VALUES columns that are "INTEGERS", "DECIMALS", OR "STRINGS"
col <- which(names(DD.dict)=="VALUES")
for (i in col:ncol(DD.dict)){
DD.dict[i][DD.dict[,i]=="INTEGERS" | DD.dict[,i]=="DECIMALS" | DD.dict[,i]=="STRINGS" ] <- NA
}
# Perform checks 1-2 via loop (as the number of VALUES columns can vary greatly across data sets)
##### Check 1: Is an equals sign present for all non-NA values columns? #####
# Store number of the first VALUES column
vcol <- which(names(DD.dict)=="VALUES")
# Create empty object to store results in
v.check <- NULL
# Create dummy names for blank-named columns beyond `VALUES`
DD.dict.orig <- DD.dict
i <- 1
for (col in vcol:ncol(DD.dict)) {
if (names(DD.dict)[col] == "") {
names(DD.dict)[col] = paste0("VALUES",i)
}
i <- i + 1
}
# Loop through all VALUES columns to perform check
for (col in vcol:ncol(DD.dict)) {
# If dim=0, TRUE; else, FALSE
# For values columns that are not NA, are there equals signs in them?
values.check <- (dim(DD.dict[!is.na(DD.dict[col]) & !grepl("=", unlist(DD.dict[col])), c("VARNAME", names(DD.dict)[col])])[1]==0)
# Store column name
column_name <- names(DD.dict[,col])
# Store information listed in values column
if (values.check==FALSE){
vname <- unlist(DD.dict[!is.na(DD.dict[col]) & !grepl("=", unlist(DD.dict[col])), c("VARNAME")])
type <- unlist(DD.dict[!is.na(DD.dict[col]) & !grepl("=", unlist(DD.dict[col])), c("TYPE")])
problematic_description <- unlist(DD.dict[!is.na(DD.dict[col]) & !grepl("=", unlist(DD.dict[col])), c(names(DD.dict)[col])])
} else {
vname <- NA
type <- NA
problematic_description <- NA
}
# Create data frame with column name, check value, and problematic description (if applicable)
v.check.comb <- data.frame(column_name, values.check, vname, type, problematic_description)
# Bind each loop into final check
v.check <- bind_rows(v.check.comb, v.check)
}
DD.dict <- DD.dict.orig
# Clean up check 1 results
rownames(v.check) <- 1:nrow(v.check)
v.check$check <- "Check 1: Is an equals sign present for all values columns?"
VALUES.CHECK1 <- (isTRUE(all(v.check$values.check)))
VALUES.CHECK1.details <- v.check
if (isTRUE(VALUES.CHECK1)) {
check.name <- "Check 1"
check.description <- "Is an equals sign present for all values columns?"
check.status <- "Passed"
check1.final <- data.frame(check.name, check.description, check.status)
}
##### Check 2: Are there any leading/trailing spaces near the first equals sign? #####
v.check <- NULL
vcol <- which(names(DD.dict)=="VALUES")
for (col in vcol:ncol(DD.dict)) {
# Step 1: Extract the column
a <- data.frame(DD.dict[,col])
# Step 2: Split extracted column by first equals sign
names(a)[1] <- "VALUE.RAW"
VALUE.RAW <- NULL
suppressWarnings(temp <- separate(data = a, col = VALUE.RAW, c("value","meaning"), sep="=", extra="merge"))
# Step 3: Grep for trailing space in first column (value)
trouble_rows1 <- grep("\\s+$", temp$value)
# Step 4: Grep for leading space in second column (meaning)
trouble_rows2 <- grep("^\\s+", temp$meaning)
# Step 5: Merge trouble row numbers
trouble_rows <- c(unique(trouble_rows1, trouble_rows2))
# Step 6: Determine pass/fail check
if (length(trouble_rows) == 0){
values.check <- TRUE
} else {
values.check <- FALSE
}
# Step 7: Extract detailed information about trouble_rows
column_name <- names(DD.dict[,col])
if (length(trouble_rows) != 0){
vname <- unlist(rownames(DD.dict)[trouble_rows])
type <- unlist(DD.dict$TYPE)[trouble_rows]
problematic_description <- unlist(DD.dict[col])[trouble_rows]
} else {
vname <- NA
type <- NA
problematic_description <- NA
}
v.check.comb <- data.frame(column_name, values.check, vname, type, problematic_description)
v.check <- bind_rows(v.check.comb, v.check)
}
rownames(v.check) <- 1:nrow(v.check)
v.check$check <- "Check 2: Are there any leading/trailing spaces near the first equals sign?"
VALUES.CHECK2 <- (isTRUE(all(v.check$values.check)))
VALUES.CHECK2.details <- v.check
if (isTRUE(VALUES.CHECK2)) {
check.name <- "Check 2"
check.description <- "Are there any leading/trailing spaces near the first equals sign?"
check.status <- "Passed"
check2.final <- data.frame(check.name, check.description, check.status)
}
##### Check 3: Do all variables of TYPE encoded have at least one VALUES entry? #####
v.check <- NULL
values.check <- (dim(DD.dict[is.na(DD.dict$VALUES) & grepl("encoded value", DD.dict$TYPE), c("VARNAME","VALUES")])[1]==0)
column_name <- "VALUES"
if (values.check==FALSE){
vname <- unlist(DD.dict[is.na(DD.dict$VALUES) & grepl("encoded value", DD.dict$TYPE), c("VARNAME")])
type <- unlist(DD.dict[is.na(DD.dict$VALUES) & grepl("encoded value", DD.dict$TYPE), c("TYPE")])
problematic_description <- unlist(DD.dict[is.na(DD.dict$VALUES) & grepl("encoded value", DD.dict$TYPE), c("VALUES")])
} else {
vname <- NA
type <- NA
problematic_description <- NA
}
v.check.comb <- data.frame(column_name, values.check, vname, type, problematic_description)
v.check <- bind_rows(v.check.comb, v.check)
rownames(v.check) <- 1:nrow(v.check)
v.check$check <- "Check 3: Do all variables of TYPE encoded have at least one VALUES entry?"
VALUES.CHECK3 <- (isTRUE(all(v.check$values.check)))
VALUES.CHECK3.details <- v.check
if (isTRUE(VALUES.CHECK3)) {
check.name <- "Check 3"
check.description <- "Do all variables of TYPE encoded have at least one VALUES entry?"
check.status <- "Passed"
check3.final <- data.frame(check.name, check.description, check.status)
}
##### Check 4: Are all variables with VALUES entries of TYPE encoded? #####
v.check <- NULL
values.check <- (dim(DD.dict[!grepl("encoded value", DD.dict$TYPE) & grepl("=", DD.dict$VALUES), c("VARNAME","TYPE", "VALUES")])[1]==0)
column_name <- "VALUES"
if (values.check==FALSE){
vname <- unlist(DD.dict[!grepl("encoded value", DD.dict$TYPE) & grepl("=", DD.dict$VALUES), c("VARNAME")])
type <- unlist(DD.dict[!grepl("encoded value", DD.dict$TYPE) & grepl("=", DD.dict$VALUES), c("TYPE")])
problematic_description <- unlist(DD.dict[!grepl("encoded value", DD.dict$TYPE) & grepl("=", DD.dict$VALUES), c("VALUES")])
} else {
vname <- NA
type <- NA
problematic_description <- NA
}
v.check.comb <- data.frame(column_name, values.check, vname, type, problematic_description)
v.check <- bind_rows(v.check.comb, v.check)
rownames(v.check) <- 1:nrow(v.check)
v.check$check <- "Check 4: Are all variables with VALUES entries of TYPE encoded?"
VALUES.CHECK4 <- (isTRUE(all(v.check$values.check)))
VALUES.CHECK4.details <- v.check
if (isTRUE(VALUES.CHECK4)) {
check.name <- "Check 4"
check.description <- "Are all variables with VALUES entries of TYPE encoded?"
check.status <- "Passed"
check4.final <- data.frame(check.name, check.description, check.status)
}
# Compile report
Time <- Sys.time()
Function <- "values_check"
VALUES.CHECK <- unlist(lst(VALUES.CHECK1, VALUES.CHECK2, VALUES.CHECK3, VALUES.CHECK4))
if (all(VALUES.CHECK==TRUE)) {
Status <- "Passed"
Message <- c("Passed: all four VALUES checks look good.")
Information <- bind_rows(check1.final, check2.final, check3.final, check4.final)
#Information <- VALUES.CHECK
return_to_user <- lst(Message, Information)
} else {
Status <- "Failed"
Message <- c("ERROR: at least one VALUES check flagged potentials issues. See Information for more details.")
Information2 <- bind_rows(VALUES.CHECK1.details, VALUES.CHECK2.details, VALUES.CHECK3.details, VALUES.CHECK4.details)
Information <- subset(Information2, values.check==FALSE)
#Problem_Variables <- Information %>% select(column_name, vname, type, problematic_description, check)
return_to_user <- lst(Message, Information)
}
} else {
Time <- Sys.time()
Function <- "values_check"
Status <- "Not attempted"
Message <- "VALUES column name duplicated or missing. Check not performed."
Information <- names(DD.dict)[duplicated(names(DD.dict))]
return_to_user <- lst(Message)
}
}
report <- tibble(Time, Function, Status, Message, Information=lst(Information))
if (verbose==TRUE){
print(return_to_user)
}
return(invisible(report))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.