#' Getting Invalid Examples and Summaries for Age_Reported and Age_Units_Reported
#'
#' @description
#' This function will generate two data frames: first, a frame to be used later to extract invalid examples from;
#' second, a frame that contains facility-level summaries for counts and percentages of invalid Age_Reported
#' and Age_Units_Reported. The valid values for Age_Units_Reported were taken from the `PHVS_AgeUnit_SyndromicSurveillance_V1.xls` file from
#' Public Health Information Network Vocabulary Access and Distribution System value sets. We also allow each of these units to be in
#' the plural (i.e., we allow both YEAR and YEARS). Age_Reported is considered invalid if: (a) it is reported, but no units are reported, (b)
#' years reported are less than 2 or greater than 120, (c) months reported are less than 1 or greater than 24, (d) weeks reported are less
#' than 1 or greater than 20, or (e) days reported are less than 1 or greater than 30.
#'
#' @details
#' You can view the concept names that are considered valid by calling data("age_units").
#'
#' @param data The raw data on which you will do the invalid ages and age units checks.
#' @return A list of two data frames: examples and summary for invalid Age_Reported and Age_Units_Reported.
#' @import dplyr
#' @export
age_invalid <- function(data) {
# import the valid admit values
data("age_units", envir=environment())
valid_age_units <- age_units %>% # take data
select(Concept.Name) %>% # the variable we want is called concept name
filter(!is.na(Concept.Name)) %>% # get rid of any nas
c() %>% # turn this into a vector
unlist() %>% # unlist them from the concept name object
unname() %>% # remove the names to leave bare values
toupper() # upper case everything
# allow the units to be plural, as well
temp <- c() # initialize temporary vector
for (i in 1:length(valid_age_units)) { # for each element in the valid_age_values vector
if (valid_age_units[i] != "UNKNOWN") { # for ones that do not equal unknown
temp <- c(temp, paste0(valid_age_units[i], "S")) # adding an s to that and appending it to the empty vector
}
}
valid_age_units <- c(valid_age_units, temp) # combine vector with new plurals to the list from the dataset
# generate examples file
age_examples <- data %>% # take data
select(c(C_Facility_ID, C_Visit_ID, Age_Reported, Age_Units_Reported)) %>% # get only the variables we need right now
mutate(Age_Units_Reported=toupper(Age_Units_Reported), # upper casing everything
Invalid_Age_Units=case_when(
is.na(Age_Units_Reported) ~ NA, # if it is na, keep it na
Age_Units_Reported %in% valid_age_units ~ FALSE, # if units reported are valid, then false invalid
!Age_Units_Reported %in% valid_age_units ~ TRUE # if units are not in there, then true invalid
),
Invalid_Age=case_when(
is.na(Age_Units_Reported) & !is.na(Age_Reported) ~ TRUE, # invalid if they don't report age units, but report age
(Age_Units_Reported=="YEAR" | Age_Units_Reported=="YEARS" | Age_Units_Reported=="YEAR [TIME]") & Age_Reported >= 2 & Age_Reported <=120 ~ FALSE, # if in years and between 2 and 120, then not invalid
(Age_Units_Reported=="MONTH" | Age_Units_Reported=="MONTHS" | Age_Units_Reported=="MONTH [TIME]") & Age_Reported >= 1 & Age_Reported <=24 ~ FALSE, # if in months and between 1 and 24, then not invalid
(Age_Units_Reported=="WEEK" | Age_Units_Reported=="WEEKS" | Age_Units_Reported=="WEEK [TIME]") & Age_Reported >= 1 & Age_Reported <=20 ~ FALSE, # if in weeks and between 1 and 20, then not invalid
(Age_Units_Reported=="DAY" | Age_Units_Reported=="DAYS" | Age_Units_Reported=="DAY [TIME]") & Age_Reported >= 1 & Age_Reported <=30 ~ FALSE, # if in days and between 1 and 30, then not invalid
TRUE ~ TRUE, # otherwise, it is invalid
))
age_summary <- age_examples %>% # take these examples
group_by(C_Visit_ID) %>% # group by patient visit
mutate(
Any_Invalid_Age_Units=case_when(
all(is.na(Invalid_Age_Units)) ~ NA, # if all invalid checks are na, keep na
sum(Invalid_Age_Units, na.rm=TRUE) == 0 ~ FALSE, # if zero, all invalid checks are false, so visit is good
TRUE ~ TRUE # if it is not, invalid is true
),
Any_Invalid_Age=case_when(
all(is.na(Invalid_Age)) ~ NA, # all na, then na
sum(Invalid_Age, na.rm=TRUE) == 0 ~ FALSE, # if zero, all checks are false, so visit is good
TRUE ~ TRUE # if it is not, invalid is true
)
) %>%
slice(1) %>% # get one observation per visit
ungroup() %>% # explicitly ungroup
group_by(C_Facility_ID) %>% # group by facility
summarise(Age_Reported.Percent=round(mean(Any_Invalid_Age, na.rm=TRUE)*100,2), # get percent
Age_Units_Reported.Percent=round(mean(Any_Invalid_Age_Units, na.rm=TRUE)*100,2), # get percent
Age_Reported.Count=sum(Any_Invalid_Age, na.rm=TRUE), # get count
Age_Units_Reported.Count=sum(Any_Invalid_Age_Units, na.rm=TRUE))
return(
list(age_examples=age_examples,
age_summary=age_summary)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.