#' Read CSV file containing COVID19 Sample Collection and
#' deidentified Demographics Data from REDDI
#'
#' Reads the file, verifies that every required column is present, renames
#' the columns to short snake_case names, drops the optional `Time Zone`
#' column, parses the collection and result dates, and cleans the birth year
#' with `tidy_up_birth_year()`.
#'
#' @param filepath Path to the CSV file containing Sample Collection and
#'   deidentified Demographics Data
#' @param date_fmt Format(s) used to parse dates; passed as `orders` to
#'   `lubridate::parse_date_time()` (Default: `"%m/%d/%y"`, i.e.
#'   month/day/year). NOTE(review): per the lubridate documentation `y` in
#'   `parse_date_time()` also matches four-digit years -- confirm against the
#'   installed lubridate version.
#' @param time_zone Time zone for collection time (Default: "America/New_York")
#'
#' @return A tibble with the Sample Collection and
#'   the deidentified Demographics Data. The original `Collection Date`
#'   column is kept (parsed as a date-time) next to the new `collection_date`
#'   column; `result_date` is returned as a date.
#'
#' @importFrom magrittr "%>%"
read_demographics_csv <- function(filepath,
                                  date_fmt = c("%m/%d/%y"),
                                  time_zone = "America/New_York") {
  required_cols <- c(
    "Testing Group Name",
    "Patient City",
    "Patient Zip Code",
    "Patient State",
    "Year of Birth",
    "Patient Gender",
    "Pregnant",
    "Patient Ethnic Group",
    "Patient Race",
    "Patient ID",
    "TestKit ID",
    "Result description",
    "Result Date",
    "Collection Date",
    "SKU",
    "Order Priority"
  )
  # Strings that should be read as missing values.
  na_strings <- c(
    "", "NA", "N/A", "<NA>", "null",
    "Null", "Missing", "Error 404"
  )

  # Read the file once and validate the header on the result, instead of
  # parsing the file a second time just to look at its column names.
  raw_tbl <- readr::read_csv(filepath,
    na = na_strings,
    show_col_types = FALSE
  )

  # Fail early and tell the caller exactly which columns are absent.
  missing_cols <- setdiff(required_cols, colnames(raw_tbl))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s) in demographics CSV: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  output_tbl <- raw_tbl %>%
    dplyr::rename(
      "test_group" = "Testing Group Name",
      "city" = "Patient City",
      "zip_code" = "Patient Zip Code",
      "state" = "Patient State",
      "birth_year" = "Year of Birth",
      "gender" = "Patient Gender",
      "pregnancy_status" = "Pregnant",
      "ethnicity" = "Patient Ethnic Group",
      "race" = "Patient Race",
      "patient_id" = "Patient ID",
      "testkit_id" = "TestKit ID",
      "rymedi_result" = "Result description",
      "result_date" = "Result Date",
      # NOTE(review): "teskit_sku" looks like a typo for "testkit_sku"; it is
      # kept as-is because downstream code may rely on this column name.
      "teskit_sku" = "SKU",
      "order_priority" = "Order Priority"
    ) %>%
    # "Time Zone" is optional in the export; drop it only when present.
    dplyr::select(-dplyr::any_of("Time Zone")) %>%
    dplyr::mutate(
      `Collection Date` = lubridate::parse_date_time(`Collection Date`,
        orders = date_fmt,
        tz = time_zone
      ),
      collection_date = lubridate::as_datetime(`Collection Date`,
        tz = time_zone
      ),
      result_date = lubridate::as_date(
        lubridate::parse_date_time(result_date,
          orders = date_fmt,
          tz = time_zone
        ),
        tz = time_zone
      )
    )

  # Latest collection year, used as the upper bound for birth years. Missing
  # or unparseable collection dates are ignored as long as at least one date
  # parsed, so a single bad row no longer turns the bound into NA. When no
  # date is usable, `na.rm` stays FALSE and the result is unchanged from a
  # plain max().
  collection_years <- lubridate::year(output_tbl$collection_date)
  latest_year <- max(collection_years, na.rm = any(!is.na(collection_years)))

  output_tbl$birth_year <- tidy_up_birth_year(output_tbl$birth_year,
    max_year = latest_year
  )

  output_tbl
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.