#' Get COVID-19 positive samples after excluding redundant samples in a
#' probable single infection event
#'
#' @param sc_tbl Sample Collection table from deidentifiedDB database
#' @param start_date Filter sample collection table for samples collected on or
#' after this date. Provide date as in "2021-08-01" for August 1, 2021
#' @param end_date Filter sample collection table for samples collected on or
#' before this date. Provide date as in "2021-08-01" for August 1, 2021
#' @param n_days Duration of time (in days) for which only the first collected
#' COVID-19 positive sample for a patient is retained
#' @param time_zone Time zone for collection time (Default: "America/New_York")
#'
#' @return Tibble containing sample collection data after excluding redundant
#' COVID-19 positive samples
#' @export
#'
#' @importFrom magrittr "%>%"
get_sc_wo_redundant <- function(sc_tbl,
start_date = "2021-01-01",
end_date = as.character(
lubridate::date(lubridate::now())
),
n_days = 30,
time_zone = "America/New_York") {
stopifnot(all(c(
"testkit_id",
"rymedi_result",
"collection_date",
"population",
"order_priority",
"gender",
"patient_id",
"performing_facility"
) %in% colnames(sc_tbl)))
if (!(lubridate::is.timepoint(sc_tbl$collection_date))) {
sc_tbl <- sc_tbl %>%
dplyr::mutate(collection_date = lubridate::date(
lubridate::as_datetime(collection_date,
tz = time_zone
)
))
}
start_date <- lubridate::as_date(start_date)
end_date <- lubridate::as_date(end_date)
sc_tbl_no_missing <- sc_tbl %>%
dplyr::mutate(
collection_year = lubridate::year(collection_date),
collection_week = lubridate::week(collection_date),
collection_month = zoo::as.yearmon(collection_date),
dplyr::across(
c(
rymedi_result,
population,
order_priority,
gender,
performing_facility
),
stringr::str_to_upper
)
) %>%
dplyr::filter(
!(is.na(collection_date) | is.na(patient_id)),
collection_date >= start_date,
collection_date <= end_date,
stringr::str_detect(
performing_facility,
"CLEMSON"
),
rymedi_result %in% c("POSITIVE", "NEGATIVE")
) %>%
dplyr::select(
"testkit_id",
"rymedi_result",
"collection_year",
"collection_week",
"collection_month",
"collection_date",
"population",
"order_priority",
"gender",
"patient_id",
"performing_facility"
) %>%
dplyr::arrange(
collection_date,
rymedi_result,
population,
order_priority,
performing_facility
)
## Only retain the first COVID19 +ve sample from a patient in a period of n_days days
## Number of such multi-test possibly-single-infection samples :
testkit_ids_to_exclude <- get_redundant_samples(
sc_tbl_no_missing,
n_days
)
output_tbl <- sc_tbl_no_missing %>%
dplyr::filter(!(testkit_id %in% testkit_ids_to_exclude))
return(output_tbl)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.