## code to prepare a fake dataset for turnr package
# this is meant to mimic raw input data for the package
library(tidyverse)
# Fix the RNG seed so every run of this script generates the same fake data
set.seed(123)

# TargetNames -------------------------------------------------------------
# Every target name used for the respiratory panel: organisms and controls
all_paths <- c(
  "Human Metapneumovirus", "PCR2 Control", "RNA Process Control",
  "Coronavirus NL63", "Influenza A H1-2009", "PCR1 Control",
  "Human Rhinovirus/Enterovirus", "Bocavirus", "Influenza A H3",
  "Coronavirus 229E", "Respiratory Syncytial Virus",
  "Coronavirus OC43", "Adenovirus", "Coronavirus HKU1",
  "Mycoplasma pneumoniae", "Parainfluenza Virus 1",
  "Parainfluenza Virus 4", "Parainfluenza Virus 3", "Influenza B",
  "Bordetella pertussis", "Chlamydophila pneumoniae",
  "Influenza A H1", "Parainfluenza Virus 2",
  "Influenza A (no subtype detected)", "Human RNA Control",
  "Coronavirus OC43 (RP)", "Bordetella parapertussis (IS1001)",
  "Bordetella pertussis (ptxP)", "Chlamydia pneumoniae"
)

# Targets whose name contains "Control"/"control" are the run controls;
# everything else is a respiratory pathogen
controls <- str_subset(all_paths, "[Cc]ontrol")
rp_paths <- setdiff(all_paths, controls)

# proportion negatives
neg_rate <- 0.4

# Pathogen names for the GI panel
GI_paths <- c(
  "Aeromonas", "Astrovirus", "C. cayetanensis", "Campylobacter",
  "EPEC", "ETEC", "G. lamblia", "Norovirus", "Salmonella",
  "E. coli O157", "Adenovirus F", "C. difficile", "Cryptosporidium",
  "STEC", "Vibrio", "EAEC", "Sapovirus", "Shigella/EIEC",
  "Rotavirus A", "V. cholerae", "E. histolytica",
  "P. shigelloides", "Y. enterocolitica"
)
# site info ---------------------------------------------------------------
# Random 10-character identifiers for 7 fake sites and 56 fake instruments
SiteIDs <- stringi::stri_rand_strings(7, length = 10)
Serials <- stringi::stri_rand_strings(56, length = 10)
# The joins and the serial -> site lookup below use these IDs as keys,
# so a (very unlikely) random collision must not go unnoticed
stopifnot(anyDuplicated(SiteIDs) == 0, anyDuplicated(Serials) == 0)

# One entry per calendar day from 2017-01-01 to 2019-01-01 (inclusive)
dates <- seq(from = lubridate::ymd("2017-01-01"),
             to = lubridate::ymd("2019-01-01"),
             by = 1)
panels <- c("Respiratory_Panel", "Gastro_Intestinal")

# fake site info
info1 <- tibble(Region = c("UT", "NY", "CA"),
                ZipCode = c(12345, 99999, 11111),
                Country = "United States of America")
info2 <- tibble(SiteID = SiteIDs,
                Region = c("UT", "UT", "UT", "NY", "NY", "NY", "CA")) %>%
  left_join(info1, by = "Region")

# Assign instruments to sites by cycling through the sites. rep_len() keeps
# the lookup the same length as Serials even when the number of instruments
# is not an exact multiple of the number of sites.
lookup_SiteID <- rep_len(SiteIDs, length(Serials))
names(lookup_SiteID) <- Serials

versions <- c("FA1.5", "FA2.0", "Torch")
info3 <- tibble(InstrumentSerialNumber = Serials,
                # unname(): indexing a named vector would otherwise carry the
                # serial numbers along as names on the SiteID column
                SiteID = unname(lookup_SiteID[Serials]),
                InstrumentVersion = sample(versions, length(Serials),
                                           replace = TRUE)) %>%
  left_join(info2, by = "SiteID")
# generating data based on fake TUR -----------------------------------
# day index (1, 2, ...), one per element of `dates`
x <- seq_along(dates)
# expected TUR: a sine wave around 20 with amplitude 7
y <- 7 * sin(x / 50) + 20
# plot(y~x, type = "l")
# generating values of TUR: one Poisson draw per day around the expected value
TUR <- rpois(length(dates), y)
# plot(TUR ~ dates, type = "l")
df1 <- tibble(date = dates,
              TUR = TUR)

# Expand each date into TUR rows, one per run, each with its own random
# 15-character RunDataID. The count is passed to stri_rand_strings() directly
# instead of being wrapped in a nested one-cell tibble first.
df2 <- df1 %>%
  mutate(RunDataID = map(TUR, stringi::stri_rand_strings, length = 15)) %>%
  unnest(cols = "RunDataID") %>%
  mutate(InstrumentSerialNumber = sample(Serials, size = n(),
                                         replace = TRUE),
         # 70% of pouchtitles should be RP
         # (what_panel is 1 with probability 0.3, which selects panels[2])
         what_panel = rbinom(n = n(), 1, 0.3),
         PouchTitle = panels[what_panel + 1]) %>%
  select(-what_panel)

# RunDataIDs are used as run keys further down, so they should all be unique
stopifnot(anyDuplicated(df2$RunDataID) == 0)
# Attach the detected targets to every run: two controls, plus (for
# non-negative runs) one or more pathogens drawn from the run's own panel.
df3 <- df2 %>%
  group_by(date, RunDataID) %>%
  nest() %>%
  mutate(TargetName = map(data, function(run) {
    # `run` holds the remaining columns of a single run (RunDataIDs are unique)
    out <- sample(controls, 2) # add two controls
    if (runif(1) < neg_rate) {
      # negative run: controls only
      return(out)
    }
    # Draw pathogens from the pool that matches the run's panel. Sampling
    # without replacement within a run guarantees that a co-detection never
    # lists the same pathogen twice (re-drawing GI names row by row with
    # replacement could do that).
    pool <- if (run$PouchTitle[[1]] == "Respiratory_Panel") {
      rp_paths
    } else {
      GI_paths
    }
    # shooting for about a 5 % co-detection rate
    num_paths <- rbinom(1, 20, 0.05 / 20) + 1 # number of pathogens
    c(out, sample(pool, num_paths))
  })) %>%
  unnest(cols = c("data", "TargetName")) %>%
  ungroup() %>%
  mutate(ResultType = ifelse(TargetName %in% controls,
                             "control",
                             "organism"),
         AssayName = TargetName)
# adding duplicate rows (to help mimic multiple Assays)
df_org <- df3 %>%
  filter(ResultType == "organism")
# Duplicate a random third (rounded down) of the organism rows under a second
# assay name. sample.int() with an integer size avoids the 1:nrow() pitfall
# on empty input and the implicit truncation of a fractional size.
extra_rows <- df_org[sample.int(nrow(df_org),
                                size = nrow(df_org) %/% 3,
                                replace = FALSE), ] %>%
  mutate(AssayName = paste(AssayName, "assay 2")) # fake second assay

# Combine original and duplicated rows, attach instrument/site info, and add
# the remaining columns with constant fake values
df4 <- bind_rows(df3, extra_rows) %>%
  left_join(info3, by = "InstrumentSerialNumber") %>%
  mutate(StartTime = paste(as.character(date), "07:26:49.000"),
         FlaggedAsValidation = 0,
         # NOTE(review): plain NA makes this a logical column; confirm whether
         # the real raw data has a character column here
         TargetShortName = NA,
         TargetResult = ifelse(ResultType == "control",
                               "Pass", "Positive"),
         # in actual data set there are some negative assay results
         AssayResult = "Positive") %>%
  # helper columns that are only needed while building the data
  select(-TUR, -date)

# turnr::initial_check(df4)
rp_raw <- df4
# save for use in package
usethis::use_data(rp_raw, overwrite = TRUE)
# creating processed files ------------------------------------------------
# creating downstream files, so that examples don't have to run
# all the processing functions

# daily TUR
# Built from the rp_raw object created above rather than turnr::rp_raw: the
# installed package may still contain an older copy of the data (or none at
# all), which would leave TUR_dat out of sync with rp_raw and path_dat.
TUR_dat <- turnr::pre_process(rp_raw) %>%
  turnr::calc_active_instruments()
usethis::use_data(TUR_dat, overwrite = TRUE)

# count by pathogen/site
path_dat <- turnr::pre_process(rp_raw) %>%
  turnr::co_detection() %>%
  turnr::calc_count_by_site_inst()
usethis::use_data(path_dat, overwrite = TRUE)
# (Web-page boilerplate picked up when this script was copied; not R code.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.