knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = TRUE, message = FALSE, warning = FALSE ) knitr::opts_chunk$set(fig.width=10, fig.height=4) options(tidyverse.quiet = TRUE)
Here we present three examples, definitions and related references:
library(serosurvey)
# additional library(tidyverse) library(srvyr) library(survey) library(tictoc) library(furrr) library(purrr) # theme theme_set(theme_bw())
data(api) datasurvey <- apiclus2 %>% mutate(survey_all="survey_all") %>% # create variables mutate(outcome_one = awards, outcome_two = cut(pct.resp,breaks = 2), covariate_01 = stype, covariate_02 = both)
# tratamiento de stratos con un solo conglomerado options(survey.lonely.psu = "certainty") # uu_clean_data %>% count(CONGLOMERADO,VIVIENDA) # diseƱo muestral de la encuesta --------------------------------- design <- datasurvey %>% filter(!is.na(outcome_one)) %>% #CRITICAL! ON OUTCOME filter(!is.na(pw)) %>% #NO DEBEN DE HABER CONGLOMERADOS SIN WEIGHT as_survey_design( id=c(dnum, snum), #~dnum+snum, # primary secondary sampling unit # strata = strata, #clusters need to be nested in the strata weights = pw # factores de expancion )
# denominadores covariate_set01 <- datasurvey %>% select(covariate_01, #sch.wide, #comp.imp, covariate_02) %>% colnames() # numerators within outcome covariate_set02 <- datasurvey %>% select(#stype, #sch.wide, #comp.imp, covariate_02) %>% colnames()
survey
: Estimate single prevalencesFrom a srvyr
survey design object, serosvy_proportion
estimates:
weighted prevalence (prop
),
total
),raw_prop
), cv
), deff
)serosvy_proportion(design = design, denominator = covariate_01, numerator = outcome_one)
example("serosvy_proportion")
survey
: Estimate multiple prevalencesIn the Article tab we provide a workflow to estimate multiple prevalences:
using different set of covariates and outcomes as numerators or denominators,
# crear matriz # # set 01 of denominator-numerator # expand_grid( design=list(design), denominator=c("covariate_01","covariate_02"), # covariates numerator=c("outcome_one","outcome_two") # outcomes ) %>% # # set 02 of denominator-numerator (e.g. within main outcome) # union_all( expand_grid( design=list(design), denominator=c("outcome_one","outcome_two"), # outcomes numerator=c("covariate_02") # covariates ) ) %>% # # create symbols (to be readed as arguments) # mutate( denominator=map(denominator,dplyr::sym), numerator=map(numerator,dplyr::sym) ) %>% # # estimate prevalence # mutate(output=pmap(.l = select(.,design,denominator,numerator), .f = serosvy_proportion)) %>% # # show the outcome # select(-design,-denominator,-numerator) %>% unnest(cols = c(output)) %>% print(n=Inf)
serology
: Estimate prevalence Under misclassificationWe gather one frequentist approach [@ROGAN1978], available in different Github repos, that deal with misclassification due to an imperfect diagnostic test [@Azman2020; @Takahashi2020]. Check the Reference tab.
We provide tidy outputs for bayesian approaches developed in @Larremore2020unk here and @Larremore2020kno here:
You can use them with purrr
and furrr
to efficiently iterate
and parallelize this step for multiple prevalences.
Check the workflow
in Article tab.
serosvy_known_sample_posterior( #in population positive_number_test = 321, total_number_test = 321+1234, # known performance sensitivity = 0.93, specificity = 0.975 )
tidy_result <- serosvy_known_sample_posterior( #in population positive_number_test = 321, total_number_test = 321+1234, # known performance sensitivity = 0.93, specificity = 0.975 ) tidy_result_out <- tidy_result %>% select(summary) %>% unnest(cols = c(summary)) tidy_result %>% select(posterior) %>% unnest(cols = c(posterior)) %>% ggplot(aes(x = r1)) + geom_histogram(aes(y=..density..),binwidth = 0.0005) + geom_density() + geom_vline(aes(xintercept=tidy_result_out %>% pull(numeric.mean)), color="red",lwd=1) + geom_vline(aes(xintercept=tidy_result_out %>% pull(numeric.p05)), color="red") + geom_vline(aes(xintercept=tidy_result_out %>% pull(numeric.p95)), color="red") + scale_x_continuous(breaks = scales::pretty_breaks())
example("serosvy_known_sample_posterior")
# result_unk <- sample_posterior_r_mcmc_testun( # samps = 10000, # #in population # pos = 692, #positive # n = 3212, #total # # in lab (local validation study) # tp = 670,tn = 640,fp = 202,fn = 74)
serosvy_unknown_sample_posterior_ii( #in population positive_number_test = 321, total_number_test = 321+1234, # in lab (local validation study) true_positive = 670, true_negative = 640, false_positive = 202, false_negative = 74)
result_unk <- serosvy_unknown_sample_posterior_ii( #in population positive_number_test = 321, total_number_test = 321+1234, # in lab (local validation study) true_positive = 670, true_negative = 640, false_positive = 202, false_negative = 74) result_unk %>% select(posterior) %>% unnest(posterior) %>% rownames_to_column() %>% pivot_longer(cols = -rowname, names_to = "estimates", values_to = "values") %>% ggplot(aes(x = values)) + geom_histogram(aes(y=..density..),binwidth = 0.0005) + geom_density() + facet_grid(~estimates,scales = "free_x")
example("serosvy_unknown_sample_posterior")
Feel free to fill an issue or contribute with your functions or workflows in a pull request.
Here are a list of publications with interesting approaches using R:
@Silveira2020 and @Hallal2020 analysed a serological survey accounting for sampling design and test validity using parametric bootstraping, following @Lewis2012.
@Flor2020 implemented a lot of frequentist and bayesian methods for test with known sensitivity and specificity. Code is available here.
@Gelman2020 also applied Bayesian inference with hierarchical regression and post-stratification to account for test uncertainty with unknown specificity and sensitivity. Here a case-study.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.