knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = TRUE, message = FALSE, warning = FALSE ) knitr::opts_chunk$set(fig.width=10, fig.height=4) options(tidyverse.quiet = TRUE)
Problem
Method details
survey:
serology:
Here we applied a Bayesian method to estimate the posterior probability of seroprevalence for a test with unknown performance [@Larremore2020unk].
As inputs we used the expanded point estimate number of positive tests and total population that resulted from the sampling weights adjusted seroprevalence estimation.
To maintain the sampling design uncertainty, we applied the same method for each estimated confidence interval.
Since test sensitivity and specificity were unknown, we used the reported number of true positives, false positives, true negatives and false negatives from a local test performance evaluation.
For this worlflow we used lab validation results from an
example available in the epiR
package: ?epiR::epi.tests
for sensitivity: Of 744 patients that were disease positive, 670 were test positive.
Workflow details
In this reprex the survey design does not include any explicit strata. Include it if available in I.3.
The family of functions serosvy_*_sample_posterior
to correct
prevalence due to test performance require positive integers.
This is adressed in II.2.1.
We use purrr
and furrr
within a for-loop
in II.2.2 to efficiently update prevalence posterior distributions for multiple prevalences.
Limitations
We do not apply a multiple subpopulation aproach for the inference of prevalence. This is currently available for a test with known performance [@Larremore2020kno].
This workflow is only applicable to categorical variables.
set.seed
could not control the variability generated by test correction functions.
# package library(serosurvey) # additional library(tidyverse) library(srvyr) library(survey) library(tictoc) library(furrr) library(purrr) # theme theme_set(theme_bw())
datasurvey
is the input dataframeoutcome_one
and outcome_two
covariate_01
and covariate_02
data(api) datasurvey <- apiclus2 %>% mutate(survey_all="survey_all") %>% # conviniently create variables # to identify outcomes and covariates throughout the script mutate(outcome_one = awards, # this have the serology results outcome_two = cut(pct.resp,breaks = 2), covariate_01 = stype, covariate_02 = both, # two stage cluster samples primary_sampling_unit = dnum, secondary_sampling_unit = snum, # strata = census_tracts, (activate if available) sampling_weights = pw) %>% # recode factor leves to replicate serology results mutate(outcome_one=fct_recode(outcome_one, "negative"="No","positive"="Yes")) %>% mutate(outcome_two=fct_recode(outcome_two, "negative"="(50,100]","positive"="(-0.1,50]"))
### `srvyr` workflow dstrata <- datasurvey %>% as_survey_design(strata = covariate_01, weights = pw) # dstrata2 <- apistrat %>% # mutate(pw2=1) %>% # as_survey_design(strata = covariate_01, weights = pw2) dstrata %>% summarise(pct = survey_mean(outcome_one=="positive",proportion = TRUE)) # dstrata2 %>% # summarise(pct = survey_mean(outcome_one=="positive",proportion = TRUE))
select
and colnames
we create strings of covariates# outcome outcome_set00 <- datasurvey %>% select(outcome_one, outcome_two) %>% colnames() # denominators covariate_set01 <- datasurvey %>% select(covariate_01, #covariate_03, #covariate_04, covariate_02) %>% colnames() # numerators within outcome covariate_set02 <- datasurvey %>% select(#covariate_01, #covariate_03 #covariate_04 covariate_02) %>% colnames()
srvyr
survey design object# define how to treat strata with only one census tract options(survey.lonely.psu = "certainty") # uu_clean_data %>% count(dnum, snum) # create survey sampling design --------------------------------- design <- datasurvey %>% # the outcome must have complete observations filter(!is.na(outcome_one)) %>% # all census tracts must contain sampling weights filter(!is.na(sampling_weights)) %>% as_survey_design( # 1 - primary and secondary sampling unit id=c(primary_sampling_unit, secondary_sampling_unit), # 2 - activate if available # strata = strata, #clusters need to be nested in the strata # 3- weights or expansion factors weights = sampling_weights )
How to use serosvy_proportion
for a single estimation
Here we exchage the numerator and denominator
serosvy_proportion(design = design, denominator = covariate_01, numerator = outcome_one) %>% select(-ends_with("_low"),-ends_with("_upp"), -ends_with("_cv"),-ends_with("_deff")) serosvy_proportion(design = design, denominator = outcome_one, numerator = covariate_01) %>% select(-ends_with("_low"),-ends_with("_upp"), -ends_with("_cv"),-ends_with("_deff"))
outcome_01_pre <- # create a expanded grid of numerators and denominators # # set 01 of denominator-numerator # expand_grid( design=list(design), denominator=covariate_set01, # covariates numerator=outcome_set00 # outcomes ) %>% # # set 02 of denominator-numerator (e.g. within main outcome) # union_all( expand_grid( design=list(design), denominator=outcome_set00, # outcomes numerator=covariate_set02 # covariates ) ) %>% # # create symbols (to be readed as arguments) # mutate( denominator=map(denominator,dplyr::sym), numerator=map(numerator,dplyr::sym) ) %>% # # estimate prevalence # mutate(output=pmap(.l = select(.,design,denominator,numerator), .f = serosvy_proportion)) %>% # # show the outcome # select(-design,-denominator,-numerator) %>% unnest(cols = c(output)) #%>% outcome_01_pre
Here we use:
Filter the rows with the results of the serology test: outcome_one
Round population estimates to use positive integers in the next step
Add the results from the available validation study as columns
# _ 2. test performance --------------------------------------------------- # __ filter + add values -------------------------------------------------- outcome_01_adj_pre <- outcome_01_pre %>% # only serological results filter(numerator=="outcome_one") %>% # only positives filter(numerator_level=="positive") %>% # round numbers are required mutate_at(.vars = vars(total,total_den, total_low,total_den_low, total_upp,total_den_upp), .funs = list("round"=round),digits = 0) %>% # unknown test local validation results # for sensitivity: # Of 744 patients that were disease positive, 670 were test positive. # for specificity: # Of 842 patients that were disease negative, 640 were test negative. # source: ?epiR::epi.tests mutate( true_positive = 670, false_negative = 74, false_positive = 202, true_negative = 640 ) %>% rownames_to_column() %>% mutate(rowname=as.numeric(rowname))
Here we use purrr
and furrr
within a for-loop
to efficiently update prevalence posterior distributions for multiple prevalences.
We apply the procedure to each point estimate and confidence interval
# 56-60sec por covariable # 4GB RAM # parallelized in 8 cores using purrr y furrr plan(multisession, workers = availableCores()) tic() out <- tibble() for (i in 1:nrow(outcome_01_adj_pre)) { out <- outcome_01_adj_pre %>% slice(i) %>% # dot mutate(adj_dot_unk=future_pmap( .l = select(., positive_number_test=total_round, total_number_test=total_den_round), .f = possibly(serosvy_unknown_sample_posterior,otherwise = NA_real_), true_positive = true_positive, false_negative = false_negative, false_positive = false_positive, true_negative = true_negative)) %>% serosvy_extract_posterior(variable = adj_dot_unk) %>% # low mutate(adj_low_unk=future_pmap( .l = select(., positive_number_test=total_low_round, total_number_test=total_den_low_round), .f = possibly(serosvy_unknown_sample_posterior,otherwise = NA_real_), true_positive = true_positive, false_negative = false_negative, false_positive = false_positive, true_negative = true_negative)) %>% serosvy_extract_posterior(variable = adj_low_unk) %>% # upp mutate(adj_upp_unk=future_pmap( .l = select(., positive_number_test=total_upp_round, total_number_test=total_den_upp_round), .f = possibly(serosvy_unknown_sample_posterior,otherwise = NA_real_), true_positive = true_positive, false_negative = false_negative, false_positive = false_positive, true_negative = true_negative)) %>% serosvy_extract_posterior(variable = adj_upp_unk) %>% # union all outputs union_all(out) # activate to print the progress! (recommended) # out %>% print() #%>% # # known test # mutate(adj_dot_kno=future_pmap( # .l = select(., # positive_number_test=total_round, # total_number_test=total_den_round), # .f = possibly(serosvy_known_sample_posterior,otherwise = NA_real_), # sensitivity=0.999, # specificity=0.960)) } toc() outcome_01_adj <- out %>% mutate(rowname=as.numeric(rowname)) %>% arrange(rowname) %>% select(-rowname)
Here we join all the results to the original dataset
We also create unite*_
columns summarizing the results
outcome_01_adj_tbl <- # start from original dataset outcome_01_pre %>% # only positives filter(numerator_level=="positive"|denominator_level=="positive") %>% # left join with db with test performance update left_join(outcome_01_adj) %>% # naniar::miss_var_summary() %>% # avallecam::print_inf() %>% # apply format unite_dotwhiskers(variable_dot = raw_prop, variable_low = raw_prop_low, variable_upp = raw_prop_upp, digits_dot = 2, digits_low = 1, digits_upp = 2) %>% unite_dotwhiskers(variable_dot = prop, variable_low = prop_low, variable_upp = prop_upp, digits_dot = 2, digits_low = 1, digits_upp = 2) %>% unite_dotwhiskers(variable_dot = adj_dot_unk_p50, variable_low = adj_low_unk_p50, variable_upp = adj_upp_unk_p50, digits_dot = 3, digits_low = 2, digits_upp = 3)
Take a quick look to the output:
raw_or_unadjusted_prev
crude or raw prevalence
sampling_weighted_prev
sampling weights adjusted prevalencesampling_and_test_prev
sampling weights and test performance adjusted prevalenceoutcome_01_adj_tbl %>% select(1:4,raw_num,raw_den, starts_with("unite1_"), prop_cv) %>% rename("raw_or_unadjusted_prev"=unite1_raw_prop, "sampling_weighted_prev"=unite1_prop, "sampling_and_test_prev"=unite1_adj_dot_unk_p50) %>% mutate_if(.predicate = is.numeric,.funs = ~round(.x,2)) %>% # DT::datatable() knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = "striped", full_width = F)
outcome_01_adj_tbl %>% filter(str_detect(numerator,"outcome")) %>% ggplot_prevalence( denominator_level = denominator_level, numerator = numerator, proportion = prop, proportion_upp = prop_upp, proportion_low = prop_low) + # theme(axis.text.x = element_text(angle = 0, vjust = 0, hjust=0)) + scale_y_continuous( labels = scales::percent_format(accuracy = 1), breaks = scales::pretty_breaks(n = 5)) + # coord_flip() + facet_wrap(denominator~.,scales = "free") + # facet_grid(denominator~.,scales = "free_y") + colorspace::scale_color_discrete_qualitative() + labs(title = "Prevalence of numerators across denominators", y = "Prevalence",x = "")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.