knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
knitr::opts_chunk$set(fig.width=10, fig.height=4)
options(tidyverse.quiet = TRUE)

Context

Problem

Method details

survey:

serology:

Workflow details

Limitations

Import packages

# package
library(serosurvey)
# additional
library(tidyverse)
library(srvyr)
library(survey)
library(tictoc)
library(furrr)
library(purrr)
# theme
theme_set(theme_bw())

I. Workflow: Create a survey design object

I.1. Import data

data(api)

datasurvey <- apiclus2 %>% 
  mutate(survey_all="survey_all") %>% 
  # conviniently create variables
  # to identify outcomes and covariates throughout the script 
  mutate(outcome_one = awards, # this have the serology results
         outcome_two = cut(pct.resp,breaks = 2),
         covariate_01 = stype,
         covariate_02 = both,
         # two stage cluster samples
         primary_sampling_unit = dnum,
         secondary_sampling_unit = snum,
         # strata = census_tracts, (activate if available)
         sampling_weights = pw) %>% 
  # recode factor leves to replicate serology results
  mutate(outcome_one=fct_recode(outcome_one,
                                "negative"="No","positive"="Yes")) %>% 
  mutate(outcome_two=fct_recode(outcome_two,
                                "negative"="(50,100]","positive"="(-0.1,50]"))
### `srvyr` workflow
dstrata <- datasurvey %>% as_survey_design(strata = covariate_01, weights = pw)
# dstrata2 <- apistrat %>%
#   mutate(pw2=1) %>%
  # as_survey_design(strata = covariate_01, weights = pw2)
dstrata %>%
  summarise(pct = survey_mean(outcome_one=="positive",proportion = TRUE))
# dstrata2 %>%
#   summarise(pct = survey_mean(outcome_one=="positive",proportion = TRUE))

I.2. Identify sets numerators and denominators

# outcome
outcome_set00 <- datasurvey %>% 
  select(outcome_one,
         outcome_two) %>% 
  colnames()

# denominators
covariate_set01 <- datasurvey %>% 
  select(covariate_01,
         #covariate_03,
         #covariate_04,
         covariate_02) %>% 
  colnames()

# numerators within outcome
covariate_set02 <- datasurvey %>% 
  select(#covariate_01,
         #covariate_03
         #covariate_04
         covariate_02) %>% 
  colnames()

I.3. Create a srvyr survey design object

# define how to treat strata with only one census tract
options(survey.lonely.psu = "certainty")

# uu_clean_data %>% count(dnum, snum)

# create survey sampling design ---------------------------------

design <- datasurvey %>% 

  # the outcome must have complete observations
  filter(!is.na(outcome_one)) %>% 
  # all census tracts must contain sampling weights
  filter(!is.na(sampling_weights)) %>% 

  as_survey_design(
    # 1 - primary and secondary sampling unit
    id=c(primary_sampling_unit, secondary_sampling_unit), 
    # 2 - activate if available
    # strata = strata, #clusters need to be nested in the strata
    # 3- weights or expansion factors
    weights = sampling_weights 
  )

I.4 Example: Single estimation

serosvy_proportion(design = design,
                   denominator = covariate_01,
                   numerator = outcome_one) %>%
  select(-ends_with("_low"),-ends_with("_upp"),
         -ends_with("_cv"),-ends_with("_deff"))

serosvy_proportion(design = design,
                   denominator = outcome_one,
                   numerator = covariate_01) %>%
  select(-ends_with("_low"),-ends_with("_upp"),
         -ends_with("_cv"),-ends_with("_deff"))

II. Workflow: Multiple estimation

II.1. Estimates: raw + weighted

outcome_01_pre <- 
  # create a expanded grid of numerators and denominators
  #
  # set 01 of denominator-numerator
  #
  expand_grid(
    design=list(design),
    denominator=covariate_set01, # covariates
    numerator=outcome_set00 # outcomes
  ) %>% 
  #
  # set 02 of denominator-numerator (e.g. within main outcome)
  #
  union_all(
    expand_grid(
      design=list(design),
      denominator=outcome_set00, # outcomes
      numerator=covariate_set02 # covariates
    )
  ) %>% 
  #
  # create symbols (to be readed as arguments)
  #
  mutate(
    denominator=map(denominator,dplyr::sym),
    numerator=map(numerator,dplyr::sym)
  ) %>% 
  #
  # estimate prevalence
  #
  mutate(output=pmap(.l = select(.,design,denominator,numerator),
                     .f = serosvy_proportion)) %>% 
  #
  # show the outcome
  #
  select(-design,-denominator,-numerator) %>% 
  unnest(cols = c(output)) #%>% 

outcome_01_pre

II.2. Add uncertain test performance

II.2.1 Filter serology results and add validation study results

# _ 2. test performance ---------------------------------------------------

# __ filter + add values --------------------------------------------------

outcome_01_adj_pre <- 
  outcome_01_pre %>% 
  # only serological results
  filter(numerator=="outcome_one") %>% 
  # only positives
  filter(numerator_level=="positive") %>% 
  # round numbers are required
  mutate_at(.vars = vars(total,total_den,
                         total_low,total_den_low,
                         total_upp,total_den_upp),
            .funs = list("round"=round),digits = 0) %>%
  # unknown test local validation results
  # for sensitivity:
  # Of 744 patients that were disease positive, 670 were test positive.
  # for specificity:
  # Of 842 patients that were disease negative, 640 were test negative.
  # source: ?epiR::epi.tests
  mutate(
    true_positive = 670,
    false_negative = 74,
    false_positive = 202,
    true_negative = 640
  ) %>% 
  rownames_to_column() %>%
  mutate(rowname=as.numeric(rowname))

II.2.2 Apply to all rows and extract the results

# 56-60sec por covariable 
# 4GB RAM
# parallelized in 8 cores using purrr y furrr

plan(multisession, workers = availableCores())
tic()

out <- tibble()

for (i in 1:nrow(outcome_01_adj_pre)) {

  out <- outcome_01_adj_pre %>% 

    slice(i) %>% 

    # dot
    mutate(adj_dot_unk=future_pmap(
      .l = select(.,
                  positive_number_test=total_round,
                  total_number_test=total_den_round),
      .f = possibly(serosvy_unknown_sample_posterior,otherwise = NA_real_),
      true_positive = true_positive,
      false_negative = false_negative,
      false_positive = false_positive,
      true_negative = true_negative)) %>% 
    serosvy_extract_posterior(variable = adj_dot_unk) %>% 

    # low
    mutate(adj_low_unk=future_pmap(
      .l = select(.,
                  positive_number_test=total_low_round,
                  total_number_test=total_den_low_round),
      .f = possibly(serosvy_unknown_sample_posterior,otherwise = NA_real_),
      true_positive = true_positive,
      false_negative = false_negative,
      false_positive = false_positive,
      true_negative = true_negative)) %>%
    serosvy_extract_posterior(variable = adj_low_unk) %>%

    # upp
    mutate(adj_upp_unk=future_pmap(
      .l = select(.,
                  positive_number_test=total_upp_round,
                  total_number_test=total_den_upp_round),
      .f = possibly(serosvy_unknown_sample_posterior,otherwise = NA_real_),
      true_positive = true_positive,
      false_negative = false_negative,
      false_positive = false_positive,
      true_negative = true_negative)) %>%
    serosvy_extract_posterior(variable = adj_upp_unk) %>% 

    # union all outputs
    union_all(out)

  # activate to print the progress! (recommended)
  # out %>% print() #%>% 

  # # known test
    # mutate(adj_dot_kno=future_pmap(
    #   .l = select(.,
    #               positive_number_test=total_round,
    #               total_number_test=total_den_round),
    #   .f = possibly(serosvy_known_sample_posterior,otherwise = NA_real_),
    #   sensitivity=0.999,
    #   specificity=0.960))

}

toc()

outcome_01_adj <- out  %>% 
  mutate(rowname=as.numeric(rowname)) %>% 
  arrange(rowname) %>% 
  select(-rowname) 

II.3. Create output format

outcome_01_adj_tbl <- 
  # start from original dataset
  outcome_01_pre %>% 
  # only positives
  filter(numerator_level=="positive"|denominator_level=="positive") %>% 
  # left join with db with test performance update
  left_join(outcome_01_adj) %>% 
  # naniar::miss_var_summary() %>% 
  # avallecam::print_inf() %>% 

  # apply format
  unite_dotwhiskers(variable_dot = raw_prop, 
                    variable_low = raw_prop_low,
                    variable_upp = raw_prop_upp,
                    digits_dot = 2,
                    digits_low = 1,
                    digits_upp = 2) %>% 
  unite_dotwhiskers(variable_dot = prop, 
                    variable_low = prop_low,
                    variable_upp = prop_upp,
                    digits_dot = 2,
                    digits_low = 1,
                    digits_upp = 2) %>% 
  unite_dotwhiskers(variable_dot = adj_dot_unk_p50,
                    variable_low = adj_low_unk_p50,
                    variable_upp = adj_upp_unk_p50,
                    digits_dot = 3,
                    digits_low = 2,
                    digits_upp = 3)

II.4. Evaluate output

outcome_01_adj_tbl %>% 
  select(1:4,raw_num,raw_den,
         starts_with("unite1_"),
         prop_cv) %>% 
  rename("raw_or_unadjusted_prev"=unite1_raw_prop,
         "sampling_weighted_prev"=unite1_prop,
         "sampling_and_test_prev"=unite1_adj_dot_unk_p50) %>% 
  mutate_if(.predicate = is.numeric,.funs = ~round(.x,2)) %>%
  #   DT::datatable()
  knitr::kable() %>%
  kableExtra::kable_styling(bootstrap_options = "striped", full_width = F)

II.5. Output figure

outcome_01_adj_tbl %>%
  filter(str_detect(numerator,"outcome")) %>% 
  ggplot_prevalence(
    denominator_level = denominator_level,
    numerator = numerator,
    proportion = prop,
    proportion_upp = prop_upp,
    proportion_low = prop_low) +
  # theme(axis.text.x = element_text(angle = 0, vjust = 0, hjust=0)) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = scales::pretty_breaks(n = 5)) +
  # coord_flip() +
  facet_wrap(denominator~.,scales = "free") +
  # facet_grid(denominator~.,scales = "free_y") +
  colorspace::scale_color_discrete_qualitative() +
  labs(title = "Prevalence of numerators across denominators",
       y = "Prevalence",x = "")

Reference



avallecam/serosurvey documentation built on Feb. 12, 2023, 4:13 p.m.