knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
knitr::opts_chunk$set(fig.width=10, fig.height=4)
options(tidyverse.quiet = TRUE)

Introduction

Here we present three examples, definitions and related references:

library(serosurvey)
# additional
library(tidyverse)
library(srvyr)
library(survey)
library(tictoc)
library(furrr)
library(purrr)
# theme
theme_set(theme_bw())
data(api)

datasurvey <- apiclus2 %>% 
  mutate(survey_all="survey_all") %>% 
  # create variables
  mutate(outcome_one = awards,
         outcome_two = cut(pct.resp,breaks = 2),
         covariate_01 = stype,
         covariate_02 = both)
# tratamiento de stratos con un solo conglomerado
options(survey.lonely.psu = "certainty")

# uu_clean_data %>% count(CONGLOMERADO,VIVIENDA)

# diseƱo muestral de la encuesta ---------------------------------

design <- datasurvey %>% 

  filter(!is.na(outcome_one)) %>% #CRITICAL! ON OUTCOME
  filter(!is.na(pw)) %>% #NO DEBEN DE HABER CONGLOMERADOS SIN WEIGHT

  as_survey_design(
    id=c(dnum, snum), #~dnum+snum, # primary secondary sampling unit
    # strata = strata, #clusters need to be nested in the strata
    weights = pw # factores de expancion
  )
# denominadores
covariate_set01 <- datasurvey %>% 
  select(covariate_01,
         #sch.wide,
         #comp.imp,
         covariate_02) %>% 
  colnames()

# numerators within outcome
covariate_set02 <- datasurvey %>% 
  select(#stype,
         #sch.wide,
         #comp.imp,
         covariate_02) %>% 
  colnames()

1. survey: Estimate single prevalences

serosvy_proportion(design = design,
                   denominator = covariate_01,
                   numerator = outcome_one)
example("serosvy_proportion")

2. survey: Estimate multiple prevalences

# crear matriz
  #
  # set 01 of denominator-numerator
  #
expand_grid(
  design=list(design),
  denominator=c("covariate_01","covariate_02"), # covariates
  numerator=c("outcome_one","outcome_two") # outcomes
  ) %>% 
  #
  # set 02 of denominator-numerator (e.g. within main outcome)
  #
  union_all(
    expand_grid(
      design=list(design),
      denominator=c("outcome_one","outcome_two"), # outcomes
      numerator=c("covariate_02") # covariates
    )
  ) %>% 
  #
  # create symbols (to be readed as arguments)
  #
  mutate(
    denominator=map(denominator,dplyr::sym),
    numerator=map(numerator,dplyr::sym)
  ) %>% 
  #
  # estimate prevalence
  #
  mutate(output=pmap(.l = select(.,design,denominator,numerator),
                     .f = serosvy_proportion)) %>% 
  #
  # show the outcome
  #
  select(-design,-denominator,-numerator) %>% 
  unnest(cols = c(output)) %>% 
  print(n=Inf)

3. serology: Estimate prevalence Under misclassification

Known test performance - Bayesian method

serosvy_known_sample_posterior(
  #in population
  positive_number_test = 321,
  total_number_test = 321+1234,
  # known performance
  sensitivity = 0.93,
  specificity = 0.975
)
tidy_result <- serosvy_known_sample_posterior(
  #in population
  positive_number_test = 321,
  total_number_test = 321+1234,
  # known performance
  sensitivity = 0.93,
  specificity = 0.975
)

tidy_result_out <-
  tidy_result %>%
  select(summary) %>%
  unnest(cols = c(summary))

tidy_result %>%
 select(posterior) %>%
 unnest(cols = c(posterior)) %>%
 ggplot(aes(x = r1)) +
 geom_histogram(aes(y=..density..),binwidth = 0.0005) +
 geom_density() +
 geom_vline(aes(xintercept=tidy_result_out %>%
                  pull(numeric.mean)),
            color="red",lwd=1) +
 geom_vline(aes(xintercept=tidy_result_out %>%
                  pull(numeric.p05)),
            color="red") +
 geom_vline(aes(xintercept=tidy_result_out %>%
                  pull(numeric.p95)),
            color="red") +
 scale_x_continuous(breaks = scales::pretty_breaks())
example("serosvy_known_sample_posterior")

Unknown test performance - Bayesian method

# result_unk <- sample_posterior_r_mcmc_testun(
#   samps = 10000,
#   #in population
#   pos = 692, #positive
#   n = 3212, #total
#   # in lab (local validation study)
#   tp = 670,tn = 640,fp = 202,fn = 74)
serosvy_unknown_sample_posterior_ii(
  #in population
  positive_number_test = 321,
  total_number_test = 321+1234,
  # in lab (local validation study)
  true_positive = 670,
  true_negative = 640,
  false_positive = 202,
  false_negative = 74)
result_unk <- serosvy_unknown_sample_posterior_ii(
  #in population
  positive_number_test = 321,
  total_number_test = 321+1234,
  # in lab (local validation study)
  true_positive = 670,
  true_negative = 640,
  false_positive = 202,
  false_negative = 74)

result_unk %>%
  select(posterior) %>% 
  unnest(posterior) %>%
  rownames_to_column() %>%
  pivot_longer(cols = -rowname,
               names_to = "estimates",
               values_to = "values") %>%
  ggplot(aes(x = values)) +
  geom_histogram(aes(y=..density..),binwidth = 0.0005) +
  geom_density() +
  facet_grid(~estimates,scales = "free_x")
example("serosvy_unknown_sample_posterior")

Contributing

Feel free to fill an issue or contribute with your functions or workflows in a pull request.

Here are a list of publications with interesting approaches using R:

References



avallecam/serosurvey documentation built on Feb. 12, 2023, 4:13 p.m.