Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Packages
library(optimall)
library(DiagrammeR)
library(dplyr)

# Data: load MatWgt_Sim and fold the "Asian" race category into "Other".
data(MatWgt_Sim, package = "optimall")
MatWgt_Sim <- dplyr::mutate(
  MatWgt_Sim,
  race = dplyr::case_when(
    race %in% c("Asian", "Other") ~ "Other",
    race == "White" ~ "White",
    race == "Black" ~ "Black"
  )
)

# Phase 1 data: everything except mat_weight_true, with a strata column
# initialized to race before it is split further.
phase1 <- MatWgt_Sim %>%
  dplyr::select(-mat_weight_true) %>%
  dplyr::mutate(strata = race)

set.seed(452)
# Split every stratum on mat_weight_est at the global 25% and 75% quantiles.
# `trunc` specifies how to refer to mat_weight_est in the new strata names.
phase1 <- split_strata(
  data = phase1,
  strata = "strata",
  split = NULL,
  split_var = "mat_weight_est",
  type = "global quantile",
  split_at = c(0.25, 0.75),
  trunc = "MWC_est"
)

# Phase 1 data dictionary
phase1_data_dictionary <- data.frame(
  Variable = c("id", "race", "mat_weight_est", "diabetes"),
  Description = c(
    "unique identifier",
    "race of mother",
    "error-prone estimate of maternal weight change during pregnancy",
    "1/0 indicator for diabetes in the mother during pregnancy"
  )
)
## -----------------------------------------------------------------------------
# Create a multiwave object with two phases: one wave in the first phase and
# three waves in the second.
MySurvey <- multiwave(phases = 2, waves = c(1, 3))

## -----------------------------------------------------------------------------
# Direct slot access with `@` and `$`.

# Overall metadata
MySurvey@metadata

# Overall metadata can be written the same way, e.g. to record a title
MySurvey@metadata <- list(title = "Maternal Weight Survey")

# Phase 2 metadata
MySurvey@phases$phase2@metadata

# Design of Phase 2, Wave 2
MySurvey@phases$phase2@waves$wave2@design

## -----------------------------------------------------------------------------
# The same elements through the get_mw() accessor.

# Overall metadata
get_mw(MySurvey, phase = NA, slot = "metadata")

# Phase 2 metadata
get_mw(MySurvey, phase = 2, slot = "metadata")

# Design of Phase 2, Wave 2
get_mw(MySurvey, phase = 2, wave = 2, slot = "design")

## -----------------------------------------------------------------------------
# Writing the overall metadata through the set_mw() replacement function.
set_mw(MySurvey, phase = NA, slot = "metadata") <-
  list(title = "Maternal Weight Survey")
## -----------------------------------------------------------------------------
head(phase1)

# Store the Phase 1 data in the multiwave object.
set_mw(MySurvey, phase = 1, slot = "data") <- phase1

# Phase 1 data dictionary.
# Long descriptions are assembled with paste() (pieces joined by one space)
# instead of a string literal broken across source lines, so that no line
# break ends up embedded in the stored description text.
phase1_data_dictionary <- data.frame(
  Variable = c("id", "race", "mat_weight_est", "diabetes", "obesity"),
  Description = c(
    "unique identifier",
    "race of mother",
    paste("error-prone estimate of maternal weight change",
          "during pregnancy"),
    paste("1/0 indicator for diabetes in the mother",
          "during pregnancy"),
    "1/0 indicator for childhood obesity in child"
  )
)
head(phase1_data_dictionary)

# Attach the dictionary to the Phase 1 metadata under the name `data_dict`.
set_mw(MySurvey, phase = 1, slot = "metadata") <-
  list(data_dict = phase1_data_dictionary)
## ----eval = FALSE-------------------------------------------------------------
# multiwave_diagram(MySurvey)
## ----fig.align='center', fig.height = 5.5, fig.width = 5.5, echo= FALSE, eval = FALSE----
# multiwave_diagram(MySurvey)
## -----------------------------------------------------------------------------
# Initialize a multiwave object with two phases: one wave in Phase 1 and
# three waves in Phase 2.
IrisSurvey <- multiwave(phases = 2, waves = c(1, 3))

# Add an id column to the iris dataset. The ids are derived from the number
# of rows rather than a hard-coded 150, so the two cannot fall out of sync.
iris <- cbind(datasets::iris, id = seq_len(nrow(datasets::iris)))

# Place the iris data, without Sepal.Width, in Phase 1.
set_mw(IrisSurvey, phase = 1, slot = "data") <-
  subset(iris, select = -Sepal.Width)
## -----------------------------------------------------------------------------
# Compute the Phase 2, Wave 1 design with optimum_allocation(), naming the
# strata column directly in the call.
IrisSurvey <- apply_multiwave(
  IrisSurvey,
  phase = 2,
  wave = 1,
  fun = "optimum_allocation",
  strata = "Species",
  y = "Sepal.Length",
  nsample = 30,
  method = "WrightII"
)

## -----------------------------------------------------------------------------
# Record the strata column name in the Phase 2 metadata instead.
set_mw(IrisSurvey, phase = 2, slot = "metadata") <- list(strata = "Species")

## -----------------------------------------------------------------------------
# Same call as before, this time without the `strata` argument.
IrisSurvey <- apply_multiwave(
  IrisSurvey,
  phase = 2,
  wave = 1,
  fun = "optimum_allocation",
  y = "Sepal.Length",
  nsample = 30,
  method = "WrightII"
)

## -----------------------------------------------------------------------------
# Inspect the resulting design.
get_mw(IrisSurvey, phase = 2, wave = 1, slot = "design")
## -----------------------------------------------------------------------------
# Seed the random number generator before drawing the sample so that the
# selected ids (and everything printed below) are reproducible from run to
# run, as is done for the MySurvey sampling later in this script.
set.seed(123)
IrisSurvey <- apply_multiwave(
  IrisSurvey,
  phase = 2,
  wave = 1,
  fun = "sample_strata",
  id = "id",
  design_strata = "strata",
  n_allocated = "stratum_size",
  probs = "stratum_fraction"
)

## -----------------------------------------------------------------------------
# Inspect what was stored in the samples slot.
get_mw(IrisSurvey, phase = 2, wave = 1, slot = "samples")

## -----------------------------------------------------------------------------
# "Collect" Sepal.Width for the sampled ids and store it as the sampled data.
set_mw(IrisSurvey, phase = 2, wave = 1, slot = "sampled_data") <-
  iris[
    iris$id %in% get_mw(IrisSurvey, phase = 2, wave = 1, slot = "samples")$ids,
    c("id", "Sepal.Width")
  ]

## -----------------------------------------------------------------------------
# Merge the sampled data into the wave data, matching rows on `id`.
IrisSurvey <- merge_samples(
  IrisSurvey,
  phase = 2,
  wave = 1,
  id = "id",
  include_probs = TRUE
)

## -----------------------------------------------------------------------------
head(get_mw(IrisSurvey, phase = 2, wave = 1, slot = "data"))
## -----------------------------------------------------------------------------
# Metadata for Phase 2 including description,
# and column names to be used in function calls.
# Note that each element name corresponds to at least one argument of a
# function that will be called later on in the multi-wave workflow.
#
# The description is assembled with paste() so that it is stored as a single
# line; a string literal broken across source lines would embed a newline.
set_mw(MySurvey, phase = 2, slot = "metadata") <-
  list(
    description = paste(
      "Phase 2 of Maternal Weight Survey in which we",
      "seek to validate 750 samples across three waves."
    ),
    # strata column in data (used in multiple funcs)
    strata = "new_strata",
    # name of id column (used in sample_strata and merge_samples)
    id = "id",
    # col for which to minimize variance (used in optimum_allocation)
    y = "mat_weight_true",
    # strata column in designs (used for sample_strata)
    design_strata = "strata",
    # n allocated to strata in designs (used for sample_strata)
    n_allocated = "n_to_sample"
  )

# Then, metadata for Wave 1 of Phase 2.
# The description names Neyman allocation because the Wave 1 design in this
# script is computed with optimum_allocation(method = "Neyman").
set_mw(MySurvey, phase = 2, wave = 1, slot = "metadata") <-
  list(description = "First wave of 250 sampled using Neyman allocation")
## -----------------------------------------------------------------------------
# Design for Wave 1: allocate 250 samples across the `new_strata` strata
# with optimum_allocation(), using mat_weight_est as `y` and the "Neyman"
# method.
MySurvey <- apply_multiwave(
  MySurvey,
  phase = 2,
  wave = 1,
  fun = "optimum_allocation",
  strata = "new_strata",
  y = "mat_weight_est",
  nsample = 250,
  method = "Neyman"
)

# Inspect the stored design.
get_mw(MySurvey, phase = 2, wave = 1, slot = "design")
## -----------------------------------------------------------------------------
# Get list of ids to sample using stratified random sampling
# without replacement.
set.seed(456)
MySurvey <- apply_multiwave(
  MySurvey,
  phase = 2,
  wave = 1,
  fun = "sample_strata",
  strata = "new_strata",
  id = "id",
  # No one has been sampled yet. `already_sampled` is the argument name used
  # elsewhere in this script (see the allocate_wave call for Wave 2); the
  # previous `wave2a` spelling was a leftover name.
  already_sampled = NULL,
  design_strata = "strata", # from design
  n_allocated = "stratum_size"
)

# Check that it worked
head(get_mw(MySurvey, phase = 2, wave = 1, slot = "samples")$ids)
length(get_mw(MySurvey, phase = 2, wave = 1, slot = "samples")$ids)

# But, notice that we had already specified most of the arguments to
# sample_strata in the phase metadata. So, we can get the same result
# with a much shorter call to the function. Only `n_allocated` is given
# here, because the phase metadata names "n_to_sample" while this design's
# column is "stratum_size".
set.seed(456)
MySurvey <- apply_multiwave(
  MySurvey,
  phase = 2,
  wave = 1,
  fun = "sample_strata",
  n_allocated = "stratum_size"
)
ids_wave1 <- get_mw(MySurvey, phase = 2, wave = 1, slot = "samples")$ids

# Check that it gives same results
head(ids_wave1)
length(ids_wave1)
## -----------------------------------------------------------------------------
# Use the sampled ids to pull the id and mat_weight_true columns from the
# full dataset, and store them as the Wave 1 sampled data.
set_mw(MySurvey, phase = 2, wave = 1, slot = "sampled_data") <-
  MatWgt_Sim[
    MatWgt_Sim$id %in% ids_wave1,
    c("id", "mat_weight_true")
  ]

## -----------------------------------------------------------------------------
# Merge the sampled data into the Wave 1 data.
MySurvey <- apply_multiwave(
  MySurvey,
  phase = 2,
  wave = 1,
  fun = "merge_samples"
)
## ----include = FALSE----------------------------------------------------------
# Old approach: before a sampled indicator was added automatically, it had
# to be created by hand.
#
# The samples slot is a list whose `ids` element holds the sampled ids (it is
# accessed as `$ids` elsewhere in this script). The ids must be extracted
# with `$ids` before matching: `%in%` against the list itself never matches,
# which would leave the indicator at 0 for every row.
set_mw(MySurvey, phase = 2, wave = 1) <-
  get_mw(MySurvey, phase = 2, wave = 1) %>%
  dplyr::mutate(
    already_sampled_ind = as.numeric(
      id %in% get_mw(MySurvey, phase = 2, wave = 1, slot = "samples")$ids
    )
  )
## ----eval = FALSE-------------------------------------------------------------
# multiwave_diagram(MySurvey)
## ----fig.align='center', fig.height = 5.5, fig.width = 5.5, echo= FALSE, eval = FALSE----
# multiwave_diagram(MySurvey)
## -----------------------------------------------------------------------------
# Wave 2 design: run allocate_wave() for 250 samples, passing the
# `phase_sample_ind2` column name as `already_sampled`.
MySurvey <- apply_multiwave(
  MySurvey,
  phase = 2,
  wave = 2,
  fun = "allocate_wave",
  nsample = 250,
  already_sampled = "phase_sample_ind2"
)

# Inspect the stored Wave 2 design.
get_mw(MySurvey, phase = 2, wave = 2, slot = "design")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.