# Packages ----------------------------------------------------------------
library(tidyverse)
library(synthpop)
# clean %>%
# filter(banner_id == "H") %>%
# mutate(att_id = str_remove(att_id, "q41_")) %>%
# mutate(value = as.numeric(value >= 10)) %>%
# pivot_wider(
# names_from = att_id,
# values_from = value
# ) %>%
# select(a:o) %>%
# slice_sample(n = nrow(.), replace = TRUE)
# Read in data ------------------------------------------------------------
# Load the prepared sample from the project's data-make folder.
food.samp <- readr::read_rds("data-make/some-data.rds")
# Check relationships -----------------------------------------------------
# Print the correlation matrix of every column, rounded to two decimals.
round(cor(food.samp), digits = 2)
# Flip some relationships -------------------------------------------------
# Mirror items a, d, g and m with x -> 10 - x + 1 (so 1 becomes 10 and 10
# becomes 1), which reverses the sign of their correlations with the other
# items. All remaining columns are left untouched.
food.samp2 <- food.samp %>%
  mutate(across(c(a, d, g, m), function(x) 10 - x + 1))
# Make binary -------------------------------------------------------------
# Recode every column to a 0/1 indicator: 1 when the value is 9 or higher,
# 0 otherwise. Missing values stay NA because the comparison yields NA.
food.samp3 <- food.samp2 %>%
  mutate(across(everything(), function(x) as.numeric(x >= 9)))
# Synthetic ---------------------------------------------------------------
# Number of synthetic rows requested from syn() below.
N <- 500
# Synthesise one data set (m = 1) of N rows (k = N) from the binary sample
# using the CART method; `seed` makes the synthesis reproducible.
# NOTE(review): minnumlevels = 2 presumably makes syn() handle the two-valued
# numeric columns as factors -- confirm against the synthpop documentation.
food.synth <- syn(
  food.samp3,
  method = "cart", m = 1, k = N,
  minnumlevels = 2, seed = 101
)
# Complete the data -------------------------------------------------------
# Seed the RNG so the random weights drawn below are reproducible.
set.seed(102)
FoodSample <- food.synth[["syn"]] %>%
  as_tibble() %>%
  # Go through character before numeric so the stored labels are converted
  # (presumably the synthesised columns come back as factors -- TODO confirm).
  mutate(across(everything(), function(col) as.numeric(as.character(col)))) %>%
  # Draw weights from N(mean = 1, sd = 0.15), then rescale them so that they
  # sum to the number of rows.
  mutate(
    weight = rnorm(n(), mean = 1, sd = 0.15),
    weight = weight / sum(weight) * n()
  ) %>%
  # Add a sequential row identifier and fix the column order.
  rowid_to_column(var = "id") %>%
  select(id, a:o, weight)
# Inject missing data randomly --------------------------------------------
# Replace `n` randomly chosen elements of `x` with NA.
#
# Arguments:
#   x  A vector.
#   n  Number of elements to set to NA (default 5). Must not exceed
#      length(x); n = 0 returns `x` unchanged.
# Returns:
#   `x` with `n` distinct positions replaced by NA.
# Notes:
#   Uses the global RNG, so call set.seed() beforehand for reproducibility.
random_na <- function(x, n = 5) {
  if (n > length(x)) {
    stop(
      "`n` (", n, ") cannot exceed the length of `x` (", length(x), ").",
      call. = FALSE
    )
  }
  # sample.int() draws positions from 1..length(x). Unlike 1:length(x) it
  # never yields c(1, 0) for an empty `x`, and for non-empty input it
  # produces the same draws as sample(1:length(x), ...).
  idx <- sample.int(length(x), size = n, replace = FALSE)
  x[idx] <- NA
  x
}
# Seed the RNG so the positions of the injected NAs are reproducible.
set.seed(103)
# Reshape to one row per (id, item), blank out 5 randomly chosen item values
# within each id, then reshape back to one row per id.
FoodSample <- FoodSample %>%
  pivot_longer(a:o, names_to = "var", values_to = "val") %>%
  group_by(id) %>%
  mutate(val = random_na(val, n = 5)) %>%
  ungroup() %>%
  pivot_wider(names_from = var, values_from = val)
# Print the item correlations again, using pairwise-complete observations
# now that the items contain NAs.
round(
  cor(select(FoodSample, a:o), use = "pairwise.complete.obs"),
  digits = 2
)
# Seed the RNG so the column shuffle below is reproducible.
set.seed(109)
FoodSample <- FoodSample %>%
  # Give the letter-coded item columns descriptive food names.
  rename(
    NYStrip = a,
    Salad = b,
    Ribeye = c,
    Salmon = d,
    PorkChop = e,
    Walleye = f,
    Chicken = g,
    LambChop = h,
    VegSoup = i,
    Turkey = j,
    Bisque = k,
    Chili = l,
    Duck = m,
    Ravioli = n,
    Tofu = o
  ) %>%
  # Shuffle the column order. sample() without `size` permutes every column,
  # so this no longer relies on a hard-coded column count; with the current
  # 17 columns the draw is identical to sample(..., size = 17).
  select(all_of(sample(names(.)))) %>%
  # Pin `id` as the first column and `weight` as the last one.
  select(id, everything(), -weight, weight)
# Save the finished data set as package data, overwriting any existing copy.
usethis::use_data(FoodSample, overwrite = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.