# data-make/food-sample-make.R

# Packages ----------------------------------------------------------------

library(tidyverse)
library(synthpop)

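# NOTE(review): leftover, commented-out pipeline (not run). It looks like the
# step that produced the input sample read below -- confirm before reuse.
# clean %>%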
#     filter(banner_id == "H") %>%
#     mutate(att_id = str_remove(att_id, "q41_")) %>%
#     mutate(value = as.numeric(value >= 10)) %>%
#     pivot_wider(
#         names_from = att_id,
#         values_from = value
#     ) %>%
#     select(a:o) %>%
#     slice_sample(n = nrow(.), replace = TRUE)

# Read in data ------------------------------------------------------------

# Load the sample that the synthetic data set is built from.
food.samp <- read_rds(file = "data-make/some-data.rds")


# Check relationships -----------------------------------------------------

# Print the correlation matrix of the sample, rounded to two decimals.
round(cor(food.samp), digits = 2)


# Flip some relationships -------------------------------------------------

# Reverse the scale of columns a, d, g and m (x becomes 10 - x + 1) so that
# their relationships with the remaining columns are flipped.
food.samp2 <-
    food.samp %>%
    mutate(across(c(a, d, g, m), function(x) 10 - x + 1))


# Make binary -------------------------------------------------------------

# Recode every column to 1 where the value is 9 or higher and to 0 otherwise.
food.samp3 <-
    food.samp2 %>%
    mutate(across(everything(), function(x) as.numeric(x >= 9)))

# Synthetic ---------------------------------------------------------------

# Value passed to syn() as `k`.
# NOTE(review): per the synthpop docs `k` is the number of synthetic rows --
# confirm.
N <- 500

# Generate one synthetic data set (m = 1) from the binary sample using the
# "cart" method, with a fixed seed.
# NOTE(review): `minnumlevels = 2` presumably makes synthpop treat the 0/1
# columns as factors -- confirm against the synthpop documentation.
food.synth <- syn(
    food.samp3,
    method = "cart",
    m = 1,
    k = N,
    seed = 101,
    minnumlevels = 2
)


# Complete the data -------------------------------------------------------

# Seed the random weights drawn below.
set.seed(102)

FoodSample <-
    as_tibble(food.synth[["syn"]]) %>%
    # Convert every synthetic column to numeric by way of character.
    # NOTE(review): presumably the columns come back as factors, where a
    # direct as.numeric() would return level codes -- confirm.
    mutate(across(everything(), ~ as.numeric(as.character(.x)))) %>%
    mutate(
        # Random weights centred on 1 ...
        weight = rnorm(n = n(), mean = 1, sd = 0.15),
        # ... rescaled so that they sum to the number of rows.
        weight = weight / sum(weight) * n()
    ) %>%
    rowid_to_column(var = "id") %>%
    select(id, a:o, weight)



# Inject missing data randomly --------------------------------------------

# Replace `n` randomly chosen elements of `x` with NA.
#
# Arguments:
#   x  A vector.
#   n  Number of elements to blank out; a single non-negative number no
#      larger than length(x). Defaults to 5.
#
# Returns `x` with `n` of its elements set to NA. Positions are drawn
# without replacement, so exactly `n` elements are affected.
random_na <- function(x, n = 5) {

    # Fail early and clearly. The previous `1:length(x)` evaluated to c(1, 0)
    # for empty input, which could silently grow `x` instead of erroring.
    stopifnot(
        is.numeric(n),
        length(n) == 1L,
        !is.na(n),
        n >= 0,
        n <= length(x)
    )

    # sample.int() draws the same positions as sample(1:length(x), ...) for
    # non-empty `x`, so seeded results are unchanged.
    idx <- sample.int(length(x), size = n, replace = FALSE)
    x[idx] <- NA
    x
}


# Seed the random choice of values to blank out.
set.seed(103)

# Go long: the item columns a:o of each row are stacked into `val`, with the
# originating column name in `var`.
FoodSample <- pivot_longer(
    FoodSample,
    cols = a:o,
    names_to = "var",
    values_to = "val"
)

# Within each id, set 5 randomly chosen item values to NA.
FoodSample <- ungroup(
    mutate(
        group_by(FoodSample, id),
        val = random_na(val, n = 5)
    )
)

# Go wide again: one column per item.
FoodSample <- pivot_wider(
    FoodSample,
    names_from = var,
    values_from = val
)

# Print the item correlations again now that values are missing; `use` is
# passed through to cor() to control how the NAs are handled.
round(
    cor(select(FoodSample, a:o), use = "pairwise"),
    digits = 2
)

# Seed the column shuffle below.
set.seed(109)

FoodSample <-
    FoodSample %>%
    # Give the placeholder item columns their food names.
    rename(
        NYStrip = a,
        Salad = b,
        Ribeye = c,
        Salmon = d,
        PorkChop = e,
        Walleye = f,
        Chicken = g,
        LambChop = h,
        VegSoup = i,
        Turkey = j,
        Bisque = k,
        Chili = l,
        Duck = m,
        Ravioli = n,
        Tofu = o
    ) %>%
    # Shuffle the column order. sample() without `size` permutes all names,
    # so this no longer relies on a hard-coded column count (previously 17),
    # which would silently drop a column if one were ever added. With the
    # current 17 columns the seeded result is the same as before.
    select(all_of(sample(names(.)))) %>%
    # Pin `id` first and `weight` last, leaving the shuffled items between.
    select(id, everything(), -weight, weight)

# Save the finished data set as package data, replacing any existing copy.
usethis::use_data(FoodSample, overwrite = TRUE)
# ttrodrigz/onezero documentation built on May 9, 2023, 2:59 p.m.