#'@title Simulate univariate -omics data
#'@description TBD
#'@param df.single a data frame of univariate feature values (e.g. relative abundances)
#' and stratification variable(s)
#'@param dag one of 'null', 'confounder', or 'biomarker' to determine simulation strata;
#' null shuffles abundances leaving y.var and z.var relationship intact,
#' confounder simulates abundances and shuffles y.var values within z.var strata,
#' biomarker simulates abundances and shuffles z.var values within y.var strata
#'@param type one of 'bootstrap', 'gaussian', or 'nbinom', where gaussian is a pseudo-gaussian
#' that replaces all negative simulated counts with 0 as a realistic assumption
#'@param y.var a disease status or other primary stratification variable name
#'@param z.var a covariate or other secondary stratification variable name
#'@param n the number of simulated datasets to produce OR NA if replication carried
#' out elsewhere (e.g. within the batchtools framework)
#'@param sample.size the total desired sample size of the simulated datasets; defaults to n
#'@return a tibble with nested list-column of n simulated datasets (1 if n = NA)
#'@importFrom dplyr sample_n n rename select group_by ungroup mutate mutate_if mutate_at
#'@importFrom purrr map
#'@importFrom tibble as_tibble add_column
#'@importFrom tidyr expand_grid nest unnest
#'@importFrom rlang !! !!! sym syms
#'@export
simulate.single.factor.data <- function(df.single, dag = 'null', type = 'bootstrap',
y.var = NA, z.var = NA, n = NA,
sample.size = n) {
# what is sample.size > dim(df.single) ?
# how to 'bias' a bit to maintain enough cases/controls in case of small df.single size ?
# creates n splits
# not for batchtools framework
if (!is.na(n)) {
switch(dag,
'null' = {
switch(type,
'bootstrap' = {
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(df) {
df %>%
dplyr::mutate_at(vars(abundance), ~ sample(., replace = FALSE)) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)})) %>%
dplyr::ungroup() },
'nbinom' = {
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(df) {
orig.data <- use_series(df, abundance)
if (all(orig.data == 0)) sim.data <- rep(0, length(orig.data))
else {
mu <- mean(orig.data)
theta <- (mu+mu^2)/var(orig.data) # source?
sim.data <- rnbinom(length(orig.data), mu = mu, size = theta)}
df %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = sim.data, .before = 1) %>%
dplyr::sample_n(size = sample.size, replace = FALSE) })) },
'gaussian' = {
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(df) {
df %>%
dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
mean(.$abundance),
sd(.$abundance)))) %>%
dplyr::select(-abundance) %>%
dplyr::rename(abundance = 'simulated') %>%
dplyr::select(abundance, everything()) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)})) %>%
dplyr::ungroup() })},
'confounder' = {
switch(type,
'bootstrap' = {
props <- df.single %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::group_by(!!! rlang::syms(c(y.var, z.var))) %>%
dplyr::summarize(n.obs = n()) %>%
ungroup()
splits <- df.single %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
# create splits
tidyr::expand_grid(id = seq(n)) %>%
# group by z and id (MET + split num)
dplyr::group_by(id, !!! rlang::syms(z.var)) %>%
# resample w replacement, shuffles proportions
dplyr::sample_n(dplyr::n(), replace = TRUE) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
# map over each split to correct proportions
dplyr::mutate(data = purrr::map(data, function(df.split) {
# browser()
df.split %>%
dplyr::group_by(!! rlang::sym(z.var)) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map2(!!sym(z.var), data,
function(z.val, y.strata) {
# browser()
z.props <- props %>%
dplyr::filter(!! rlang::sym(z.var) == z.val) %>%
dplyr::select(-!!rlang::sym(z.var))
labels <- z.props %>%
get(y.var, .) %>%
purrr::map2(z.props$n.obs, ~ rep_len(.x, .y)) %>%
purrr::flatten_dbl()
y.strata %>%
dplyr::select(-!!rlang::sym(y.var)) %>%
tibble::add_column(!!rlang::sym(y.var) := labels) %>%
mutate_at(vars(!!rlang::sym(y.var)), ~ sample(.)) })) %>%
dplyr::ungroup() %>%
tidyr::unnest(data) %>%
dplyr::select('abundance', y.var, z.var) }))
# old scheme - shuffle y.var labels ----
# dplyr::mutate(data = purrr::map(data, function(df.shuffle) {
# #browser()
# df.shuffle %>%
# dplyr::group_by(!!! rlang::syms(z.var)) %>%
# dplyr::mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.)) %>%
# dplyr::ungroup() %>%
# dplyr::sample_n(size = sample.size, replace = FALSE)}))
},
'nbinom' = {
props <- df.single %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::group_by(!!! rlang::syms(c(y.var, z.var))) %>%
dplyr::summarize(n.obs = n()) %>%
ungroup()
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(df) {
df %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::mutate(mu = purrr::map_dbl(data, ~ mean(.$abundance))) %>%
dplyr::mutate(var = purrr::map_dbl(data, ~ var(.$abundance))) %>%
dplyr::mutate(theta = purrr::map_dbl(data, ~ ((mu+mu^2)/var))) %>%
dplyr::mutate(data = purrr::pmap(list(data, mu, var, theta),
function(data, mu, var, theta) {
# browser()
if (mu == 0) {
data %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rep(0, nrow(.)),
.before = 1) %>%
mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.))}
else {
data %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rnbinom(nrow(.),
mu = mu,
size = theta),
.before = 1) %>%
mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.))}})) %>%
dplyr::select(-c('mu','var','theta')) %>%
tidyr::unnest(data) %>%
dplyr::ungroup() %>%
dplyr::select(c('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)}))
# old scheme -----------------
# splits <- df.single %>%
# tidyr::expand_grid(id = seq(n)) %>%
# dplyr::group_by(id) %>%
# tidyr::nest() %>% # map over each split
# dplyr::mutate(data = purrr::map(data, function(df) {
# y.strata <- df %>%
# dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
# dplyr::group_by(!!! rlang::syms(y.var)) %>%
# tidyr::nest() %>%
# dplyr::ungroup()
# df %>%
# dplyr::group_by(!!! rlang::syms(z.var)) %>%
# tidyr::nest() %>%
# dplyr::ungroup() %>%
# dplyr::mutate(data = purrr::map2(data, y.strata$data,
# function(z.strat, y.strat) {
# mu <- y.strat %>%
# magrittr::use_series(abundance) %>%
# mean()
# var <- y.strat %>%
# magrittr::use_series(abundance) %>%
# var()
# theta <- ((mu+mu^2)/var)
# if (mu == 0) {
# z.strat %>%
# dplyr::select(-abundance) %>%
# tibble::add_column(abundance = rep(0, nrow(.)),
# .before = 1)}
# else {
# z.strat %>%
# dplyr::select(-abundance) %>%
# tibble::add_column(abundance = rnbinom(nrow(.),
# mu = mu,
# size = theta),
# .before = 1)} })) %>%
# tidyr::unnest(data) %>%
# dplyr::select(c('abundance', y.var, z.var)) %>%
# dplyr::sample_n(size = sample.size, replace = FALSE)}))
},
'gaussian' = {
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(df) {
df %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(z.strat) {
z.strat %>% # 2 column tbl
dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
mean(.$abundance),
sd(.$abundance)))) %>%
dplyr::select(-abundance) %>%
dplyr::rename(abundance = 'simulated') %>%
dplyr::select(c('abundance', y.var)) %>%
dplyr::ungroup()})) %>%
dplyr::ungroup() %>%
tidyr::unnest(data) %>%
dplyr::select(c('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)}))
# splits <- df.single %>%
# tidyr::expand_grid(id = seq(n)) %>%
# dplyr::group_by(id) %>%
# tidyr::nest() %>%
# dplyr::ungroup() %>% # map over each split
# dplyr::mutate(data = purrr::map(data, function(df) {
# y.strata <- df %>%
# dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
# dplyr::group_by(!!! rlang::syms(y.var)) %>%
# tidyr::nest() %>%
# dplyr::ungroup()
# df %>%
# dplyr::group_by(!!! rlang::syms(z.var)) %>%
# tidyr::nest() %>%
# dplyr::ungroup() %>%
# dplyr::mutate(data = purrr::map2(data, y.strata$data,
# function(z.strat, y.strat) {
# sub.mean <- y.strat %>%
# magrittr::use_series(abundance) %>%
# mean()
# sub.sd <- y.strat %>%
# magrittr::use_series(abundance) %>%
# sd()
# z.strat %>%
# dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
# sub.mean,
# sub.sd))) %>%
# dplyr::select(-abundance) %>%
# dplyr::rename(abundance = 'simulated') %>%
# dplyr::ungroup() })) %>%
# tidyr::unnest(data) %>%
# dplyr::select(c('abundance', y.var, z.var)) %>%
# dplyr::sample_n(size = sample.size, replace = FALSE)}))
} )},
'biomarker' = {
switch(type,
'bootstrap' = {
props <- df.single %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::group_by(!!! rlang::syms(c(y.var, z.var))) %>%
dplyr::summarize(n.obs = n()) %>%
ungroup()
splits <- df.single %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
tidyr::expand_grid(id = seq(n)) %>%
# group by DIABSTATUS and id
dplyr::group_by(id, !!! rlang::syms(y.var)) %>%
# simulate cases and controls separately for each split
dplyr::sample_n(dplyr::n(), replace = TRUE) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
# map over each split
dplyr::mutate(data = purrr::map(data, function(df.split) {
z.strata <- df.split %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::ungroup()
df.split %>%
dplyr::group_by(!!! rlang::syms(y.var)) %>%
tidyr::nest() %>%
dplyr::ungroup() %>%
dplyr::mutate(data = purrr::map2(data, z.strata$data,
function(y.strat, z.strat) {
# y.strat : all y == y level 1 (eg MET = 0)
#browser()
new.ab <- z.strat %>%
dplyr::sample_n(nrow(y.strat), replace = TRUE) %>%
magrittr::use_series(abundance)
y.strat %>%
dplyr::mutate(abundance = new.ab)})) %>%
# fix proportions
dplyr::mutate(data = purrr::map2(!!rlang::sym(y.var), data,
function(y.val, z.strata) {
# browser()
y.props <- props %>%
dplyr::filter(!! rlang::sym(y.var) == y.val) %>%
dplyr::select(-!!rlang::sym(y.var))
labels <- y.props %>%
get(z.var, .) %>%
purrr::map2(y.props$n.obs, ~ rep_len(.x, .y)) %>%
purrr::flatten_dbl()
z.strata %>%
dplyr::select(-!!rlang::sym(z.var)) %>%
tibble::add_column(!!rlang::sym(z.var) := labels) %>%
mutate_at(vars(!!rlang::sym(z.var)), ~ sample(.))
})) %>%
tidyr::unnest(data) %>%
dplyr::select('abundance', all_of(y.var), all_of(z.var)) %>%
dplyr::sample_n(size = sample.size, replace = FALSE) })) },
'nbinom' = {
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>% # map over each split
dplyr::mutate(data = purrr::map(data, function(df) {
z.strata <- df %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::ungroup()
df %>%
dplyr::group_by(!!! rlang::syms(y.var)) %>%
tidyr::nest() %>%
dplyr::ungroup() %>%
dplyr::mutate(data = purrr::map2(data, z.strata$data,
function(y.strat, z.strat) {
mu <- z.strat %>%
magrittr::use_series(abundance) %>%
mean()
var <- z.strat %>%
magrittr::use_series(abundance) %>%
var()
theta <- ((mu+mu^2)/var)
if (mu == 0) {
y.strat %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rep(0, nrow(.)),
.before = 1)}
else {
y.strat %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rnbinom(nrow(y.strat),
mu = mu,
size = theta),
.before = 1)} })) %>%
tidyr::unnest(data) %>%
dplyr::select(c('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)})) },
'gaussian' = {
splits <- df.single %>%
tidyr::expand_grid(id = seq(n)) %>%
dplyr::group_by(id) %>%
tidyr::nest() %>%
dplyr::ungroup() %>% # map over each split
dplyr::mutate(data = purrr::map(data, function(df) {
z.strata <- df %>%
dplyr::select(dplyr::one_of('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::ungroup()
df %>%
dplyr::group_by(!!! rlang::syms(y.var)) %>%
tidyr::nest() %>%
dplyr::ungroup() %>%
dplyr::mutate(data = purrr::map2(data, z.strata$data,
function(y.strat, z.strat) {
sub.mean <- z.strat %>%
magrittr::use_series(abundance) %>%
mean()
sub.sd <- z.strat %>%
magrittr::use_series(abundance) %>%
sd()
y.strat %>%
dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
sub.mean,
sub.sd))) %>%
dplyr::select(-abundance) %>%
dplyr::rename(abundance = 'simulated') %>%
dplyr::select(c('abundance', z.var)) %>%
dplyr::ungroup() })) %>%
tidyr::unnest(data) %>%
dplyr::select(c('abundance', all_of(y.var), all_of(z.var))) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)}))
}) }) }
# creates a single split
# for batchtools framework
else {
switch(dag,
'null' = {
switch(type,
'bootstrap' = {
df.single %>%
dplyr::mutate_at(vars(abundance), ~ sample(., replace = FALSE)) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)},
'nbinom' = {
orig.data <- use_series(df.single, abundance)
if (all(orig.data == 0)) sim.data <- rep(0, length(orig.data))
else {
mu <- mean(orig.data)
theta <- (mu+mu^2)/var(orig.data) # source?
sim.data <- rnbinom(length(orig.data), mu = mu, size = theta)}
df.single %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = sim.data, .before = 1) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)},
'gaussian' = {
df.single %>%
dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
mean(.$abundance),
sd(.$abundance)))) %>%
dplyr::select(-abundance) %>%
dplyr::rename(abundance = 'simulated') %>%
dplyr::select(abundance, everything()) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)} )},
'confounder' = {
switch(type,
'bootstrap' = {
df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
dplyr::sample_n(dplyr::n(), replace = TRUE) %>%
dplyr::mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.)) %>%
dplyr::ungroup() %>%
dplyr::sample_n(size = sample.size, replace = FALSE)},
'nbinom' = {
df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::mutate(mu = purrr::map_dbl(data, ~ mean(.$abundance))) %>%
dplyr::mutate(var = purrr::map_dbl(data, ~ var(.$abundance))) %>%
dplyr::mutate(theta = purrr::map_dbl(data, ~ ((mu+mu^2)/var))) %>%
dplyr::mutate(data = purrr::pmap(list(data, mu, var, theta),
function(data, mu, var, theta) {
if (mu == 0) {
data %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rep(0, nrow(.)),
.before = 1) %>%
mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.))}
else {
data %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rnbinom(nrow(.),
mu = mu,
size = theta),
.before = 1) %>%
mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.))}})) %>%
dplyr::select(-c('mu','var','theta')) %>%
tidyr::unnest(data) %>%
dplyr::ungroup() %>%
dplyr::select(c('abundance', y.var, z.var)) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)},
'gaussian' = {
df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map(data, function(z.strat) {
z.strat %>%
dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
mean(.$abundance),
sd(.$abundance)))) %>%
dplyr::select(-abundance) %>%
dplyr::rename(abundance = 'simulated') %>%
dplyr::select(c('abundance', y.var)) %>%
# might not need to do? generating might be random enough
# dplyr::mutate_at(vars(!!! rlang::syms(y.var)), ~ sample(.)) %>%
dplyr::ungroup()})) %>%
dplyr::ungroup() %>%
tidyr::unnest(data) %>%
dplyr::sample_n(size = sample.size, replace = FALSE) } )},
'biomarker' = {
switch(type,
'bootstrap' = {
z.strata <- df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::ungroup()
df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(y.var)) %>%
tidyr::nest() %>%
dplyr::ungroup() %>%
dplyr::mutate(data = purrr::map2(data, z.strata$data,
function(y.strat, z.strat) {
# browser()
new.ab <- z.strat %>%
dplyr::sample_n(nrow(y.strat), replace = TRUE) %>%
magrittr::use_series(abundance)
y.strat %>%
dplyr::mutate(abundance = new.ab)})) %>%
tidyr::unnest(data) %>%
dplyr::select(c('abundance', y.var, z.var)) %>%
# pull randomly without sampling weights, assume roughly same represenation
dplyr::sample_n(size = sample.size, replace = FALSE) },
'nbinom' = {
z.strata <- df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::ungroup()
df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(y.var)) %>%
tidyr::nest() %>%
dplyr::ungroup() %>%
dplyr::mutate(data = purrr::map2(data, z.strata$data,
function(y.strat, z.strat) {
# browser()
mu <- mean(z.strat$abundance)
var <- var(z.strat$abundance)
theta <- ((mu+mu^2)/var)
if (mu == 0) {
y.strat %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rep(0, nrow(.)),
.before = 1) %>%
dplyr::mutate_at(vars(!!! rlang::syms(z.var)), ~ sample(.))}
else {
y.strat %>%
dplyr::select(-abundance) %>%
tibble::add_column(abundance = rnbinom(nrow(.),
mu = mu,
size = theta),
.before = 1) %>%
dplyr::mutate_at(vars(!!! rlang::syms(z.var)), ~ sample(.))}})) %>%
tidyr::unnest(data) %>%
dplyr::ungroup() %>%
dplyr::select(c('abundance', y.var, z.var)) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)},
'gaussian' = {
z.strata <- df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(z.var)) %>%
tidyr::nest() %>%
dplyr::ungroup()
df.single %>%
dplyr::select(dplyr::one_of('abundance', y.var, z.var)) %>%
dplyr::group_by(!!! rlang::syms(y.var)) %>%
tidyr::nest() %>%
dplyr::ungroup() %>%
dplyr::mutate(data = purrr::map2(data, z.strata$data,
function(y.strat, z.strat) {
sub.mean <- z.strat %>%
magrittr::use_series(abundance) %>%
mean()
sub.sd <- z.strat %>%
magrittr::use_series(abundance) %>%
sd()
y.strat %>%
dplyr::mutate(simulated = ansimo::replace.negative.nums(rnorm(nrow(.),
sub.mean,
sub.sd))) %>%
dplyr::select(-abundance) %>%
dplyr::rename(abundance = 'simulated') %>%
dplyr::select(c('abundance', z.var)) %>%
dplyr::ungroup() })) %>%
tidyr::unnest(data) %>%
dplyr::ungroup() %>%
dplyr::select(c('abundance', y.var, z.var)) %>%
dplyr::sample_n(size = sample.size, replace = FALSE)} )} )}
}
########################
### HELPER FUNCTIONS ###
########################
proportional.labels <- function(df.single, props, y.var, z.var) {
# df.single = single univariate abundance split sim (eg 1982 x 3)
browser()
# check if y-z proportions already ok
cond <- df.single %>%
dplyr::group_by(!!!rlang::syms(c(y.var, z.var))) %>%
dplyr::summarize(n.obs = n()) %>%
dplyr::ungroup() %>%
magrittr::equals(props)
if (all(cond)) return(df.single)
else {
df.single %>%
dplyr::group_by(!! rlang::sym(y.var)) %>%
tidyr::nest() %>%
dplyr::mutate(data = purrr::map2(data, get(y.var, .), function(df.y, y.val) {
# df.y df with a single value of y.var
browser()
z.props <- props %>%
dplyr::filter(!!sym(y.var) == y.val) %>%
dplyr::select(-!!rlang::sym(y.var))
labels <- z.props %>%
get(z.var, .) %>%
purrr::map2(z.props$n.obs, ~ rep_len(.x, .y)) %>%
purrr::flatten_dbl()
df.y %>%
dplyr::select(-!!rlang::sym(y.var)) %>%
tibble::add_column(!!rlang::sym(y.var) := labels) %>%
mutate_at(vars(!!rlang::sym(y.var)), ~ sample(.))
}))
}
}
#'@title Replace negative abundance values
#'@description helper function that sets all negative values from rnorm simulation to 0
#'@param x a vector of simulated abundances
#'@return vector of non-zero simulated abundances
replace.negative.nums <- function(x) {
x[x < 0] <- 0
return(x)
}
check.label.proportions <- function(splits, s.var) {
splits %>%
dplyr::mutate(p = map(data, function(x) {
x %>%
dplyr::group_by(!!! rlang::syms(s.var)) %>%
dplyr::count()}))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.