inst/doc/woodsimulatr_basics.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 5
)

## ----setup--------------------------------------------------------------------
library(WoodSimulatR)
library(magrittr)
library(ggplot2)
pander::panderOptions('knitr.auto.asis', FALSE);

## -----------------------------------------------------------------------------
summ_fun <- function(ds, grp = c('country', 'subsample', 'loadtype')) {
  grp <- intersect(grp, names(ds));
  v <- setdiff(names(ds), grp);
  
  r <- cor(ds[v]);

  ds <- tibble::add_column(ds, n = 1);
  v <- c('n', v);
  ds <- tidyr::gather(ds, 'property', 'value', !!! rlang::syms(v));
  ds <- dplyr::mutate(
    ds,
    property = factor(
      property,
      levels=v,
      labels=ifelse(v=='n', v, paste0(v, '_mean')),
      ordered = TRUE
    )
  );
  
  grp <- c(grp, 'property');
  ds <- dplyr::group_by(ds, !!! rlang::syms(grp));
  
  summ <- dplyr::summarise(
    ds,
    res = if (property[1] == 'n') sprintf('%.0f', sum(value)) else
      sprintf(
      if(property[1] %in% c('f_mean', 'ip_f_mean')) '%.1f (%.0f)' else '%.0f (%.0f)',
      mean(value), 100*sd(value)/mean(value)),
    .groups = 'drop_last'
  );
  pander::pander(
    tidyr::spread(summ, property, res),
    split.tables = Inf
  );
  
  pander::pander(r)
  
  invisible(summ);
}

compare_with_def <- function(ds, ssd, target = c('mean', 'cov')) {
  target <- match.arg(target);
  
  ds <- dplyr::group_by(ds, country);
  summ <- dplyr::summarise(
    ds,
    f_mean.ach = mean(f),
    f_cov.ach = sd(f) / f_mean.ach,
    E_mean.ach = mean(E),
    E_cov.ach = sd(E) / E_mean.ach,
    rho_mean.ach = mean(rho),
    rho_cov.ach = sd(rho) / rho_mean.ach,
    .groups = 'drop_last'
  );
  
  stopifnot(!anyDuplicated(ssd$country));
  summ <- dplyr::left_join(
    summ,
    dplyr::select(
      dplyr::mutate(ssd, f_cov = f_sd / f_mean, E_cov = E_sd / E_mean, rho_cov = rho_sd / rho_mean), 
      country, f_mean, f_cov, E_mean, E_cov, rho_mean, rho_cov
    ),
    by = 'country'
  );
  
  summ <- tidyr::pivot_longer(
    summ,
    -country,
    names_to = c('gdpname', '.value'),
    names_sep = '_'
  );
  summ <- dplyr::mutate(
    summ,
    gdpname = factor(gdpname, levels = c('f', 'E', 'rho'), ordered = TRUE)
  );

  if (target == 'mean') {
    ggplot(data = summ, aes(mean.ach, mean)) +
      geom_abline(slope = 1, intercept = 0) +
      geom_text(aes(label = country)) +
      geom_point(alpha = 0.5) +
      facet_wrap(vars(gdpname), scales = 'free') +
      theme(axis.text.x = element_text(angle = 90));
  } else {
    ggplot(data = summ, aes(cov.ach, cov)) +
      geom_abline(slope = 1, intercept = 0) +
      geom_text(aes(label = country)) +
      geom_point(alpha = 0.5) +
      facet_wrap(vars(gdpname), scales = 'free') +
      theme(axis.text.x = element_text(angle = 90));
  }
}

## ----results='asis'-----------------------------------------------------------
dataset_0 <- simulate_dataset(random_seed = 2345);

summ_fun(dataset_0);

## ----results='asis'-----------------------------------------------------------
get_subsample_definitions(loadtype = 't') %>% 
  dplyr::select(-species, -loadtype) %>%
  dplyr::arrange(country) %>%
  pander::pander(split.table = Inf);

## ----results='asis'-----------------------------------------------------------
get_subsample_definitions(loadtype = 'be') %>% 
  dplyr::select(-species, -loadtype) %>%
  dplyr::arrange(country) %>%
  pander::pander(split.table = Inf);

## ----results='asis'-----------------------------------------------------------
ssd_c <- get_subsample_definitions(
  country = c('at', 'de', 'fi', 'pl', 'se', 'si', 'sk'),
  loadtype = 't'
);

dataset_c <- simulate_dataset(
  random_seed = 12345,
  n = 5000,
  subsets = ssd_c
);

summ_fun(dataset_c);

## -----------------------------------------------------------------------------
compare_with_def(dataset_c, ssd_c, 'm')

## -----------------------------------------------------------------------------
compare_with_def(dataset_c, ssd_c, 'cov')

## ----results='asis'-----------------------------------------------------------
ssd_cn <- get_subsample_definitions(
  country = c(at = 1, de = 3, fi = 1.5, pl = 2, se = 3, si = 1, sk = 1),
  loadtype = 't'
);

dataset_cn <- simulate_dataset(
  random_seed = 12345,
  n = 5000,
  subsets = ssd_cn
);

summ_fun(dataset_cn);

## -----------------------------------------------------------------------------
ssd_custom <- tibble::tribble(
  ~width, ~thickness, ~f_mean, ~f_sd,
      80,     40,      27.5,    9.0,
     140,     40,      29.4,    9.7,
     160,     60,      31.6,    9.3,
     200,     50,      30.2,   11.4, 
     240,     95,      25.5,    4.8,
     250,     40,      25.3,   11.2
);

dataset_custom <- simulate_dataset(
  random_seed = 12345,
  n = 5000,
  subsets = ssd_custom
);

summ_fun(dataset_custom, grp = c('width', 'thickness', 'loadtype'));

## -----------------------------------------------------------------------------
plot_sim_gdp <- function(ds, simb, simulated_vars, ...) {
  extra_aes <- rlang::enexprs(...);
  ds <- dplyr::rename(ds, f_ref = f, E_ref = E, rho_ref = rho);
  if (!any(simulated_vars %in% names(ds))) ds <- simulate_conditionally(data = ds, simbase = simb);
  ds <- tidyr::pivot_longer(
    data = ds,
    cols = tidyselect::any_of(c('f_ref', 'E_ref', 'rho_ref', simulated_vars)),
    names_to = c('name', '.value'),
    names_sep = '_'
  );
  ds <- dplyr::mutate(
    ds,
    name = factor(name, levels = c('f', 'E', 'rho'), ordered = TRUE)
  );
  simname <- names(ds);
  simname <- simname[dplyr::cumany(simname == 'name')];
  simname <- setdiff(simname, c('name', 'ref'));
  stopifnot(length(simname) == 1);
  ggplot(data = ds, mapping = aes(.data[[simname]], ref, !!!extra_aes)) +
    geom_point(alpha = .2, shape = 20) +
    geom_abline(slope = 1, intercept = 0, alpha = .5, linetype = 'twodash') +
    facet_wrap(vars(name), scales = 'free') +
    theme(axis.text.x = element_text(angle = 90));
} # undebug(plot_sim_gdp)

## -----------------------------------------------------------------------------
sb_untransf <- dataset_0 %>%
  dplyr::rename(f_siml = f, E_siml = E, rho_siml = rho) %>%
  simbase_covar(
    variables = c('f_siml', 'E_siml', 'rho_siml', 'ip_f', 'E_dyn', 'ip_rho')
  );

sb_untransf;

## ----results='asis'-----------------------------------------------------------
dataset_c_sim <- simulate_conditionally(dataset_c, sb_untransf);
names(dataset_c_sim) %>% pander::pander();

## -----------------------------------------------------------------------------
plot_sim_gdp(dataset_c_sim, sb_untransf, c('f_siml', 'E_siml', 'rho_siml'));

## -----------------------------------------------------------------------------
sb_transf <- dataset_0 %>%
  dplyr::rename(f_simt = f, E_simt = E, rho_simt = rho) %>%
  simbase_covar(
    variables = c('f_simt', 'E_simt', 'rho_simt', 'ip_f', 'E_dyn', 'ip_rho'),
    transforms = list(f_simt = scales::log_trans())
  );
dataset_c_sim <- simulate_conditionally(dataset_c_sim, sb_transf);
plot_sim_gdp(dataset_c_sim, sb_transf, c('f_simt', 'E_simt', 'rho_simt'));

## -----------------------------------------------------------------------------
sb_group <- dataset_0 %>%
  dplyr::group_by(country) %>%
  dplyr::rename(f_simg = f, E_simg = E, rho_simg = rho) %>%
  simbase_covar(
    variables = c('f_simg', 'E_simg', 'rho_simg', 'ip_f', 'E_dyn', 'ip_rho'),
    transforms = list(f_simg = scales::log_trans())
  );

sb_group

## -----------------------------------------------------------------------------
dataset_0_sim <- simulate_conditionally(dataset_0, sb_group);
plot_sim_gdp(dataset_0_sim, sb_group, c('f_simg', 'E_simg', 'rho_simg'), colour=country);

## -----------------------------------------------------------------------------
sb_group_c <- dataset_c %>%
  dplyr::group_by(country) %>%
  simbase_covar(
    variables = c('f', 'E', 'rho', 'ip_f', 'E_dyn', 'ip_rho'),
    transforms = list(f = scales::log_trans())
  );

sb_group_c

## ----results='asis'-----------------------------------------------------------
dataset_cn2 <- simulate_dataset(
  random_seed = 12345,
  n = 5000,
  subsets = ssd_cn,
  simbase = sb_group_c
);

summ_fun(dataset_cn2);

Try the WoodSimulatR package in your browser

Any scripts or data that you put into this service are public.

WoodSimulatR documentation built on June 20, 2022, 9:05 a.m.