In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

library(knitr)
opts_chunk$set(
  fig.pos = "!h", out.extra = "", warning = F, 
  fig.align = "center"
)

libs <- c("here", "dplyr", "stringr", "tidyr", "SingleCellExperiment", "slingshot",
          "condiments", "cowplot", "dyngen", "ggplot2", "condimentsPaper", "ggraph")
suppressMessages(
  suppressWarnings(sapply(libs, require, character.only = TRUE))
)
rm(libs)
theme_set(theme_classic())

The simulations are based on the dyngen package[@Cannoodt2020].

Running the simulations

This can be very long, we recommand running this in a separate script. None of the code in this section is run while compiling the vignettes.

Setting the parameters

library(BiocParallel)
set.seed(97174)
options(Ncpus = 16L)
nCores <- 16
ratio <- c(.5, .8, .9, .95)
n_boot_real <- 50
frac_real <- .1
n_boot_null <- 100
frac_null <- .05

sub_sample <- function(sce, frac) {
  df <- colData(sce) %>% as.data.frame() %>%
    mutate(id = as.numeric(rownames(.))) %>%
    group_by(condition) %>%
    sample_frac(frac)
  return(sce[, df$id])
}

Bifurcating trajectory with two conditions

res <- list()
for (multiplier in c(ratio, round(1 / ratio, 2)[4:1])) {
  sce <- create_bifurcating_simu(multiplier = multiplier)
  if (multiplier == .5) fork_sce <- sce
  set.seed(2019)
  res_loc <- mclapply(seq_len(n_boot_real), function(b, frac) {
      return(anayze_all(sub_sample(sce, frac = frac)))
    }, frac = frac_real, mc.cores = nCores)
    worked <- lapply(res_loc, function(res){
      return(!"SingleCellExperiment" %in% class(res))
    }) %>% unlist()
    res_loc <- res_loc[worked]
    res[[as.character(multiplier)]] <- bind_rows(res_loc)
}
real <- bind_rows(res, .id = "multiplier") %>%
  clean_results()

sce <- create_bifurcating_simu(multiplier = 1, nSim = 400)
set.seed(2019)
res_loc <- mclapply(seq_len(n_boot_null), function(b, frac) {
  return(anayze_all(sub_sample(sce, frac = frac)))
  }, frac = frac_null, mc.cores = nCores)
worked <- lapply(res_loc, function(res){
  return(!"SingleCellExperiment" %in% class(res))
}) %>% unlist()
res_loc <- res_loc[worked]
null <- bind_rows(res_loc) %>%
  mutate(multiplier = 1) %>%
  clean_results()

fork <- bind_rows("Real" = real, "Null" = null, .id = "Type") %>%
  arrange(multiplier)

Two bifurcations and two conditions

res <- list()
for (multiplier in c(ratio, round(1 / ratio, 2)[4:1])) {
 sce <- create_consecutive_bifurcating_simu(multiplier = multiplier)
 if (multiplier == .5) tree_sce <- sce
  set.seed(2019)
  res_loc <- mclapply(seq_len(n_boot_real), function(b, frac) {
    return(anayze_all(sub_sample(sce, frac = frac), shape = 3))
  }, frac = frac_real, mc.cores = nCores)
  worked <- lapply(res_loc, class) %>% unlist()
  worked <- worked == "data.frame"
  res_loc <- res_loc[worked]
  res[[as.character(multiplier)]] <- bind_rows(res_loc)
}
real <- bind_rows(res, .id = "multiplier") %>%
  clean_results()

sce <- create_consecutive_bifurcating_simu(multiplier = 1, nSim = 400)
set.seed(2019)
res_loc <- mclapply(seq_len(n_boot_null), function(b, frac) {
  return(anayze_all(sub_sample(sce, frac = frac), shape = 3))
}, frac = frac_null, mc.cores = nCores)
worked <- lapply(res_loc, class) %>% unlist()
worked <- worked == "data.frame"
res_loc <- res_loc[worked]
null <- bind_rows(res_loc) %>%
  mutate(multiplier = 1) %>%
  clean_results()

tree <- bind_rows("Real" = real, "Null" = null, .id = "Type") %>%
  arrange(multiplier)

Bifurcating trajectory with three conditions

res <- list()
for (multiplier in ratio) {
 sce <- create_bifurcating_three_conditions_simu(multiplier = multiplier)
 if (multiplier == .5) complex_sce <- sce
  set.seed(2019)
  res_loc <- mclapply(seq_len(n_boot_real), function(b, frac) {
    return(anayze_multiple_conditions(sub_sample(sce, frac = frac)))
  }, frac = frac_real, mc.cores = nCores)
  worked <- lapply(res_loc, function(res){
    return(!"SingleCellExperiment" %in% class(res))
  }) %>% unlist()
  res_loc <- res_loc[worked]
  res[[as.character(multiplier)]] <- bind_rows(res_loc)
}
real <- bind_rows(res, .id = "multiplier") %>%
  clean_results()

sce <- create_bifurcating_three_conditions_simu(multiplier = 1, nSim = 400)
set.seed(2019)
res_loc <- mclapply(seq_len(n_boot_null), function(b, frac) {
  return(anayze_multiple_conditions(sub_sample(sce, frac = frac)))
}, frac = frac_null, mc.cores = nCores)
worked <- lapply(res_loc, function(res){
  return(!"SingleCellExperiment" %in% class(res))
}) %>% unlist()
res_loc <- res_loc[worked]
null <- bind_rows(res_loc) %>%
  mutate(multiplier = 1) %>%
  clean_results()

complex <- bind_rows("Real" = real, "Null" = null, .id = "Type") %>%
  arrange(multiplier)

Trajectory with five lineages and two conditions

ratio <- c(.1, .2, .3, .5, 2, 3, 5, 10)
res <- list()
for (multiplier in ratio) {
 sce <- create_5_lineages_simu(multiplier = multiplier)
 if (multiplier == .5) complex_sce <- sce
  set.seed(2019)
  res_loc <- mclapply(seq_len(n_boot_real), function(b, frac) {
    return(anayze_multiple_conditions(sub_sample(sce, frac = frac)))
  }, frac = frac_real, mc.cores = nCores)
  worked <- lapply(res_loc, function(res){
    return(!"SingleCellExperiment" %in% class(res))
  }) %>% unlist()
  res_loc <- res_loc[worked]
  res[[as.character(multiplier)]] <- bind_rows(res_loc)
}
real <- bind_rows(res, .id = "multiplier") %>%
  clean_results()

sce <- create_5_lineages_simu(multiplier = 1, nSim = 400)
set.seed(2019)
res_loc <- mclapply(seq_len(n_boot_null), function(b, frac) {
  return(anayze_multiple_conditions(sub_sample(sce, frac = frac)))
}, frac = frac_null, mc.cores = nCores)
worked <- lapply(res_loc, function(res){
  return(!"SingleCellExperiment" %in% class(res))
}) %>% unlist()
res_loc <- res_loc[worked]
null <- bind_rows(res_loc) %>%
  mutate(multiplier = 1) %>%
  clean_results()

five_lins <- bind_rows("Real" = real, "Null" = null, .id = "Type") %>%
  arrange(multiplier)

Wrong step 1 outcome

ratio <- c(.9, .95)
res <- list()
for (multiplier in c(ratio, round(1 / ratio, 2)[2:1])) {
  sce <- create_bifurcating_simu(multiplier = multiplier)
  set.seed(2020)
  res_loc <- mclapply(seq_len(n_boot_real), function(b, frac) {
    return(anayze_all_per_cond(sub_sample(sce, frac = frac)))
  }, frac = frac_real, mc.cores = nCores)
  worked <- lapply(res_loc, class) %>% unlist()
  worked <- worked == "data.frame"
  res_loc <- res_loc[worked]
  res[[as.character(multiplier)]] <- bind_rows(res_loc)
}
real <- bind_rows(res, .id = "multiplier")

sce <- create_consecutive_bifurcating_simu(multiplier = 1, nSim = 400)
set.seed(2019)
null <- mclapply(seq_len(n_boot_null), function(b, frac) {
    return(anayze_all_per_cond(sub_sample(sce, frac = frac)))
  }, frac = frac_null, mc.cores = nCores) %>%
bind_rows()

step1_fail <- bind_rows("Real" = real, "Null" = null, .id = "Type") %>%
  arrange(multiplier) %>%
  mutate(X = X - 1, group = X - X %% 5) %>%
  select(-X) %>%
  mutate(multiplier = as.numeric(multiplier))

Unstability

add_noise_to_sce <- function(sce, noise_pst = .1, noise_wgt = .1) {
  pseudotime <- matrix(sce$sim_time, ncol = 2,
                       nrow = ncol(sce), byrow = FALSE)
  weights <- matrix(c(sce$from %in% c("sA", "sB", "sBmid", "sC"),
                      sce$from %in% c("sA", "sB", "sBmid", "sD")),
                    nrow = ncol(sce), byrow = FALSE)
  normWeights <- sweep(weights, 1,
                       FUN = "/",
                       STATS = apply(weights, 1, sum))
  pseudotime <- pseudotime * matrix(rnorm(2 * ncol(sce), mean = 1, sd = noise_pst), ncol = 2)
  normWeights <- apply(normWeights, 1, function(row) {
    switch <- rbinom(1, 1, noise_wgt)
    if (switch) {
      return(1 - row)
    } else {
      return(row)
    }
  })
  return(list("pseudotime" = pseudotime,
              "cellWeights" = t(normWeights)))
}

set.seed(20876)
sce <-  create_bifurcating_simu(multiplier = .85)
i <- 1
res <- list()
for (noise_pst in c(0:40 / 10)) {
  for (noise_wgt in c(0:5 / 10)) {
    for (j in 1:100) {
      print(i)
      estimates <- add_noise_to_sce(sce, noise_pst, noise_wgt)
      res_noise <- progressionTest(pseudotime = estimates$pseudotime, 
                                  cellWeights = estimates$cellWeights,
                                  conditions = sce$condition,
                                  thresh = .01) %>%
        select(-lineage)
      res_noise$noise_pst <- noise_pst
      res_noise$noise_wgt <- noise_wgt
      res[[i]] <- res_noise
      i <- i + 1
    }
  }
}
unstability <- bind_rows(res)

Exploring the simulations

Low dimension representation

data("fork_sce", package = "condimentsPaper")
plot_reduced_dim_together(fork_sce) +
  annotate("text", x = 0, y = -.4, label = "Start", size = 4) +
  annotate("text", x = .35, y = .05, label = "Lineage 1", size = 4) +
  annotate("text", x = -.4, y = .15, label = "Lineage 2", size = 4)

data("tree_sce", package = "condimentsPaper")
plot_reduced_dim_together(tree_sce)  +
  annotate("text", x = -.15, y = .35, label = "Start", size = 4) +
  annotate("text", x = -.3, y = -.1, label = "Lineage 1", size = 4) +
  annotate("text", x = .1, y = -.35, label = "Lineage 2", size = 4) +
  annotate("text", x = .25, y = -.05, label = "Lineage 3", size = 4)

data("complex_sce", package = "condimentsPaper")
plot_reduced_dim_together(complex_sce) +
  annotate("text", x = -.15, y = -.3, label = "Start", size = 4) +
  annotate("text", x = .3, y = .2, label = "Lineage 1", size = 4) +
  annotate("text", x = -.35, y = .2, label = "Lineage 2", size = 4)

data("fivelin_sce", package = "condimentsPaper")
plot_reduced_dim_together(fivelin_sce) +
  annotate("text", x = 0, y = -.1, label = "Start", size = 4) +
  annotate("text", x = .3, y = .15, label = "Lineage 1", size = 4, angle = 30) +
  annotate("text", x = .2, y = .3, label = "Lineage 2", size = 4, angle = 30) +
  annotate("text", x = -.3, y = .05, label = "Lineage 3", size = 4) +
  annotate("text", x = -.15, y = .3, label = "Lineage 4", size = 4) +
  annotate("text", x = 0, y = -.35, label = "Lineage 5", size = 4)

Network branching

Two lineages

set.seed(0989021)
backbone <- backbone_bifurcating()
model_common <-
  initialise_model(
    backbone = backbone,
    num_cells = 100,
    num_tfs = nrow(backbone$module_info),
    num_targets = 250,
    num_hks = 250,
    simulation_params = simulation_default(
      census_interval = 10, 
      ssa_algorithm = ssa_etl(tau = 300 / 3600),
      experiment_params = simulation_type_wild_type(num_simulations = 100)
    )
  ) %>%
  generate_tf_network()
set.seed(298)
net <- plot_backbone_modulenet_simplify(model_common) +
  guides(col = FALSE, edge_width = FALSE) +
  ggraph::scale_edge_width_continuous(range = c(.5, .5)) + 
  theme(plot.background = element_blank()) 
net <- ggdraw() +
  draw_plot(net, 0, 0, scale = 1.2)
net

Three lineages

set.seed(1082)
backbone <- backbone_consecutive_bifurcating()
model_common <-
  initialise_model(
    backbone = backbone,
    num_cells = 100,
    num_tfs = nrow(backbone$module_info),
    num_targets = 250,
    num_hks = 250,
    simulation_params = simulation_default(
      census_interval = 10, 
      ssa_algorithm = ssa_etl(tau = 300 / 3600),
      experiment_params = simulation_type_wild_type(num_simulations = 100)
    )
  ) %>%
  generate_tf_network()
set.seed(2)
net2 <- plot_backbone_modulenet_simplify(model_common) +
  guides(col = FALSE, edge_width = FALSE) +
  ggraph::scale_edge_width_continuous(range = c(.5, .5)) + 
  theme(plot.background = element_blank()) 
net2 <- ggdraw() +
  draw_plot(net2, 0, 0, scale = 1.2)
net2

Five lineages

set.seed(15)
bblego_list <- list(
  bblego_start("A", type = "simple", num_modules = 2),
  bblego_linear("A", to = "B", num_modules = 2),
  bblego_branching("B", c("C", "D", "E")),
  bblego_branching("C", c("F", "G")),
  bblego_branching("E", c("H", "I")),
  bblego_end("D", num_modules = 4), 
  bblego_end("F", num_modules = 4), bblego_end("G", num_modules = 4),
  bblego_end("H", num_modules = 6), bblego_end("I", num_modules = 6)
)
backbone <- bblego(.list = bblego_list)
model_common <-
  initialise_model(
    backbone = backbone,
    num_cells = 100,
    num_tfs = nrow(backbone$module_info),
    num_targets = 250,
    num_hks = 250,
    simulation_params = simulation_default(
      census_interval = 10, 
      ssa_algorithm = ssa_etl(tau = 300 / 3600),
      experiment_params = simulation_type_wild_type(num_simulations = 100)
    )
  ) %>%
  generate_tf_network()
set.seed(14)
net3 <- plot_backbone_modulenet_simplify(model_common) +
  guides(col = FALSE, edge_width = FALSE) +
  scale_edge_width_continuous(range = c(.5, .5)) + 
  theme(plot.background = element_blank()) 
net3 <- ggdraw() +
  draw_plot(net3, 0, 0, scale = 1.2)
net3

Analyzing the simulations

data("fork", package = "condimentsPaper")
data("tree", package = "condimentsPaper")
data("complex", package = "condimentsPaper")
data("five_lins", package = "condimentsPaper")

Comparison of methods

df <- lapply(
  list("fork" = fork, "tree" = tree, "complex" = complex, "5lin" = five_lins),
  all_metrics, cutoff = .05) %>%
  bind_rows(.id = "dataset") %>%
  mutate(value = round(value, 2)) %>%
  mutate(metric = factor(metric, levels = c("TNR", "PPV", "TPR", "NPV", "F1")),
         dataset = case_when(
           dataset == "fork" ~ "a)",
           dataset == "tree" ~ "b)",
           dataset == "complex" ~ "c)",
           dataset == "5lin" ~ "d)"
         ),
         dataset = factor(dataset, levels = c("a)", "b)", "c)", "d)"))) %>%
  arrange(dataset, metric) %>%
  mutate(test_type = factor(test_type, 
                            levels = c("condiments_sling_prog", "condiments_mon_prog",
                                       "condiments_sling_diff", "DAseq", "milo")),
         label = if_else(is.na(value), "NA", as.character(value)))
table_plot <- function(df) {
  p <- ggplot(df, aes(x = test_type, y = metric, label = label,
               fill = as.numeric(label))) +
  geom_tile() +
  geom_text(na.rm = FALSE) +
  scale_fill_viridis_c() +
  scale_x_discrete(labels = c("progressionTest\n(Slingshot)", "progressionTest\n(Monocle)",
                              "fateSelectionTest\n(Slingshot)", "DAseq", "Milo")) +
  labs(x = "", y = "", fill = "(Value - 1)\n") +
  guides(fill = "none") +
  facet_grid(dataset ~ ., switch = "y") +
  theme(axis.text.x = element_text(size = 14, angle = -20, vjust = .2)) +
  theme(legend.position =  "right",
        legend.title = element_text(size = 16),
        legend.text = element_text(size = 14),
        legend.background = element_blank(),
        panel.spacing.y = unit(7, "pt"),
        strip.text.y.left = element_text(angle = 0))
  return(p)
}
table_plot(df)

Wrong step 1 outcome

data("step1_fail", package = "condimentsPaper")
cutoffs <- c((2:9) * .001, (1:9) * .01, .1 * 1:10)
df <- step1_fail %>% 
  mutate(test_type = paste0(method, "_", test_type)) %>% 
  select(test_type, multiplier, p.value, Type) %>%
  dplyr::rename("adjusted_p_value" = "p.value") %>%
  lapply(cutoffs, all_metrics, df = .)
names(df) <- cutoffs
df <- bind_rows(df, .id = "cutoff") %>%
  mutate(cutoff = as.numeric(cutoff)) %>%
  select(cutoff, metric, value, test_type) %>%
  pivot_wider(names_from = metric, values_from = value) %>%
  filter(str_detect(test_type, "sling")) %>%
  mutate(step1 = if_else(str_detect(test_type, "normal"),
                         "Fail to reject the null\n(Common trajectory)",
                         "Reject the null\n(Separate trajectory)"),
         test_type = word(test_type, 3, sep = "_"),
         test_type = if_else(test_type == "diff", "fateSelectionTest", "progressionTest"))
p <- ggplot(df, aes(x = 1 - PPV, y = TPR, col = step1)) +
  geom_path() +
  geom_point(size = 2) +
  facet_wrap(~test_type, scales = "free_y") + 
  scale_color_brewer(palette = "Set1") +
  labs(col = "Step 1 results:") +
  theme_classic() +
  theme(legend.position = "bottom")
p

Unstability

data("unstability", package = "condimentsPaper")
df <- unstability %>%
  group_by(noise_pst, noise_wgt) %>%
  summarise(p.value = median(p.value))
p <- ggplot(df, aes(x = noise_pst, y = p.value)) +
  geom_point(aes(col = as.character(noise_wgt))) +
  labs(x = "Noise on Pseudotime", y = "p value", 
       col = "Noise on\nbranch assignement")
legend <- get_legend(p + theme(legend.position = "bottom"))
p <- p + guides(col = FALSE, fill = FALSE)
plot_grid(plot_grid(p, p + scale_y_log10(), ncol = 2, scale = .95),
          legend, rel_heights = c(10, 1), nrow = 2)

Special look at three condition situation

ggplot(complex, aes(x = as.numeric(factor(multiplier, levels = unique(multiplier))),
                    y = adjusted_p_value)) +
  geom_boxplot(aes(group = interaction(test_type, multiplier),
                   fill = test_type)) +
  geom_hline(yintercept = .05) +
  scale_x_continuous(labels = unique(complex$multiplier),
                     breaks = 1:5) +
  scale_color_brewer(palette = "Pastel2") +
  scale_fill_brewer(palette = "Pastel2") +
  labs(x = "Effect Size: Multiplier on the master regulator",
       y = "adjusted p-value", col = "Test", fill = "Test") +
  theme(legend.position = "bottom")

Session Info

sessionInfo()

References

HectorRDB/condimentsPaper documentation built on Oct. 16, 2021, 7:02 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

HectorRDB/condimentsPaper
Package to accompany the condiments Paper

In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

Running the simulations

Setting the parameters

Bifurcating trajectory with two conditions

Two bifurcations and two conditions

Bifurcating trajectory with three conditions

Trajectory with five lineages and two conditions

Wrong step 1 outcome

Unstability

Exploring the simulations

Low dimension representation

Network branching

Two lineages

Three lineages

Five lineages

Analyzing the simulations

Comparison of methods

Wrong step 1 outcome

Unstability

Special look at three condition situation

Session Info

References

R Package Documentation

Browse R Packages

We want your feedback!

HectorRDB/condimentsPaper Package to accompany the condiments Paper

In HectorRDB/condimentsPaper: Package to accompany the condiments Paper

Running the simulations

Setting the parameters

Bifurcating trajectory with two conditions

Two bifurcations and two conditions

Bifurcating trajectory with three conditions

Trajectory with five lineages and two conditions

Wrong step 1 outcome

Unstability

Exploring the simulations

Low dimension representation

Network branching

Two lineages

Three lineages

Five lineages

Analyzing the simulations

Comparison of methods

Wrong step 1 outcome

Unstability

Special look at three condition situation

Session Info

References

R Package Documentation

Browse R Packages

We want your feedback!

HectorRDB/condimentsPaper
Package to accompany the condiments Paper