R/6_simulation.R

Defines functions kinetics_noise_simple kinetics_noise_none .generate_cells_precompile_reactions simulation_default generate_cells

Documented in generate_cells kinetics_noise_none kinetics_noise_simple simulation_default

#' Simulate the cells
#'
#' [generate_cells()] runs simulations in order to determine the gold standard
#' of the simulations.
#' [simulation_default()] is used to configure parameters pertaining this process.
#'
#' @param model A dyngen intermediary model for which the gold standard been generated with [generate_gold_standard()].
#' @param burn_time The burn in time of the system, used to determine an initial state vector. If `NULL`, the burn time will be inferred from the backbone.
#' @param total_time The total simulation time of the system. If `NULL`, the simulation time will be inferred from the backbone.
#' @param ssa_algorithm Which SSA algorithm to use for simulating the cells with [GillespieSSA2::ssa()]
#' @param census_interval A granularity parameter for the outputted simulation.
#' @param store_reaction_firings Whether or not to store the number of reaction firings.
#' @param store_reaction_propensities Whether or not to store the propensity values of the reactions.
#' @param compute_cellwise_grn Whether or not to compute the cellwise GRN activation values.
#' @param compute_dimred Whether to perform a dimensionality reduction after simulation.
#' @param compute_rna_velocity Whether or not to compute the propensity ratios after simulation.
#' @param experiment_params A tibble generated by rbinding multiple calls of [simulation_type_wild_type()] and [simulation_type_knockdown()].
#' @param kinetics_noise_function A function that will generate noise to the kinetics of each simulation.
#'   It takes the `feature_info` and `feature_network` as input parameters,
#'   modifies them, and returns them as a list. See [kinetics_noise_none()] and [kinetics_noise_simple()].
#'
#' @return A dyngen model.
#'
#' @seealso [dyngen] on how to run a complete dyngen simulation
#'
#' @importFrom GillespieSSA2 ssa
#' @export
#'
#' @examples
#' library(dplyr)
#' model <-
#'   initialise_model(
#'     backbone = backbone_bifurcating(),
#'     simulation = simulation_default(
#'       ssa_algorithm = ssa_etl(tau = .1),
#'       experiment_params = bind_rows(
#'         simulation_type_wild_type(num_simulations = 4),
#'         simulation_type_knockdown(num_simulations = 4)
#'       )
#'     )
#'   )
#' \donttest{
#' data("example_model")
#' model <- example_model |> generate_cells()
#'
#' plot_simulations(model)
#' plot_gold_mappings(model)
#' plot_simulation_expression(model)
#' }
generate_cells <- function(model) {
  # satisfy r cmd check
  time <- NULL

  if (model$verbose) cat("Precompiling reactions for simulations\n")
  model <- .add_timing(model, "6_simulations", "precompile reactions for simulations")
  reactions <- .generate_cells_precompile_reactions(model)

  # simulate cells one by one
  if (model$verbose) cat("Running ", nrow(model$simulation_params$experiment_params), " simulations\n", sep = "")
  model <- .add_timing(model, "6_simulations", "running simulations")
  simulations <-
    pbapply::pblapply(
      X = seq_len(nrow(model$simulation_params$experiment_params)),
      cl = model$num_cores,
      FUN = .generate_cells_simulate_cell,
      model = model,
      reactions = reactions
    )

  # split up simulation data
  model <- .add_timing(model, "6_simulations", "generate output")
  model$simulations <- lst(
    meta = map_df(simulations, "meta"),
    counts = do.call(rbind, map(simulations, "counts")),
    cellwise_grn = do.call(rbind, map(simulations, "cellwise_grn")),
    reaction_firings = do.call(rbind, map(simulations, "reaction_firings")),
    reaction_propensities = do.call(rbind, map(simulations, "reaction_propensities")),
    rna_velocity = do.call(rbind, map(simulations, "rna_velocity")),
    kd_multiplier = do.call(rbind, map(simulations, "kd_multiplier")),
    perturbed_parameters = do.call(rbind, map(simulations, "perturbed_parameters"))
  )

  # predict state
  if (model$verbose) cat("Mapping simulations to gold standard\n", sep = "")
  model <- .add_timing(model, "6_simulations", "map simulations to gold standard")
  if (!is.null(model[["gold_standard"]])) {
    model$simulations$meta <- .generate_cells_predict_state(model)
  } else {
    model$simulations$meta <- model$simulations$meta |> rename(sim_time = time)
  }

  # perform dimred
  model <- .add_timing(model, "6_simulations", "perform dimred")
  if (model$simulation_params$compute_dimred) {
    if (model$verbose) cat("Performing dimred\n", sep = "")
    model <- model |> calculate_dimred()
  }

  # return
  .add_timing(model, "6_simulations", "end")
}

#' @export
#' @rdname generate_cells
#' @importFrom GillespieSSA2 ssa_etl
simulation_default <- function(
  burn_time = NULL,
  total_time = NULL,
  ssa_algorithm = ssa_etl(tau = 30 / 3600),
  census_interval = 4,
  experiment_params = bind_rows(
    simulation_type_wild_type(num_simulations = 32),
    simulation_type_knockdown(num_simulations = 0)
  ),
  store_reaction_firings = FALSE,
  store_reaction_propensities = FALSE,
  compute_cellwise_grn = FALSE,
  compute_dimred = TRUE,
  compute_rna_velocity = FALSE,
  kinetics_noise_function = kinetics_noise_simple(mean = 1, sd = .005)
) {
  lst(
    burn_time,
    total_time,
    ssa_algorithm,
    census_interval,
    experiment_params,
    store_reaction_firings,
    store_reaction_propensities,
    compute_cellwise_grn,
    compute_dimred,
    compute_rna_velocity,
    kinetics_noise_function
  )
}

#' @importFrom GillespieSSA2 compile_reactions
.generate_cells_precompile_reactions <- function(model) {
  # fetch paraneters and settings
  sim_system <- model$simulation_system
  reactions <- sim_system$reactions

  buffer_ids <- unique(unlist(map(reactions, "buffer_ids")))

  # compile prop funs
  comp_funs <- GillespieSSA2::compile_reactions(
    reactions = reactions,
    buffer_ids = buffer_ids,
    state_ids = if (is.matrix(sim_system$initial_state)) colnames(sim_system$initial_state) else names(sim_system$initial_state),
    params = sim_system$parameters,
    hardcode_params = FALSE,
    fun_by = 1000L
  )

  comp_funs
}

#' Add small noise to the kinetics of each simulation
#'
#' @param mean The mean level of noise (should be 1)
#' @param sd The sd of the noise (should be a relatively small value)
#'
#' @return A list of noise generators for the kinetics.
#'
#' @rdname kinetics_noise
#' @export
kinetics_noise_none <- function() {
  function(feature_info, feature_network) {
    lst(feature_info, feature_network)
  }
}

#' @rdname kinetics_noise
#' @export
kinetics_noise_simple <- function(mean = 1, sd = .005) {
  # satisfy r cmd check
  mrna_halflife <- protein_halflife <- NULL

  function(feature_info, feature_network) {
    feature_info <-
      feature_info |>
      mutate_at(c("basal"), ~ pmin(1, . * rnorm(length(.), mean = mean, sd = sd))) |>
      mutate_at(c("transcription_rate", "splicing_rate", "translation_rate", "mrna_halflife", "protein_halflife"), ~ . * rnorm(length(.), mean = mean, sd = sd)) |>
      mutate(
        mrna_decay_rate = log(2) / mrna_halflife,
        protein_decay_rate = log(2) / protein_halflife
      )

    feature_network <-
      feature_network |>
      mutate_at(c("strength", "hill"), ~ . * rnorm(length(.), mean = mean, sd = sd))

    lst(
      feature_info,
      feature_network
    )
  }
}

.generate_cells_simulate_cell <- function(simulation_i, model, reactions, verbose = FALSE, debug = FALSE) {
  # satisfy r cmd check
  time <- NULL

  sim_params <- model$simulation_params
  sim_system <- model$simulation_system
  expr_params <- sim_params$experiment_params |> extract_row_to_list(simulation_i)

  set.seed(expr_params$seed)

  # randomise kinetics to simulate intercellular differences
  out <- sim_params$kinetics_noise_function(model$feature_info, model$feature_network)
  out2 <- .kinetics_calculate_dissociation(out$feature_info, out$feature_network)
  feature_info <- out2$feature_info
  feature_network <- out2$feature_network
  perturbed_parameters <- .kinetics_extract_parameters(feature_info, feature_network)

  # get initial state
  initial_state <- sim_system$initial_state
  if (is.matrix(initial_state)) {
    initial_state <- initial_state[simulation_i, ]
  }

  log_propensity <- sim_params$store_reaction_propensities || sim_params$compute_rna_velocity

  if (sim_params$burn_time > 0) {
    burn_reaction_firings <- reactions
    rem <- setdiff(sim_system$molecule_ids, sim_system$burn_variables)
    if (length(rem) > 0) {
      burn_reaction_firings$state_change[match(rem, sim_system$molecule_ids), ] <- 0
    }

    # burn in
    out <- GillespieSSA2::ssa(
      initial_state = initial_state,
      reactions = burn_reaction_firings,
      final_time = sim_params$burn_time,
      census_interval = sim_params$census_interval,
      params = perturbed_parameters,
      method = sim_params$ssa_algorithm,
      stop_on_neg_state = FALSE,
      verbose = verbose,
      log_buffer = FALSE,
      log_firings = sim_params$store_reaction_firings,
      log_propensity = log_propensity
    )

    burn_meta <-
      tibble(
        time = c(head(out$time, -1), sim_params$burn_time)
      ) |>
      mutate(time = time - max(time))
    burn_counts <- out$state |> Matrix::Matrix(sparse = TRUE)

    burn_reaction_firings <- if (sim_params$store_reaction_firings) out$firings |> Matrix::Matrix(sparse = TRUE) else NULL
    burn_reaction_propensities <- if (log_propensity) out$propensity |> Matrix::Matrix(sparse = TRUE) else NULL

    new_initial_state <- out$state[nrow(out$state), ]
  } else {
    burn_meta <- NULL
    burn_counts <- NULL
    new_initial_state <- initial_state
    burn_reaction_firings <- NULL
    burn_reaction_propensities <- NULL
  }

  total_time <- sim_params$total_time
  if (expr_params$type == "knockdown") {
    total_time <- total_time * expr_params$timepoint
  }

  # actual simulation
  sim <- GillespieSSA2::ssa(
    initial_state = new_initial_state,
    reactions = reactions,
    final_time = total_time,
    census_interval = sim_params$census_interval,
    params = perturbed_parameters,
    method = sim_params$ssa_algorithm,
    stop_on_neg_state = FALSE,
    verbose = verbose,
    log_buffer = FALSE,
    log_firings = sim_params$store_reaction_firings,
    log_propensity = log_propensity,
    return_simulator = TRUE
  )
  if (debug) {
    return(sim)
  }

  # run simulation
  sim$run()

  # get output
  # based on GillespieSSA2:::return_output
  out <- list(
    time = sim$output_time,
    state = sim$output_state,
    propensity = sim$output_propensity,
    firings = sim$output_firings,
    buffer = sim$output_buffer,
    stats = sim$get_statistics()
  )
  # set names of objects
  colnames(out$state) <- names(new_initial_state)
  if (sim$log_propensity) {
    colnames(out$propensity) <- reactions$reaction_ids
  }
  if (sim$log_buffer) {
    colnames(out$buffer) <- reactions$buffer_ids
  }
  if (sim$log_firings) {
    colnames(out$firings) <- reactions$reaction_ids
  }

  # reformat output
  meta <-
    tibble(
      time = c(head(out$time, -1), total_time)
    )
  counts <- out$state |> Matrix::Matrix(sparse = TRUE)
  reaction_firings <- if (sim_params$store_reaction_firings) out$firings |> Matrix::Matrix(sparse = TRUE) else NULL
  reaction_propensities <- if (log_propensity) out$propensity |> Matrix::Matrix(sparse = TRUE) else NULL
  kd_state <- out$state[nrow(out$state), ]

  # simulation of knockdown, if any
  if (expr_params$type == "knockdown") {
    assert_that(!reactions$hardcode_params)

    kd_total_time <- sim_params$total_time - total_time

    kd_gene_candidates <- expr_params$genes
    if (length(kd_gene_candidates) == 1 && kd_gene_candidates == "*") {
      kd_gene_candidates <- model$feature_info$feature_id
    }
    kd_genes <- sample(kd_gene_candidates, expr_params$num_genes)
    kd_wprs <- paste0("transcription_rate_", kd_genes)

    kd_params <- perturbed_parameters
    kd_params[kd_wprs] <- kd_params[kd_wprs] * expr_params$multiplier

    kd_multiplier <- tibble(simulation_i, gene = kd_genes, multiplier = expr_params$multiplier)

    out <- GillespieSSA2::ssa(
      initial_state = kd_state,
      reactions = reactions,
      final_time = kd_total_time,
      census_interval = sim_params$census_interval,
      params = kd_params,
      method = sim_params$ssa_algorithm,
      stop_on_neg_state = FALSE,
      verbose = verbose,
      log_buffer = FALSE,
      log_firings = sim_params$store_reaction_firings,
      log_propensity = log_propensity
    )

    kd_meta <-
      tibble(
        time = total_time + c(head(out$time, -1), kd_total_time)
      )
    kd_counts <- out$state |> Matrix::Matrix(sparse = TRUE)
    kd_reaction_firings <- if (sim_params$store_reaction_firings) out$firings |> Matrix::Matrix(sparse = TRUE) else NULL
    kd_reaction_propensities <- if (log_propensity) out$propensity |> Matrix::Matrix(sparse = TRUE) else NULL
  } else {
    kd_meta <- NULL
    kd_counts <- NULL
    kd_reaction_firings <- NULL
    kd_reaction_propensities <- NULL
    kd_multiplier <- NULL
  }

  if (!is.null(burn_meta)) {
    meta <- bind_rows(burn_meta, meta)
    counts <- rbind(burn_counts, counts)
    reaction_firings <- rbind(burn_reaction_firings, reaction_firings)
    reaction_propensities <- rbind(burn_reaction_propensities, reaction_propensities)
  }
  if (!is.null(kd_meta)) {
    meta <- bind_rows(meta, kd_meta)
    counts <- rbind(counts, kd_counts)
    reaction_firings <- rbind(reaction_firings, kd_reaction_firings)
    reaction_propensities <- rbind(reaction_propensities, kd_reaction_propensities)
  }

  cellwise_grn <-
    if (sim_params$compute_cellwise_grn) {
      .generate_cells_compute_cellwise_grn(feature_info, feature_network, new_initial_state, reactions, counts, sim)
    } else {
      NULL
    }

  rna_velocity <-
    if (sim_params$compute_rna_velocity) {
      .generate_cells_compute_rna_velocity(model, reaction_propensities)
    } else {
      NULL
    }

  meta <- meta |>
    mutate(simulation_i) |>
    select(simulation_i, everything())

  # only store if specifically asked for
  if (!sim_params$store_reaction_propensities) {
    reaction_propensities <- NULL
  }

  lst(
    meta, counts, cellwise_grn,
    reaction_firings, reaction_propensities,
    rna_velocity,
    kd_multiplier, perturbed_parameters
  )
}

.generate_cells_compute_cellwise_grn <- function(feature_info, feature_network, new_initial_state, reactions, counts, sim) {
  # satisfy r cmd check
  feature_id <- transcription_rate <- from <- to <- reg_y_match <- `.` <- j <- transcription_rate <- ko_effect <- NULL

  fn <-
    feature_network |>
    left_join(feature_info |> select(to = feature_id, transcription_rate), by = "to") |>
    transmute(
      reg_y_match = match(paste0("mol_protein_", from), names(new_initial_state)),
      tar_prop_match = match(paste0("transcription_", to), reactions$reaction_ids),
      j = row_number(),
      transcription_rate
    )

  ko_effects <-
    map_df(
      seq_len(nrow(counts)),
      function(counti) {
        sim$state <- counts[counti, ]
        sim$calculate_propensity()

        ko_effects <-
          fn |>
          group_by(reg_y_match) |>
          group_modify(function(.x, .y) {
            df <- .x

            if (nrow(df) == 0) {
              return(tibble())
            }

            orig_reg_state <- sim$state[[.y$reg_y_match]]

            if (orig_reg_state != 0) {
              orig_tar_prop <- sim$propensity[df$tar_prop_match]
              sim$state[[.y$reg_y_match]] <- 0
              sim$calculate_propensity()
              ko_effect <- orig_tar_prop - sim$propensity[df$tar_prop_match]
            } else {
              ko_effect <- rep(0, nrow(df))
            }

            out <-
              df |>
              transmute(
                i = counti,
                j,
                ko_effect = ko_effect / transcription_rate
              )

            if (orig_reg_state != 0) {
              sim$state[[.y$reg_y_match]] <- orig_reg_state
              sim$propensity[df$tar_prop_match] <- orig_tar_prop
            }

            out
          }) |>
          ungroup()
      }
    )

  ko_effects <- ko_effects |> filter(abs(ko_effect) > .001)

  Matrix::sparseMatrix(
    i = ko_effects$i,
    j = ko_effects$j,
    x = ko_effects$ko_effect,
    dims = c(nrow(counts), nrow(feature_network)),
    dimnames = list(
      NULL,
      paste0(feature_network$from, "->", feature_network$to)
    )
  )
}

.generate_cells_predict_state <- function(model) {
  simulation_i <- time <- from <- to <- NULL

  # fetch gold standard data
  gs_meta <- model$gold_standard$meta
  gold_ix <- !gs_meta$burn
  gs_meta <- gs_meta[gold_ix, , drop = FALSE]
  gs_counts <- model$gold_standard$counts[gold_ix, , drop = FALSE]
  gs_dimred <- model$gold_standard$dimred[gold_ix, , drop = FALSE]

  # fetch simulation data
  # (gold standard counts only contains TFs, so filter those)
  sim_meta <- model$simulations$meta
  sim_counts <- model$simulations$counts[, colnames(gs_counts), drop = FALSE]

  # calculate 1NN -> a full distance matrix could be avoided
  if (nrow(sim_counts) > 10000) {
    start <- seq(1, nrow(sim_counts), by = 10000)
    stop <- pmin(start + 9999, nrow(sim_counts))
    best_matches <- pmap(lst(start, stop), function(start, stop) {
      dis <- dynutils::calculate_distance(gs_counts, sim_counts[start:stop, , drop = FALSE], method = model$distance_metric)
      best_match <- apply(dis, 2, which.min)
    })
    best_match <- unlist(best_matches)
  } else {
    dis <- dynutils::calculate_distance(gs_counts, sim_counts, method = model$distance_metric)
    best_match <- apply(dis, 2, which.min)
  }

  # add predictions to sim_meta
  sim_meta <-
    bind_cols(
      sim_meta |> select(simulation_i, sim_time = time),
      gs_meta[best_match, , drop = FALSE] |> select(from, to, time)
    ) |>
    group_by(from, to) |>
    mutate(time = dynutils::scale_minmax(time)) |>
    ungroup()

  # check if all branches are present
  sim_edges <- sim_meta |>
    group_by(from, to) |>
    summarise(n = n())
  network <- full_join(model$gold_standard$network, sim_edges, by = c("from", "to"))

  if (any(is.na(network$n))) {
    warning("Simulation does not contain all gold standard edges. This simulation likely suffers from bad kinetics; choose a different seed and rerun.")
  }

  # return output
  sim_meta
}

.generate_cells_compute_rna_velocity <- function(model, reaction_propensities) {
  # satisfy r cmd check
  feature_id <- NULL

  feature_info <- model$feature_info
  sim_system <- model$simulation_system

  # Get the names of the genes and corresponding mRNA molecules
  feature_ids <- feature_info$feature_id
  relevant_molecules <- c(feature_info$mol_mrna, feature_info$mol_premrna)
  map <- set_names(c(feature_ids, feature_ids), relevant_molecules)

  # Extract for each spliced mRNA its relevant reactions.
  reaction_effects <- map_df(
    seq_along(sim_system$reactions),
    function(reaction_ix) {
      reaction <- sim_system$reactions[[reaction_ix]]
      tibble(
        reaction_ix = reaction_ix,
        feature_id = names(reaction$effect),
        effect = reaction$effect,
        name = reaction$name
      ) |> filter(feature_id %in% relevant_molecules)
    }
  ) |>
    mutate(
      row = row_number(),
      feature_id = map[feature_id],
      molecule_ix = match(feature_id, feature_ids)
    )

  # Get the propensities of the relevant reactions
  propensities <- reaction_propensities[, reaction_effects$reaction_ix]

  # Get the propensities of production reactions, per mRNA
  perreaction_to_pergene <- with(
    reaction_effects,
    Matrix::sparseMatrix(i = row, j = molecule_ix, x = effect, dims = c(nrow(reaction_effects), length(feature_ids)))
  )
  rna_velocity <- propensities %*% perreaction_to_pergene
  rna_velocity@x[is.na(rna_velocity@x)] <- 0
  rna_velocity <- Matrix::drop0(rna_velocity)
  colnames(rna_velocity) <- feature_ids

  rna_velocity
}

#' @param num_simulations The number of simulations to run.
#' @param seed A set of seeds for each of the simulations.
#' @rdname generate_cells
#' @export
simulation_type_wild_type <- function(num_simulations, seed = sample.int(10 * num_simulations, num_simulations)) {
  if (num_simulations == 0) {
    NULL
  } else {
    tibble(
      type = "wild_type",
      seed
    )
  }
}

#' @param timepoint The relative time point of the knockdown
#' @param genes Which genes to sample from. `"*"` for all genes.
#' @param num_genes The number of genes to knockdown.
#' @param multiplier The strength of the knockdown. Use 0 for a full knockout, 0<x<1 for a knockdown, and >1 for an overexpression.
#'
#' @rdname generate_cells
#' @export
simulation_type_knockdown <- function(
  num_simulations,
  timepoint = runif(num_simulations),
  genes = "*",
  num_genes = sample(1:5, num_simulations, replace = TRUE, prob = 0.25^(1:5)),
  multiplier = runif(num_simulations, 0, 1),
  seed = sample.int(10 * num_simulations, num_simulations)
) {
  if (num_simulations == 0) {
    NULL
  } else {
    assert_that(
      (is.list(genes) && length(genes) == num_simulations) ||
        is.character(genes) || is.factor(genes)
    )
    if (!is.list(genes)) genes <- as.list(rep(genes, num_simulations))

    tibble(
      type = "knockdown",
      timepoint,
      genes,
      num_genes,
      multiplier,
      seed
    )
  }
}

Try the dyngen package in your browser

Any scripts or data that you put into this service are public.

dyngen documentation built on March 17, 2026, 5:07 p.m.