# !diagnostics off
# Setting for chunks
knitr::opts_chunk$set(echo = TRUE, eval = TRUE, cache = FALSE)
# Package
library(tidyverse)
library(deSolve)
library(reshape2)
library(microbenchmark)
library(data.table)
library(vegan)

Introduction

This script is to simulate the migration of microbial community in a chemostat environment. The population dynamics of model is based on MacArthur's consume-resource model, in which the dynamics of consumers (microbes) and resources (metabolites) are described by corresponding differential equations. There are three parts of the simulation: initializing species pool, consumer-resource dynamics, and migration.

Regional species pool

Consumer-resource dynamics

Migration

Migration has three stages:

  1. Truncate the community and resource composition from the previous CR dynamics.
  2. Generate migrant community from sampling from species pool.
  3. Pool resident and migrant communities. The pooled community is then put into CR_model as initial condition for the following CR dynamics.

General workflow for simulation

  1. Set up model parameters and functions for simulation.
  2. Build up a regional species pool by `pool_build(save_data = TRUE, data_names = "species_pool")``
  3. From the species pool, sample to make an initial community by using community_generate(I = .01, threshold = 1e-3, r = 1).
  4. Run consumer-resource dynamics by using CR_model()
  5. Sample migration community from species pool. Pool the migrant community to the focal community by community_coalesce(). Repeat the steps above until all migrations are done.
  6. Let the coalesced community to stabilize. Stabilization time is the same as total migration time.

Parameters

# Model parameters for CR dynamics
P <- 2           # Number of resources in the initial community
S_pool <- 1000    # Number of species in the pool
I <- 1            # Inocula drop size
S <- 100           # Number available spaces in the initial community
J <- 1             # Flow rate of chemostat
Rin <- rep(10, P) * c(1, rep(0, P-1)) # Inflow concentration, a numeric vector with length of P
mi <- 0            # Death rate
p <- 1             # Degree of generalist
cross_feeding <- TRUE 
same_stoi <- TRUE
function_group <- 10
same_initial_community <- TRUE

# Migrant parameters
migration_size <- .1
migration_period <- 10
migration_event_times <- 20
same_migrant_community <- FALSE

# Program parameters
time_step <- 1
time_stabilization <- 20 # Time limit
replicate <- 20

Model parameters for consumer-resource model

Model parameters for migration

Program parameters

Functions

Build species regional pool

Function build_species is used to build up the consumer characteristics. This function has the following inputs:

This model generates five consumer-specific characteristics as outputs:

# Function that defines species abilities: m, u, w, and stoi
# Default: cross-feeding, same stoichiometric matrix, functional groups
# Function that defines species abilities: m, u, w, and stoi
build_species <- function (
  P = 10, # Number of resource
  S_pool = 1000, # Number of species in the pool
  mi = 0, # Numberic. Death rate
  cross_feeding = TRUE, # Logical. Explicitly include cross feeding in stoichiometric matrix
  same_stoi = TRUE,
  function_group = 10, # Functional group
  species_abundance = "log-normal",
  seed = 1, # Setting seed
  save_data = TRUE 
) {
  # Seed
  set.seed(seed)

  # Death rate, a numeric vector with length of S
  m <- rep(mi, S_pool) 

  # Uptaking rate ----
  if (function_group == 0) {
    # Randomly drawn from uniform distribution, where u sum is 1
    f <- rep(1, S_pool)
    u <- matrix(runif((P - 1) * S_pool), nrow = P-1, ncol = S_pool) %>%
      rbind(0, 1) %>% apply(2, sort)
    u <- matrix(u[-1, ] - u[-(P + 1), ], P, S_pool)

  } else if (is.numeric(function_group)) {
    # See "Emergent property SI" for how to conduct phylogenetic sampling
    library(MCMCpack) # Load this package for dirichlet distribution

    # Empty uptaking rate
    u <- matrix(NA, nrow = P, ncol = S_pool)

    # Family for speices
    f <- rep(1:P, each = floor(S_pool / P))[1:S_pool]

    # Family: concentration parameter for resource in family
    a_family <- rep(NA, S_pool)
    for (i in 1:S_pool) {
      a_preferred <- rnorm(1, 0.4, 0.01) # Preferred resource
      a_other <- runif(P - 1) # Other resources

      u[f[i], i] <- a_preferred
      u[-f[i], i] <- (1 - a_preferred) * a_other / sum(a_other)
      a_family[i] <- a_preferred
    }

    # Specie: total resource capacity 
    t_species <- rnorm(S_pool, 1, 0.01) 

    # Species: proportion of fixed budget of uptake resources
    K_family <- 100 # High K: species are very similar
    for (i in 1: S_pool) u[,i] <- rdirichlet(1, K_family * u[,i])

    # Uptaking rate
    u <- t_species * u

  }

  # Conversion efficiency ----
  w <- matrix(1, P, S_pool)

  # Stoichiometic matrix ----
  if (cross_feeding == FALSE | (cross_feeding == TRUE & P == 1)) { # One resource
    stoi <- array(0, dim = c(P, P, S_pool))
    for (i in 1:S_pool) diag(stoi[,,i]) <- -1
  } else if (cross_feeding == TRUE & same_stoi == FALSE) {
    # Cross-feeding
    stoi <- array(NA, dim = c(P, P, S_pool))
    for (k in 1:S_pool) {
      for (i in 1:P) {
        w_i <- w[,k] / w[i,k] # denoted as ki in the note
        mu <- runif(1)
        d <- runif(P)
        stoi[i,,k] = (mu * d) / c(d %*% w_i)
      }
    }
    for (i in 1:S_pool) diag(stoi[,,i]) <- -1
  } else if (cross_feeding == TRUE & same_stoi == TRUE) {
    # Cross-feeding
    stoi <- array(NA, dim = c(P, P, S_pool))
    for (i in 1:P) {
      w_i <- w[,1] / w[i,1] # denoted as ki in the note
      mu <- runif(1)
      d <- runif(P)
      stoi[i,,1] = (mu * d) / c(d %*% w_i)
    }
    diag(stoi[,,1]) <- -1
    stoi[,,-1] <- stoi[,,1]
  }

  # Species-abundance distribution ----
  abundance_species <- 
    rlnorm(n = 10000, meanlog = 1, sdlog = 2) %>%
    round(2)
  abundance_species <- abundance_species / sum(abundance_species)

  # Save data in RData form ----
  if (save_data) {
    save(m, f, u, w, stoi, abundance_species, 
         file = "parameter/species_pool.RData")
  }
}

build_species(P = P, S_pool = S_pool,
              cross_feeding = cross_feeding,
              function_group = function_group)


# Build species pool
build_species(P = P, S_pool = S_pool)
load("parameter/species_pool.RData")

Species distribution curve. Should determine the sdlog for model.

abundance <-
  data.frame(abundance = rlnorm(n = S_pool, meanlog = 1, sdlog = 1)) %>%
  mutate(abundance = round(abundance, 2), # Round to the second decimal
         abundance_log = log(abundance))

# Plot the distrobution
ggplot(abundance, aes(x = abundance)) +
  geom_histogram(bins = 50) +
  theme_bw()

# 
abundance <- 
  abundance %>%
  mutate(abundance_sum = sum(abundance),
         abundance_proportion = abundance / abundance_sum) %>%
  select(-abundance_sum)

# NT/Nmax
sum(abundance_species) / max(abundance_species)

Ordinary differential equation

The function that defines the ODE for consumer-resource dynamics.

# ODE that calculates the value of the derivatives at each time value
# Use the names of the variables as defined in the vectors above
CR_ode <- function(t, state, parameters) {
  with(as.list(c(state, parameters)), {

    # Create a vector of R
    RR <- sapply(paste0("R", sprintf("%03d", resource)), function(x) eval(parse(text = x)))

    # Create a vector of X
    XX <- sapply(paste0("X", sprintf("%05d", consumer)), function(x) eval(parse(text = x)))

    # Resource derivatives
    dR <- rep(NA, P_func)
    for (j in 1:P_func) {
      dR[j] <- J * (Rin[j] - RR[j]) + sum(diag(t(stoi_func[j, , ]) %*% (u_func * RR)) * XX)
    }

    # Consumer derivatives
    dX <- rep(NA, S_func) 
    for (i in 1:S_func) { 
      dX[i] <- XX[i] * (sum(w_func[, i] * u_func[, i] * RR) - J - m_func[i])
    }

    # Extinction threshold
    #dR[dR <= 1e-5] <- 0
    #dX[dX <= 1e-5] <- 0

    # Return dR and dX
    return(list(c(dR, dX)))
  })
}

Consumer-resource model

Main function for simulation.

Given that total number of resource type $P$ and total number of consumers $S$ in this system, the growth rate of consumer $i$ is defined by $$\frac{dX_i}{dt}=\sum_{j=1}^{P}{X_i w_{j}^{i} u_{j}^{i} R_{j}}-m_iX_i$$ where $X_i$ and $R_j$ are the biomass of consumer $i$ and resource $j$, respectively. $m_i$ is the density-independent loss rate of consumers $j$, $w_{ij}$ is the conversion efficiency of resource $j$ converted into biomass of consumer $i$, and $u_{ij}$ denotes the uptaking rate for consumer $i$ to use resource $j$.

Resource is supplied in chemostat fashion. Let the dynamics of resource $j\ (j=1,\ 2)$ be given by $$\frac{dR_j}{dt}=J(R_{supply,j}-R_j)+\sum_{i=1}^{S}\sum_{k=1}^{P} D_{kj}^i u_{k}^{i} R_k X_i$$ The first term defines the resource supplement from environment external to this system, while the second term denote that from cross-feeding. In the first term, resource is supplied at flow rate $J$ and inflow concentration $R_{supply,j}$. In the second term, $D_{jk}^i$ is a stoichiometric matrix of consumer that describes the metabolic networks of consumer $i$ which uptakes and transforms resource $k$ into secreted resource $j$.

Function CR_model is used to run the CR simulation. This function has the following inputs:

CR_model <- function (
  community = c(initial_community_generate(S = 10), # Consumers
                setNames(1:10, paste0("R", sprint("%03d", 1:10)))), # Resources
  time_limit = 100, # Time limit
  time_step = 0.5
) {
  # Load regional pool
  load("parameter/species_pool.RData")

  # Consumer and resource in the community
  S_biomass <- community[grepl("X", names(community))]
  P_biomass <- community[grepl("R", names(community))]
  consumer <- names(S_biomass) %>% substr(2, 6) %>% as.numeric()
  resource <- names(P_biomass) %>% substr(2, 6) %>% as.numeric()

  # Check input
  stopifnot(!is.null(names(P_biomass)),
            !is.null(names(S_biomass)))

  # Subset consumer characteristics for this dynamics
  u_func <- u[,consumer]
  w_func <- w[,consumer]
  m_func <- m[consumer]
  stoi_func <- stoi[,,consumer]

  # Parameters for ode, a named list with length of 8
  parameters <- list(
    consumer = consumer,
    resource = resource,
    J = J,
    Rin = Rin,
    S_func = length(consumer), # Richness of initial community
    P_func = length(resource), 
    u_func = u_func,
    w_func = w_func,
    m_func = m_func,
    stoi_func = stoi_func
  )

  # Time
  time <- seq(0, time_limit, by = time_step)

  # Initial condition: a named vector with length S + P
  state <- c(P_biomass, S_biomass)

  # Run ode
  dynamics <- ode(y = state, times = time, func = CR_ode, parms = parameters)

  #
  return(dynamics)
}
CR_model <- function (
  community = c(initial_community_generate(S = 10),
         1:10 %>% setNames(paste0("R", 1:10))), # Named vector with biomass as elements and consumer or resource in names.
  P = 10,            # Number of resources
  J = 1,           # Flow rate of chemostat
  time_limit = 100, # Time limit
  time_step = 0.5,
  Rin = rep(10, P) * c(1, rep(0, P-1))
) {
  # Load regional pool
  load("parameter/species_pool.RData")

  #
  S_biomass <- community[grepl("X", names(community))]
  P_biomass <- community[grepl("R", names(community))]
  sp <- names(S_biomass) %>% substr(2, 6) %>% as.numeric()

  # Check input
  stopifnot(P == length(P_biomass),
            !is.null(names(P_biomass)),
            !is.null(names(S_biomass)))

  # Abundance
  S_func <- length(sp)     

  # Abilities of community 
  u_r <- u[,sp]
  w_r <- w[,sp]
  m_r <- m[sp]
  stoi_r <- stoi[,,sp]

  # Parameters for ode, a named list with length of 8
  parameters <- list(
    sp = sp,
    J = J,
    Rin = Rin,
    P = P,
    S = length(sp),
    u = u_r,
    w = w_r,
    m = m_r,
    stoi = stoi_r
  )

  # Time
  time <- seq(0, time_limit, by = time_step)

  # Initial condition: a named vector with length S + P
  state <- c(P_biomass, S_biomass)

  # Run ode
  dynamics <- ode(y = state, times = time, func = CR_ode, parms = parameters)

  #
  return(dynamics)
}

Initial community

Function that generates initial community for each replicate. Same initial community is used across treatments. The function has three inputs:

Where it returns a vector with length of non-duplicated species.

initial_community_generate <- function (
  S_pool = 10000,
  I = 1,  # Inocula drop size
  S = 1000,  # Number of available space in inocula
  same_initial_community = TRUE,
  treatment = 1, # Treatment
  r = 1 # Replicate specific seed
) {
  # 
  load("parameter/species_pool.RData")


  # Start community
  if (same_initial_community == TRUE) {
    # Seed is replicate-specific
    set.seed(r * 100000)

    x <- sample(1:S_pool, S, replace = T, prob = abundance_species) %>%
      table() %>% as.data.frame()

  } else if(same_initial_community == FALSE) {
    # Seed is treatment- and replicate-specific
    set.seed(r * (treatment + 1000) * 100)

    x <- sample(1:S_pool, S, replace = T, prob = abundance_species) %>%
      table() %>% as.data.frame()
  }

  # Transfrom to vector
  x <- (I * x$Freq / S) %>% setNames(paste0("X", sprintf("%05d", as.numeric(as.character(x[,1])))))

  # Return species name and size
  return(x)
}

initial_community_generate(S = 100, treatment = 2, r = 2)

Migrant community

Function that generates migrant communities for treatment and replicates.

The function returns a named vector with length of non-duplicated migrants (<= S * migration_size).

migrant_community_generate <- function (
  S_pool = 10000,
  I = 1, 
  S = 100, # Available space
  migration_size = 0.1, 
  same_migrant_community = FALSE,  # Same communities among treatments
  treatment = 1, # Treatment
  r = 1, # Replicate
  mig = 1 # Migration order
) {
  # 
  load("parameter/species_pool.RData")

  if (same_migrant_community == TRUE) {
    # Seed is replicate-specific
    set.seed(r * (mig + 100) * 100000)

    # Sample from regional pool 
    x <- sample(1:S_pool, round(S * migration_size), replace = T, prob = abundance_species) %>%
      table() %>% as.data.frame()

    # Transfrom to vector
    x <- (I * x$Freq / S) %>% setNames(paste0("X", sprintf("%05d", as.numeric(as.character(x[,1])))))

  } else if (same_migrant_community == FALSE) {
    # Seed is replicate-specific
    set.seed(r * (mig + 100) * (treatment + 1000) * 100)

    # Sample from regional pool 
    x <- sample(1:S_pool, round(S * migration_size), replace = T, prob = abundance_species) %>%
      table() %>% as.data.frame()

    # Transfrom to vector
    x <- (I * x$Freq / S) %>% setNames(paste0("X", sprintf("%05d", as.numeric(as.character(x[,1])))))
  }

  return(x)
}

migrant_community_generate(treatment = 1, r = 2, mig = 2, same_migrant_community = T)
migrant_community_generate(treatment = 1, r = 1, mig = 4, same_migrant_community = F)

Example run

No migration

Example run for CR model.

system.time({
  result <- 
  # Generate initial community
  #initial_community_generate(S_pool = 10000, S = 10, r = 1) %>%
    c(setNames(rep(.1, 10), paste0("X", sprintf("%05d", c(1:5, 101:105))))) %>%
  # Paste the consumer biomass with resource
    c(setNames(rep(1, P), paste0("R", sprintf("%03d", 1:P)))) %>%
  # Run consumer resource model
    CR_model(time_limit = 10, time_step = 1)
})

result %>%
  as.data.frame() %>%
  group_by(time) %>%
  gather(key = variable, value = value, 2:(ncol(.))) %>%
  mutate(type = substr(variable, 1, 1)) %>%
  ggplot(aes(x = time, y = value, color = variable)) +
  geom_line() +
  facet_grid(type~., scale = "free_y")

With migration

Test run with one migration event. Result in list form. Test only run non-extinct species.

# Initiation
system.time({
  result <- 
    # Generate initial community
    initial_community_generate(S_pool = 10000, S = 10, r = 1) %>%
    # Paste the consumer biomass with resource
    c(setNames(rep(1, 10), paste0("R", sprintf("%03d", 1:10)))) %>%
    # Run consumer resource model
    CR_model(time_limit = 10, time_step = 1) %>%
    as.data.frame()
})

# Tail the last time point from previous dynamics chunk
result_tail <- tail(result, 1)

# Migrant community, gathered
sp_mig_gather <- 
  migrant_community_generate(S_pool = 10000, S = 10, I = 1, 
                             migration_size = 1, same_migrant_community = F,
                             treatment = 1, r = 1, mig = 1) %>%
  # Gather data from named factor to a data frame with column number = 1
  data.frame(key = as.character(names(.)), value = .) %>%
  mutate(key = as.character(key))


# Resident community without extinct species, gathered 
result_tail_gather <- 
  gather(result_tail) %>%
  # Extinction threshold; biomass smaller than 10-5 is set to 0
  filter(value >= 1e-5) 

# Join residential and migrant community 
community_joined <- 
  full_join(result_tail_gather, sp_mig_gather, 
            by = c("key", "value")) %>%
  group_by(key) %>%
  summarize(value = sum(value))

# Reshape the joined community into a named vector
community_joined <- setNames(community_joined$value, community_joined$key)

# The second chunk of consumer-resource model
system.time({
  result_mig <- CR_model(community_joined, time_limit = 10, time_step = 1)
})

Multiple migration events.

migration_event_times <- 3
migration_period <- 50 # Time limit for one chunk
time_stabilization <- 1
S = 100
treatment <- 1 # Treatment = 1
r <- 1 # Replicate = 1


# Create a empty data frame for saving data
result_list <- rep(list(NULL), migration_event_times)

# Initiation
result <- 
  # Generate initial community
  initial_community_generate(S_pool = 10000, S = S, r = 1) %>%
  # Paste the consumer biomass with resource
  c(setNames(rep(1, 10), paste0("R", sprintf("%03d", 1:10)))) %>%
  # Run consumer resource model
  CR_model(time_limit = migration_period, time_step = 1) %>%
  as.data.frame()

result_list[[1]] <- result

mig <- 1

pt <- proc.time()

# Execute loop
for (mig in 1:migration_event_times) {
  # Time the processing
  pt1 <- proc.time()

  # Tail the last time point from previous dynamics chunk
  result_tail <- tail(result, 1)


# Migrant community
sp_mig_gather <- 
  migrant_community_generate(S_pool = 10000, S = S, I = 1, 
                             migration_size = 1, same_migrant_community = F,
                             treatment = 1, r = 1, mig = mig) %>%
  # Gather data from named factor to a data frame with column number = 1
  data.frame(key = as.character(names(.)), value = .) %>%
  mutate(key = as.character(key))


# Resident community without extinct species, gathered 
result_tail_gather <- 
  gather(result_tail) %>%
  # Extinction threshold; biomass smaller than 10-5 is set to 0
  filter(value >= 1e-5) 

# Join residential and migrant community 
community_joined <- 
  full_join(result_tail_gather, sp_mig_gather) %>%
  group_by(key) %>%
  summarize(value = sum(value))

# Reshape the joined community into a named vector
community_joined <- setNames(community_joined$value, community_joined$key)

# The second chunk of consumer-resource model
  # Run consumer-resource dynamics 
  if (mig == migration_event_times) { # Last migration and stabilization
    result <- 
      CR_model(community_joined, time_limit = migration_period * time_stabilization, time_step = 1) %>%
      as.data.frame()

  } else {
    result <- 
      CR_model(community_joined, time_limit = migration_period, time_step = 1) %>%
      as.data.frame()
  }

  # Save the chunk result to a list
  result_list[[(mig + 1)]] <- 
    result %>% 
    mutate(time = time + mig * migration_period) %>%
    # Slice the time at migration_period to prevent duplicated points
    slice(1:(nrow(.)-1))

  # Print progress
  cat(paste0("mig = ", mig, "\n"))
  cat(paste0((proc.time() - pt)[3], "sec \n"))
  cat(paste0((proc.time() - pt1)[3], "sec \n"))
}

Melt and merge dynamics chunks.

# Name the result list 
names(result_list) <- 0:migration_event_times

# Melt and merge the result chunks
result_merged <- 
  lapply(result_list, function (x) melt(x, id.var = "time")) %>%
  rbindlist()


# Plot
result_merged %>%
  # Categorize into R or X type
  mutate(type = substr(variable, 1, 1)) %>%
  ggplot(aes(x = time, y = value, color = variable)) + 
  geom_line() +
  geom_vline(xintercept = seq(migration_period,
                              migration_event_times * migration_period, 
                              migration_period)) +
  theme_bw() +
  theme(legend.position = "none") +
  facet_grid(type~.)


Chang-Yu-Chang/MigrationCommunity documentation built on Aug. 13, 2019, 9:41 p.m.