In pratikunterwegs/patho-move-evol: Analysis code for the _pathomove_ model.

Scenario 1: Evolution of Movement Types

library(data.table)
library(stringr)
library(ggplot2)
library(glue)

Read data

# list all files
files <- list.files(
  path = "data/output",
  pattern = "Rds",
  full.names = TRUE
)

Process each file

# where to send output
gen_data_path <- "data/results/gen_data"
morph_data_path <- "data/results/morph_data"
si_imp_path <- "data/results/si_imp_data"
move_assoc_data_path <- "data/results/move_assoc_data"

# make directories if non existent
lapply(
  list(gen_data_path, morph_data_path, si_imp_path, move_assoc_data_path),
  function(p) {
    if (!dir.exists(p)) {
      # message
      message(
        glue(
          "creating filepath {p}"
        )
      )
      dir.create(p)
    }
  }
)

List parameter files and link with output files.

parameters <- list.files(
  "data/parameters/",
  pattern = "csv", full.names = TRUE
)

# bind together
parameters <- lapply(parameters, fread) |> rbindlist(fill = TRUE)

# link with output
output <- data.table(
  file = files,
  seed = as.integer(str_extract(files, "\\d{10}"))
)

output <- merge(
  output, parameters,
  by = "seed"
)

# use a loop because why not and don't want to load purrr for walk
for (i in seq_len(nrow(output))) {

  # load data
  data <- readRDS(output$file[i])
  # scenario tag
  scenario_tag <- data$scenario_tag

  # prepare generation level data
  gen_data <- pathomove::get_trait_data(
    data$output,
    scaled_preferences = TRUE
  )

  #### prepare individual level data ####
  popsize <- output$popsize[i]

  # get social strategy and social information importance
  gen_data <- pathomove::get_social_strategy(gen_data)
  gen_data <- pathomove::get_si_importance(gen_data)

  # process by social strategy, summarise movement, intake and association
  data_strategy <- gen_data[, list(
    N = .N,
    mean_move = mean(moved),
    mean_assoc = mean(assoc),
    mean_intake = mean(intake),
    mean_energy = mean(energy),
    prop_infec = sum(t_infec > 0) / length(t_infec)
  ), by = c("gen", "social_strat")]

  # calculate the number of associations for each movement distance
  gen_data[, moved := floor(moved)]
  data_strategy_move_assoc <- gen_data[, list(
    mean_assoc = mean(assoc),
    mean_intake = mean(intake),
    n_infected = length(t_infec > 0)
  ), by = c("gen", "social_strat", "moved")]

  # count si_importance morphs
  gen_data[, si_imp := plyr::round_any(si_imp, 0.01)]
  data_si_imp <- gen_data[, list(
    N = .N
  ), by = c("gen", "si_imp")]

  gen_data <- gen_data[, unlist(
    lapply(.SD, function(x) {
      list(
        mean = mean(x),
        sd = sd(x)
      )
    }),
    recursive = FALSE
  ),
  .SDcols = c("energy", "intake", "moved", "assoc"),
  by = c("gen")
  ]

  # add simulation parameters
  params <- as.list(output[i, -c("seed", "file")])
  invisible(
    lapply(
      list(data_strategy, gen_data, data_si_imp, data_strategy_move_assoc),
      function(df) {
        df[, names(params) := params]
        df[, scenario_tag := scenario_tag]
      }
    )
  )

  # save data
  invisible(
    Map(
      list(
        data_strategy, data_strategy_move_assoc, gen_data,
        data_si_imp
      ),
      list(
        morph_data_path, move_assoc_data_path, gen_data_path,
        si_imp_path
      ),
      f = function(df, p) {
        fwrite(
          df,
          file = glue(
            "{p}/data_gen_{scenario_tag}_{output$seed[i]}.csv"
          )
        )
      }
    )
  )
}

files <- list.files("data/output", pattern = "default", full.names = TRUE)
data <- readRDS(files[42])
output <- data$output

output@agent_parameters
output@eco_parameters

a <- pathomove::get_trait_data(output)
pathomove::get_social_strategy(a)
pathomove::get_si_importance(a)

b <- a[, .N, by = c("gen", "social_strat")]
ggplot(b) +
  geom_col(
    aes(gen, N, fill = social_strat),
    width = 5
  )

ggplot(
  a[gen %between% c(3000, 3500), ],
  aes(moved, assoc, col = social_strat)
) +
  scale_y_log10() +
  geom_jitter(size = 0.1)

Infections per generation

# load each file and get parameters and infections over generations
data_infections_gen <- lapply(
  seq_len(nrow(output)), function(i) {
    # load data
    data <- readRDS(output$file[i])
    scenario_tag <- data$scenario_tag

    # generations and infections
    n_infected <- data$output@infections_per_gen
    gens <- seq(0, max(data$output@generations))

    # parameters
    params <- as.list(output[i, -c("file", "seed")])

    df <- data.table(
      gen = gens,
      n_infected = n_infected,
      scenario_tag = scenario_tag
    )
    df[, names(params) := params]
  }
)

# combine data and save
data_infections_gen <- rbindlist(data_infections_gen, fill = TRUE)

# save data
fwrite(
  data_infections_gen,
  file = "data/results/data_infections_gen.csv"
)

Exploratory visualisation

# visualise infections in the case of persistent introduction
data_persistent <- data_infections_gen[scenario == 1]
data_persistent[, scenario := fcase(
  infect_percent == 1, "percent",
  infect_percent == 0, "absolute"
)]

data_persistent |>
  ggplot() +
  geom_line(
    aes(gen, n_infected, col = as.factor(cost), group = repl),
    alpha = 0.2
    # binwidth = c(100, NA),
    # geom = "line"
  ) +
  # scale_x_log10() +
  facet_grid(cost ~ regen)

# visualise infection in the case of sporadic spillover
data_sporadic <- data_infections_gen[scenario == 3]

ggplot(data_sporadic) +
  stat_summary(
    aes(gen, n_infected, col = as.factor(spillover_rate)),
    binwidth = c(100, NA),
    geom = "line"
  ) +
  facet_grid(~dispersal)

sc_vertical <- data_infections_gen[scenario_tag == "vertical", ]
# is the pathogen eliminated
sc_vertical[, pathogen_eliminated := any(
  n_infected[(gen > 500) & (gen < 700)] == 0
),
by = c("cost", "p_v_transmit", "repl")
]

ggplot(sc_vertical) +
  geom_path(
    aes(gen, n_infected, group = repl, col = pathogen_eliminated),
    alpha = 0.5
  ) +
  facet_grid(
    p_v_transmit ~ cost,
    labeller = label_both
  ) +
  colorspace::scale_colour_discrete_sequential(
    palette = "Red-Yellow",
    rev = FALSE,
    l1 = 10, l2 = 80,
    # limits = c(FALSE),
    na.value = "lightblue"
  ) +
  theme_test() +
  xlim(
    490, 600
  )

data_infections_gen[scenario_tag == "vertical", ] |>
  ggplot(aes(gen, n_infected)) +
  geom_point(size = 0.2) +
  facet_grid(p_v_transmit ~ costInfect)