#| echo=FALSE

# inspired by https://www.jumpingrivers.com/blog/knitr-default-options-settings-hooks/
knitr::opts_chunk$set(
  echo = FALSE,
  cache = TRUE,
  cache.path = "cache/apply_model_tQ_ChABC_design/",
  fig.path = "apply_model_tQ_ChABC_design_files/",
  fig.retina = 2, # Control using dpi
  fig.width = 6,  # generated images
  fig.height = 5, # generated images
  fig.pos = "t",  # pdf mode
  fig.align = "center",
  dpi = if (knitr::is_latex_output()) 72 else 300, 
  out.width = "100%")
suppressWarnings(suppressMessages(library(tidyverse)))
library(BayesPharma)
library(GeomIndicator)
suppressWarnings(suppressMessages(library(tidybayes)))

Chondroitinase ABC (ChABC) is a bacterial enzyme that degrades several types of proteoglycans. While natively, it is secreted to digest host tissue to facilitate nutrient acquisition, it has been repurposed as a tool for glycomics and as a therapeutic to treat scar formation, such post-stroke recovery. A limitation of the widely used ChABC Proteus vulgaris for use in biotechnology and medicine is that it is unstable and has a short half life. To overcome this, several groups have tried to stabilize ChABC while maintaining or improving enzyme efficiency. For example, in (Hettiaratchi, et al., 2020), they used Protein One Stop Shop (PROSS) to generate 3 designs ranging from XX-YY number of mutations and measured the enzyme efficiency of the enzyme to break down the substrate Chondroitan A.

#| echo=FALSE
data_path <- here::here(
  "inst", "extdata", "enzyme_kinetics", "Hettiaratchi_2020.xlsx")

data <- tibble::tibble(
  sheet_name = readxl::excel_sheets(data_path)[2:7]) |>
  dplyr::rowwise() |>
  dplyr::do({
    sheet_name <- .$sheet_name
    data <- readxl::read_excel(
      path = data_path,
      sheet = sheet_name,
      range = "R2C2:R65C18",
      col_names = TRUE,
      .name_repair = ~ paste0("column_", 1:length(.))) |>
      dplyr::mutate(
        substrate_concentration_mg_ml = sheet_name |>
          stringr::str_extract("^[0-9.]+") |>
          as.numeric(),
          .before = 1)
  }) |>
  dplyr::ungroup()

names(data)[seq(4, 16, by = 3)] <- paste0(
  data[1,2:16] |> as.character() |> na.omit(), "_1")
names(data)[seq(5, 17, by = 3)] <- paste0(
  data[1,2:16] |> as.character() |> na.omit(), "_2")
names(data)[seq(6, 18, by = 3)] <- paste0(
  data[1,2:16] |> as.character() |> na.omit(), "_3")
names(data)[2] <- "time_s"
names(data)[3] <- "temperature_C"

data <- data |>
  dplyr::filter(
    !is.na(time_s),
    time_s != "Time [s]") |>
  tidyr::pivot_longer(
    cols = -c("substrate_concentration_mg_ml", "time_s", "temperature_C"),
    names_to = "treatment_replica",
    values_to = "product_concentration") |>
  tidyr::separate(
    col = treatment_replica,
    into = c("treatment", "replica"),
    sep = "_") |>
  dplyr::mutate(
    replica = as.factor(replica),
    time_s = as.numeric(time_s),
    temperature_C = as.numeric(temperature_C),
    product_concentration = ifelse(
      product_concentration == "OVER",
      NA, product_concentration),
    product_concentration = as.numeric(product_concentration))

baseline <- data |>
  dplyr::filter(treatment == "Blank") |>
  dplyr::group_by(substrate_concentration_mg_ml, time_s) |>
  dplyr::summarize(
    mean_baseline = mean(product_concentration, na.rm=TRUE),
    .groups = "drop")

data <- data |>
  dplyr::left_join(
    baseline,
    by = c("substrate_concentration_mg_ml", "time_s")) |>
  dplyr::mutate(
    normalized_product_concentration = product_concentration - mean_baseline)
ggplot2::ggplot(
  data = data |>
    dplyr::filter(
      treatment != "Blank",
      !is.na(product_concentration))) + 
  ggplot2::theme_bw() +
  ggplot2::geom_line(
    mapping = ggplot2::aes(
      x = time_s,
      y = normalized_product_concentration,
      color = log(substrate_concentration_mg_ml),
      group = paste0(substrate_concentration_mg_ml, "_", replica))) +
  ggplot2::geom_smooth(
    data = data |>
      dplyr::filter(
        treatment != "Blank",
        time_s <= 200),
    mapping = ggplot2::aes(
      x = time_s,
      y = normalized_product_concentration,
      group = paste0(substrate_concentration_mg_ml, "_", replica)),
    method = "lm",
    formula = y ~ x,
    color = "orange") +
  ggplot2::facet_wrap(
    facets = dplyr::vars(treatment),
    nrow = 1) +
  ggplot2::theme(legend.position = "bottom") +
  ggplot2::scale_color_continuous(
    "Substrate mg/ml",
    breaks = log(c(0.5, 1, 2.5, 5, 10, 15)),
    labels = c("0.5", "1", "2.5", "5", "10", "15")) +
  ggplot2::scale_x_continuous("Time (s)") +
  ggplot2::scale_y_continuous(
    "Product mg/ml",
    breaks = c(0.5, 1, 2.5, 5, 10, 15),
    labels = c("0.5", "1", "2.5", "5", "10", "15")) 
data_simulate <- tidyr::expand_grid(
  kcat = 0.0072,
  kM =  0.0448,
  ET = 1,
  ST = c(0.5, 1, 2.5, 5, 10, 15)) |>
  dplyr::mutate(series_index = dplyr::row_number()) |>
  dplyr::rowwise() |>
  dplyr::do({
    data <- .
    time <- seq(0, 1200, by = 20)
    data <- data.frame(data,
      time = time,
      P = BayesPharma::tQ_model_generate(
        time = time,
        kcat = data$kcat,
        kM = data$kM,
        ET = data$ET,
        ST = data$ST)[,2])
  })

ggplot2::ggplot(data = data_simulate) + 
  ggplot2::theme_bw() +
  ggplot2::geom_line(
    mapping = ggplot2::aes(
      x = time,
      y = 0.8 + 0.16*P,
      color = log(ST),
      group = ST)) +
  ggplot2::theme(legend.position = "bottom") +
  ggplot2::scale_color_continuous(
    "Substrate mg/ml:",
    breaks = log(c(0.5, 1, 2.5, 5, 10, 15)),
    labels = c("0.5", "1", "2.5", "5", "10", "15")) +
  ggplot2::scale_x_continuous("Time (s)") +
  ggplot2::scale_y_continuous(
    "Product mg/ml:",
    breaks = c(0.5, 1, 2.5, 5, 10, 15),
    labels = c("0.5", "1", "2.5", "5", "10", "15"))
model_data <- data |>
  dplyr::arrange(
    treatment,
    substrate_concentration_mg_ml,
    replica,
    time_s)

model_data <- model_data |>
  dplyr::left_join(
    model_data |>
      dplyr::distinct(treatment, substrate_concentration_mg_ml, replica) |>
      dplyr::mutate(series_index = dplyr::row_number()),
    by = c("treatment", "substrate_concentration_mg_ml", "replica")) |>
  dplyr::filter(time_s > 0) |>
  dplyr::filter(!is.na(normalized_product_concentration)) |>
  dplyr::transmute(
    series_index,
    treatment,
    ET = 1,
    ST = substrate_concentration_mg_ml,
    P = product_concentration,
    time = time_s)

model_data <- model_data |>
  dplyr::filter(treatment != "Blank")

model_aa <- BayesPharma::tQ_model(
  data = model_data,
  formula = BayesPharma::tQ_formula(predictors = 0 + treatment),
  prior = BayesPharma::tQ_prior(
    kcat = brms::prior(prior = gamma(1, 200), lb = 0, nlpar = "kcat"),
    kM = brms::prior(prior = gamma(2, 0.2), lb = 0, nlpar = "kM")),
  init = BayesPharma::tQ_init(
    kcat = \() stats::runif(n = 1, 0.001, 0.1),
    kM = \() stats::runif(n = 1, 1, 20),
    sigma = \() stats::runif(n = 1, 0, 2)),
  cores = 8,
  threads = 2,
  control = NULL,
  backend="cmdstanr",
  expose_functions = FALSE)



# trainable offset paramter
model_formula <- brms::brmsformula(
    P ~ tQ_multiple(series_index, time + offset, kcat, kM, ET, ST),
    kcat + kM ~ 0 + treatment,
    offset ~ 1,
    nl = TRUE,
    loop = FALSE)
model_formula$bayes_pharma_info <- list(
    formula_type = "tQ",
    series_index_variable = "series_index",
    treatment_variable = "time",
    treatment_units = "seconds",
    ET_variable = "ET",
    ET_units = "mg/ml",
    ST_variable = "ST",
    ST_units = "mg/ml",
    response_variable = "P",
    response_units = "mg/ml")


model_offset <- BayesPharma::tQ_model(
  data = model_data,
  formula = model_formula,
  prior = BayesPharma::tQ_prior(
    kcat = brms::prior(prior = gamma(1, 200), lb = 0, nlpar = "kcat"),
    kM = brms::prior(prior = gamma(2, 0.2), lb = 0, nlpar = "kM"),
    offset = brms::prior(prior = normal(0, 5), nlpar = "offset"),
    sigma = brms::prior(prior = student_t(3, 0, 2.5), class = "sigma")),
  init = list(
    b_kcat = \() stats::runif(n = 1, 0.001, 0.1),
    b_kM = \() stats::runif(n = 1, 1, 20),
    b_offset = \() stats::runif(n = 1, -2, 2),
    sigma = \() stats::runif(n = 1, 0, 2)) |>
    "class<-"("bpinit"),
  cores = 8,
  threads = 2,
  control = NULL,
  backend="cmdstanr",
  expose_functions = FALSE)

model_offset_rstan <- BayesPharma::tQ_model(
  data = model_data,
  formula = model_formula,
  prior = BayesPharma::tQ_prior(
    kcat = brms::prior(prior = gamma(1, 200), lb = 0, nlpar = "kcat"),
    kM = brms::prior(prior = gamma(2, 0.2), lb = 0, nlpar = "kM"),
    offset = brms::prior(prior = normal(0, 5), nlpar = "offset"),
    sigma = brms::prior(prior = student_t(3, 0, 2.5), class = "sigma")),
  init = list(
    b_kcat = \() stats::runif(n = 1, 0.001, 0.1),
    b_kM = \() stats::runif(n = 1, 1, 20),
    b_offset = \() stats::runif(n = 1, -2, 2),
    sigma = \() stats::runif(n = 1, 0, 2)) |>
    "class<-"("bpinit"),
  chains = 0,
  cores = 8,
  threads = 2,
  control = NULL,
  expose_functions = FALSE)
model_offset |>
  BayesPharma::plot_prior_posterior_densities()

model <- model_offset

model_prior <- model |>
  update(sample_prior = "only")

draws_prior <- model_prior |>
  tidybayes::tidy_draws() |>
  tidybayes::gather_variables() |>
  dplyr::filter(.variable |> stringr::str_detect("b_(kcat|kM)")) |>
  tidyr::separate(.variable, into = c("class", "parameter", "treatment"), sep = "_") |>
  dplyr::mutate(treatment = treatment |> stringr::str_replace("treatment", ""))

draws_posterior <- model |>
  tidybayes::tidy_draws() |>
  tidybayes::gather_variables() |>
  dplyr::filter(.variable |> stringr::str_detect("b_(kcat|kM)")) |>
  tidyr::separate(.variable, into = c("class", "parameter", "treatment"), sep = "_") |>
  dplyr::mutate(treatment = treatment |> stringr::str_replace("treatment", ""))


draws_prior_pairs <- draws_prior |>
  tidyr::pivot_wider(
    id_cols = c(".chain", ".iteration", ".draw", "treatment"),
    names_from = "parameter",
    values_from = ".value")

draws_posterior_pairs <- draws_posterior |>
  tidyr::pivot_wider(
    id_cols = c(".chain", ".iteration", ".draw", "treatment"),
    names_from = "parameter",
    values_from = ".value")

ggplot2::ggplot() +
  ggplot2::theme_bw() +
    ggplot2::geom_point(
      data = draws_prior_pairs,
      mapping = ggplot2::aes(
        x = log10(kcat),
        y = log10(kM)),
      color = "orange",
      size = .8,
      shape = 16,
      alpha = .3) +  
    ggplot2::geom_point(
      data = draws_posterior_pairs,
      mapping = ggplot2::aes(
        x = log10(kcat),
        y = log10(kM)),
      color = "blue",
      size = .8,
      shape = 16,
      alpha = .3) +
  ggplot2::scale_x_continuous("Catalytic constant log10(kcat)") +
  ggplot2::scale_y_continuous("Michaelis constant log10(kM)") +
  ggplot2::facet_wrap(facets = dplyr::vars(treatment), nrow = 1)
model <- model_offset


newdata_levels <- model$data |>
    dplyr::select(tidyselect::any_of(c("treatment", "ET", "ST", "time"))) |>
    as.list() |>
    purrr::map(unique)
newdata_levels$time = seq(-120, 1200, 100)
newdata <- do.call(tidyr::expand_grid, newdata_levels)

# add series_index
newdata <- newdata |>
  dplyr::left_join(
    newdata |>
      dplyr::distinct(treatment, ET, ST) |>
      dplyr::mutate(series_index = dplyr::row_number()),
    by = c("treatment", "ET", "ST"))

pp_data <- model |>
  tidybayes::add_predicted_draws(
    newdata = newdata,
    value = "P",
    re_formula = NA,
    ndraws = 200)  |>
  ggdist:: median_qi(.width = c(.5, .8, .95))

ggplot2::ggplot() +
  ggplot2::theme_bw() +
  ggplot2::theme(legend.position = "bottom") +
  ggplot2::scale_fill_discrete(
    "Median Quantile Interval",
    labels = c("95%", "80%", "50%")) +
  ggdist::geom_lineribbon(
    data = pp_data,
    mapping = ggplot2::aes(
      x = .data[["time"]],
      y = .data[["P"]],
      ymin = .data[[".lower"]],
      ymax = .data[[".upper"]],
      color = log10(.data[["ST"]]),
      group = .data[["ST"]]),
    alpha = .15) +
  ggplot2::geom_point(
    data = model$data,
    mapping = ggplot2::aes(
      x = .data[["time"]],
      y = .data[["P"]],
      color = log10(.data[["ST"]])),
    size = 0.8,
    alpha = 0.7) +
  ggplot2::facet_wrap(facets = ~treatment) +
  ggplot2::labs(title = "Predicted tQ Enzyme Progress Curve") +
  ggplot2::scale_color_viridis_c(
    "Substrate Concentration (uM)",
    labels = c("0.5", "2.5", "15"),
    breaks = log10(c(0.5, 2.5, 15)),
    begin = 0.2, end = 0.9) +
  ggplot2::scale_y_continuous("Product Concentration") +
  ggplot2::scale_x_continuous("Time (s)")

ggplot2::ggsave(
  filename = "/tmp/model_offset_posterior_draws.pdf",
  width = 10,
  height = 6,
  useDingbats = FALSE)


maomlab/BayesPharma documentation built on Aug. 24, 2024, 8:45 a.m.