#| echo=FALSE # inspired by https://www.jumpingrivers.com/blog/knitr-default-options-settings-hooks/ knitr::opts_chunk$set( echo = FALSE, cache = TRUE, cache.path = "cache/apply_model_tQ_ChABC_design/", fig.path = "apply_model_tQ_ChABC_design_files/", fig.retina = 2, # Control using dpi fig.width = 6, # generated images fig.height = 5, # generated images fig.pos = "t", # pdf mode fig.align = "center", dpi = if (knitr::is_latex_output()) 72 else 300, out.width = "100%")
suppressWarnings(suppressMessages(library(tidyverse))) library(BayesPharma) library(GeomIndicator) suppressWarnings(suppressMessages(library(tidybayes)))
Chondroitinase ABC (ChABC) is a bacterial enzyme that degrades several types of proteoglycans. While natively, it is secreted to digest host tissue to facilitate nutrient acquisition, it has been repurposed as a tool for glycomics and as a therapeutic to treat scar formation, such post-stroke recovery. A limitation of the widely used ChABC Proteus vulgaris for use in biotechnology and medicine is that it is unstable and has a short half life. To overcome this, several groups have tried to stabilize ChABC while maintaining or improving enzyme efficiency. For example, in (Hettiaratchi, et al., 2020), they used Protein One Stop Shop (PROSS) to generate 3 designs ranging from XX-YY number of mutations and measured the enzyme efficiency of the enzyme to break down the substrate Chondroitan A.
#| echo=FALSE data_path <- here::here( "inst", "extdata", "enzyme_kinetics", "Hettiaratchi_2020.xlsx") data <- tibble::tibble( sheet_name = readxl::excel_sheets(data_path)[2:7]) |> dplyr::rowwise() |> dplyr::do({ sheet_name <- .$sheet_name data <- readxl::read_excel( path = data_path, sheet = sheet_name, range = "R2C2:R65C18", col_names = TRUE, .name_repair = ~ paste0("column_", 1:length(.))) |> dplyr::mutate( substrate_concentration_mg_ml = sheet_name |> stringr::str_extract("^[0-9.]+") |> as.numeric(), .before = 1) }) |> dplyr::ungroup() names(data)[seq(4, 16, by = 3)] <- paste0( data[1,2:16] |> as.character() |> na.omit(), "_1") names(data)[seq(5, 17, by = 3)] <- paste0( data[1,2:16] |> as.character() |> na.omit(), "_2") names(data)[seq(6, 18, by = 3)] <- paste0( data[1,2:16] |> as.character() |> na.omit(), "_3") names(data)[2] <- "time_s" names(data)[3] <- "temperature_C" data <- data |> dplyr::filter( !is.na(time_s), time_s != "Time [s]") |> tidyr::pivot_longer( cols = -c("substrate_concentration_mg_ml", "time_s", "temperature_C"), names_to = "treatment_replica", values_to = "product_concentration") |> tidyr::separate( col = treatment_replica, into = c("treatment", "replica"), sep = "_") |> dplyr::mutate( replica = as.factor(replica), time_s = as.numeric(time_s), temperature_C = as.numeric(temperature_C), product_concentration = ifelse( product_concentration == "OVER", NA, product_concentration), product_concentration = as.numeric(product_concentration)) baseline <- data |> dplyr::filter(treatment == "Blank") |> dplyr::group_by(substrate_concentration_mg_ml, time_s) |> dplyr::summarize( mean_baseline = mean(product_concentration, na.rm=TRUE), .groups = "drop") data <- data |> dplyr::left_join( baseline, by = c("substrate_concentration_mg_ml", "time_s")) |> dplyr::mutate( normalized_product_concentration = product_concentration - mean_baseline)
ggplot2::ggplot( data = data |> dplyr::filter( treatment != "Blank", !is.na(product_concentration))) + ggplot2::theme_bw() + ggplot2::geom_line( mapping = ggplot2::aes( x = time_s, y = normalized_product_concentration, color = log(substrate_concentration_mg_ml), group = paste0(substrate_concentration_mg_ml, "_", replica))) + ggplot2::geom_smooth( data = data |> dplyr::filter( treatment != "Blank", time_s <= 200), mapping = ggplot2::aes( x = time_s, y = normalized_product_concentration, group = paste0(substrate_concentration_mg_ml, "_", replica)), method = "lm", formula = y ~ x, color = "orange") + ggplot2::facet_wrap( facets = dplyr::vars(treatment), nrow = 1) + ggplot2::theme(legend.position = "bottom") + ggplot2::scale_color_continuous( "Substrate mg/ml", breaks = log(c(0.5, 1, 2.5, 5, 10, 15)), labels = c("0.5", "1", "2.5", "5", "10", "15")) + ggplot2::scale_x_continuous("Time (s)") + ggplot2::scale_y_continuous( "Product mg/ml", breaks = c(0.5, 1, 2.5, 5, 10, 15), labels = c("0.5", "1", "2.5", "5", "10", "15"))
data_simulate <- tidyr::expand_grid( kcat = 0.0072, kM = 0.0448, ET = 1, ST = c(0.5, 1, 2.5, 5, 10, 15)) |> dplyr::mutate(series_index = dplyr::row_number()) |> dplyr::rowwise() |> dplyr::do({ data <- . time <- seq(0, 1200, by = 20) data <- data.frame(data, time = time, P = BayesPharma::tQ_model_generate( time = time, kcat = data$kcat, kM = data$kM, ET = data$ET, ST = data$ST)[,2]) }) ggplot2::ggplot(data = data_simulate) + ggplot2::theme_bw() + ggplot2::geom_line( mapping = ggplot2::aes( x = time, y = 0.8 + 0.16*P, color = log(ST), group = ST)) + ggplot2::theme(legend.position = "bottom") + ggplot2::scale_color_continuous( "Substrate mg/ml:", breaks = log(c(0.5, 1, 2.5, 5, 10, 15)), labels = c("0.5", "1", "2.5", "5", "10", "15")) + ggplot2::scale_x_continuous("Time (s)") + ggplot2::scale_y_continuous( "Product mg/ml:", breaks = c(0.5, 1, 2.5, 5, 10, 15), labels = c("0.5", "1", "2.5", "5", "10", "15"))
model_data <- data |> dplyr::arrange( treatment, substrate_concentration_mg_ml, replica, time_s) model_data <- model_data |> dplyr::left_join( model_data |> dplyr::distinct(treatment, substrate_concentration_mg_ml, replica) |> dplyr::mutate(series_index = dplyr::row_number()), by = c("treatment", "substrate_concentration_mg_ml", "replica")) |> dplyr::filter(time_s > 0) |> dplyr::filter(!is.na(normalized_product_concentration)) |> dplyr::transmute( series_index, treatment, ET = 1, ST = substrate_concentration_mg_ml, P = product_concentration, time = time_s) model_data <- model_data |> dplyr::filter(treatment != "Blank") model_aa <- BayesPharma::tQ_model( data = model_data, formula = BayesPharma::tQ_formula(predictors = 0 + treatment), prior = BayesPharma::tQ_prior( kcat = brms::prior(prior = gamma(1, 200), lb = 0, nlpar = "kcat"), kM = brms::prior(prior = gamma(2, 0.2), lb = 0, nlpar = "kM")), init = BayesPharma::tQ_init( kcat = \() stats::runif(n = 1, 0.001, 0.1), kM = \() stats::runif(n = 1, 1, 20), sigma = \() stats::runif(n = 1, 0, 2)), cores = 8, threads = 2, control = NULL, backend="cmdstanr", expose_functions = FALSE) # trainable offset paramter model_formula <- brms::brmsformula( P ~ tQ_multiple(series_index, time + offset, kcat, kM, ET, ST), kcat + kM ~ 0 + treatment, offset ~ 1, nl = TRUE, loop = FALSE) model_formula$bayes_pharma_info <- list( formula_type = "tQ", series_index_variable = "series_index", treatment_variable = "time", treatment_units = "seconds", ET_variable = "ET", ET_units = "mg/ml", ST_variable = "ST", ST_units = "mg/ml", response_variable = "P", response_units = "mg/ml") model_offset <- BayesPharma::tQ_model( data = model_data, formula = model_formula, prior = BayesPharma::tQ_prior( kcat = brms::prior(prior = gamma(1, 200), lb = 0, nlpar = "kcat"), kM = brms::prior(prior = gamma(2, 0.2), lb = 0, nlpar = "kM"), offset = brms::prior(prior = normal(0, 5), nlpar = "offset"), sigma = brms::prior(prior = student_t(3, 0, 2.5), class = "sigma")), init = list( b_kcat = \() stats::runif(n = 1, 0.001, 0.1), b_kM = \() stats::runif(n = 1, 1, 20), b_offset = \() stats::runif(n = 1, -2, 2), sigma = \() stats::runif(n = 1, 0, 2)) |> "class<-"("bpinit"), cores = 8, threads = 2, control = NULL, backend="cmdstanr", expose_functions = FALSE) model_offset_rstan <- BayesPharma::tQ_model( data = model_data, formula = model_formula, prior = BayesPharma::tQ_prior( kcat = brms::prior(prior = gamma(1, 200), lb = 0, nlpar = "kcat"), kM = brms::prior(prior = gamma(2, 0.2), lb = 0, nlpar = "kM"), offset = brms::prior(prior = normal(0, 5), nlpar = "offset"), sigma = brms::prior(prior = student_t(3, 0, 2.5), class = "sigma")), init = list( b_kcat = \() stats::runif(n = 1, 0.001, 0.1), b_kM = \() stats::runif(n = 1, 1, 20), b_offset = \() stats::runif(n = 1, -2, 2), sigma = \() stats::runif(n = 1, 0, 2)) |> "class<-"("bpinit"), chains = 0, cores = 8, threads = 2, control = NULL, expose_functions = FALSE)
model_offset |> BayesPharma::plot_prior_posterior_densities()
model <- model_offset model_prior <- model |> update(sample_prior = "only") draws_prior <- model_prior |> tidybayes::tidy_draws() |> tidybayes::gather_variables() |> dplyr::filter(.variable |> stringr::str_detect("b_(kcat|kM)")) |> tidyr::separate(.variable, into = c("class", "parameter", "treatment"), sep = "_") |> dplyr::mutate(treatment = treatment |> stringr::str_replace("treatment", "")) draws_posterior <- model |> tidybayes::tidy_draws() |> tidybayes::gather_variables() |> dplyr::filter(.variable |> stringr::str_detect("b_(kcat|kM)")) |> tidyr::separate(.variable, into = c("class", "parameter", "treatment"), sep = "_") |> dplyr::mutate(treatment = treatment |> stringr::str_replace("treatment", "")) draws_prior_pairs <- draws_prior |> tidyr::pivot_wider( id_cols = c(".chain", ".iteration", ".draw", "treatment"), names_from = "parameter", values_from = ".value") draws_posterior_pairs <- draws_posterior |> tidyr::pivot_wider( id_cols = c(".chain", ".iteration", ".draw", "treatment"), names_from = "parameter", values_from = ".value") ggplot2::ggplot() + ggplot2::theme_bw() + ggplot2::geom_point( data = draws_prior_pairs, mapping = ggplot2::aes( x = log10(kcat), y = log10(kM)), color = "orange", size = .8, shape = 16, alpha = .3) + ggplot2::geom_point( data = draws_posterior_pairs, mapping = ggplot2::aes( x = log10(kcat), y = log10(kM)), color = "blue", size = .8, shape = 16, alpha = .3) + ggplot2::scale_x_continuous("Catalytic constant log10(kcat)") + ggplot2::scale_y_continuous("Michaelis constant log10(kM)") + ggplot2::facet_wrap(facets = dplyr::vars(treatment), nrow = 1)
model <- model_offset newdata_levels <- model$data |> dplyr::select(tidyselect::any_of(c("treatment", "ET", "ST", "time"))) |> as.list() |> purrr::map(unique) newdata_levels$time = seq(-120, 1200, 100) newdata <- do.call(tidyr::expand_grid, newdata_levels) # add series_index newdata <- newdata |> dplyr::left_join( newdata |> dplyr::distinct(treatment, ET, ST) |> dplyr::mutate(series_index = dplyr::row_number()), by = c("treatment", "ET", "ST")) pp_data <- model |> tidybayes::add_predicted_draws( newdata = newdata, value = "P", re_formula = NA, ndraws = 200) |> ggdist:: median_qi(.width = c(.5, .8, .95)) ggplot2::ggplot() + ggplot2::theme_bw() + ggplot2::theme(legend.position = "bottom") + ggplot2::scale_fill_discrete( "Median Quantile Interval", labels = c("95%", "80%", "50%")) + ggdist::geom_lineribbon( data = pp_data, mapping = ggplot2::aes( x = .data[["time"]], y = .data[["P"]], ymin = .data[[".lower"]], ymax = .data[[".upper"]], color = log10(.data[["ST"]]), group = .data[["ST"]]), alpha = .15) + ggplot2::geom_point( data = model$data, mapping = ggplot2::aes( x = .data[["time"]], y = .data[["P"]], color = log10(.data[["ST"]])), size = 0.8, alpha = 0.7) + ggplot2::facet_wrap(facets = ~treatment) + ggplot2::labs(title = "Predicted tQ Enzyme Progress Curve") + ggplot2::scale_color_viridis_c( "Substrate Concentration (uM)", labels = c("0.5", "2.5", "15"), breaks = log10(c(0.5, 2.5, 15)), begin = 0.2, end = 0.9) + ggplot2::scale_y_continuous("Product Concentration") + ggplot2::scale_x_continuous("Time (s)") ggplot2::ggsave( filename = "/tmp/model_offset_posterior_draws.pdf", width = 10, height = 6, useDingbats = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.