#' Fit reaction equations
#'
#' This function takes one reaction equation and associated multi-omic data and evaluate how well the
#' reaction equation can predict provided flux based on the MCMC-NNLS algorithm.
#' @param rxnSummary a list containing a reaction equation to fit (several forms of it) and multi-omic data
#'
#' @return summary of MCMC run and input data
#'
#' @export
fit_reaction_equations <- function(rxnSummary){
# this method allows for isoenzymes with different kinetics (none of the published work used this feature due to the extra
# degrees of freedom it uses)
kinetically_differing_isoenzymes <- any(names(rxnSummary$rxnForm) %in% rownames(rxnSummary$enzymeComplexes))
# if isoenzymes differ w.r.t. kinetics or regulation then their occupancy equation are stored as different elements of a list
# and enzyme concentrations need to be paired with these seperate equations
# format reaction equations to work with log-concentrations of metabolites and enzymes
rxnEquations <- format_raw_equations(rxnSummary, kinetically_differing_isoenzymes)
# Describe the relevent kinetic parameters
summarize_kinetic_parameters(rxnSummary, rxnEquations, kinetically_differing_isoenzymes)
# Format fluxes, metabolites and enzymes
omic_data <- format_omic_data(kineticPars, all_species, rxnSummary, kinetically_differing_isoenzymes)
# Combine enzyme(s) with non-linear portion of the kinetic form to generate final kinetic form
finalize_reaction_equations(rxnEquations, all_species, kinetically_differing_isoenzymes)
# Formulate priors on non-linear kinetic parameters (not kcat)
kineticParPrior <- build_kinetic_parameter_priors(rxnSummary, kineticPars, omic_data)
#### Sampling l(par|X) using MCMC-NNLS ####
fit_reaction_equation_MCMC_NNLS(markov_pars, kineticPars, kineticParPrior, rxnEquations, all_species, omic_data, kinetically_differing_isoenzymes)
run_summary <- list()
## Save MC information
run_summary[[rxnSummary$listEntry]]$kineticPars <- kineticPars
run_summary[[rxnSummary$listEntry]]$all_species <- all_species
run_summary[[rxnSummary$listEntry]]$kineticParPrior <- data.frame(kineticPars, kineticParPrior)
run_summary[[rxnSummary$listEntry]]$markovChain <- markov_par_vals
run_summary[[rxnSummary$listEntry]]$logLikelihood <- lik_track
## Save flux and rxn information
run_summary[[rxnSummary$listEntry]]$occupancyEq <- rxnEquations
run_summary[[rxnSummary$listEntry]]$metabolites <- omic_data$met_abund
run_summary[[rxnSummary$listEntry]]$enzymes <- omic_data$enzyme_abund
run_summary[[rxnSummary$listEntry]]$flux <- omic_data$flux
run_summary[[rxnSummary$listEntry]]$specSD <- omic_data$species_SD
run_summary[[rxnSummary$listEntry]]$specCorr <- omic_data$species_corr
run_summary
}
#### Setting up reaction equations and omic data
#' Format raw equations
#'
#' Using reaction forms in \code{rxnSummary} format equations as expressions that can be evaluated.
#'
#' @inheritParams fit_reaction_equations
#' @param kinetically_differing_isoenzymes True-False: does only one equation exist for the reaction (false)
#' or are are reaction equations isoenzyme specific (true)
#' @return list containing the occupancy (non-linear) equations expecting metabolomic data
#' in either linear or log2 space
#' @export
format_raw_equations <- function(rxnSummary, kinetically_differing_isoenzymes){
# reformat reactionEquations to work with log-space data
rxnEquations <- list()
if(kinetically_differing_isoenzymes){
for(isoenzyme in names(rxnSummary$rxnForm)){
rxnEquations$occupancyEq_list[[isoenzyme]] <- paste(deparse(as.list(rxnSummary$rxnForm[names(rxnSummary$rxnForm) == isoenzyme])[[1]][[2]]), collapse = "") # a parametric form relating metabolites and constants to fraction of maximal activity
rxnEquations$occupancyEq_list[[isoenzyme]] <- gsub('[ ]+', ' ', rxnEquations$occupancyEq_list[[isoenzyme]])
rxnEquations$l_occupancyEq_list[[isoenzyme]] <- rxnEquations$occupancyEq_list[[isoenzyme]]
rxnEquations$l_occupancyEq_list[[isoenzyme]] <- gsub('([^_])(t_)', '\\12^\\2', rxnEquations$l_occupancyEq_list[[isoenzyme]])
rxnEquations$l_occupancyEq_list[[isoenzyme]] <- gsub('([0-9]+)\\^(t_[0-9]{4})\\^([0-9]+)', '(\\1\\^\\2)\\^\\3', rxnEquations$l_occupancyEq_list[[isoenzyme]]) # correct 2^X^2 -> (2^X)^2
# create an expression for each isoenzyme
rxnEquations[["l_occupancyExpression"]][[isoenzyme]] <- parse(text = sub(' \\+ 0$', '', sub('^I', '', rxnEquations$l_occupancyEq_list[[isoenzyme]])))
}
}else{
# the typical case - single enzyme or kinetically equivalent isoenzymes
rxnEquations[["occupancyEq_list"]] <- paste(deparse(as.list(rxnSummary$rxnForm)[[2]]), collapse = "") # a parametric form relating metabolites and constants to fraction of maximal activity
rxnEquations[["occupancyEq_list"]] <- gsub('[ ]+', ' ', rxnEquations[["occupancyEq_list"]])
rxnEquations[["l_occupancyEq_list"]] <- rxnEquations[["occupancyEq_list"]]
rxnEquations[["l_occupancyEq_list"]] <- gsub('([^_])(t_)', '\\12^\\2', rxnEquations[["l_occupancyEq_list"]])
rxnEquations[["l_occupancyEq_list"]] <- gsub('([0-9]+)\\^(t_[0-9]{4})\\^([0-9]+)', '(\\1\\^\\2)\\^\\3', rxnEquations[["l_occupancyEq_list"]]) # correct 2^X^2 -> (2^X)^2
# create an expression for each isoenzyme
rxnEquations[["l_occupancyExpression"]] <- parse(text = sub(' \\+ 0$', '', sub('^I', '', rxnEquations$l_occupancyEq_list)))
}
rxnEquations
}
#' Summarize kinetic parameters
#'
#' # based on metabolites and enzyme data provided, determine the kinetic constants that are present in the reaction equations from \code{\link{format_raw_equations}}
#'
#' @inheritParams format_raw_equations
#' @param rxnEquations expressions for reaction equations
#' @return writes summary of kinetic parameter table (kineticPars) and types of all reaciton species (all_species)
#' to global environment
#' @export
summarize_kinetic_parameters <- function(rxnSummary, rxnEquations, kinetically_differing_isoenzymes){
if(kinetically_differing_isoenzymes & length(grep('.[0-9]$', names(rxnSummary$rxnForm))) != 0){
# A single enzyme is applied to multiple expressions (ex. partial constitutive activity)
kineticPars <- data.frame(rel_spec = c(names(rxnSummary$rxnForm), rxnSummary$rxnFormData$SubstrateID),
SpeciesType = c(rep("Enzyme", times = length(rxnSummary$rxnForm)), rep("Metabolite", times = nrow(rxnSummary$rxnFormData))), modelName = NA, commonName = NA, formulaName = NA, measured = NA)
}else{
# One enzyme, one Kcat
kineticPars <- data.frame(rel_spec = c(rownames(rxnSummary$enzymeComplexes), rxnSummary$rxnFormData$SubstrateID),
SpeciesType = c(rep("Enzyme", times = nrow(rxnSummary$enzymeComplexes)), rep("Metabolite", times = nrow(rxnSummary$rxnFormData))), modelName = NA, commonName = NA, formulaName = NA, measured = NA)
}
kineticPars$formulaName[kineticPars$SpeciesType == "Enzyme"] <- paste(paste("E", rxnSummary$rxnID, sep = "_"), kineticPars$rel_spec[kineticPars$SpeciesType == "Enzyme"], sep = "_")
kineticPars$modelName[kineticPars$SpeciesType == "Metabolite"] <- kineticPars$rel_spec[kineticPars$SpeciesType == "Metabolite"]
kineticPars$commonName[kineticPars$SpeciesType == "Metabolite"] <- unname(sapply(kineticPars$rel_spec[kineticPars$SpeciesType == "Metabolite"], function(x){rxnSummary$metNames[names(rxnSummary$metNames) == x]}))
kineticPars$commonName[kineticPars$SpeciesType == "Enzyme"] <- kineticPars$rel_spec[kineticPars$SpeciesType == "Enzyme"]
kineticPars$formulaName[kineticPars$SpeciesType == "Metabolite"] <- rxnSummary$rxnFormData$affinityParameter
all_species <- kineticPars[sapply(kineticPars$formulaName, function(ele_used){length(grep(ele_used, rxnEquations[["occupancyEq_list"]])) != 0}) | kineticPars$SpeciesType == "Enzyme",]
kineticPars <- kineticPars[sapply(kineticPars$formulaName, function(ele_used){length(grep(ele_used, rxnEquations[["occupancyEq_list"]])) != 0}),] #remove species which dont appear in the reaction equation
kineticPars <- rbind(kineticPars, c("keq", "keq", NA, NA, paste("Keq", rxnSummary$rxnID, sep = ""), NA))
if(any(rxnSummary$rxnFormData$Hill == 0)){ # an unspecified hill coefficient was found - introduce hill parameter
kineticPars <- rbind(kineticPars, c(rxnSummary$rxnFormData$SubstrateID[rxnSummary$rxnFormData$Hill == 0], "hillCoefficient", NA, NA, sub('^K', 'KH', rxnSummary$rxnFormData$affinityParameter[rxnSummary$rxnFormData$Hill == 0]), NA))
}
kineticPars <<- kineticPars
all_species <<- all_species
}
#' Format omic data
#'
#' Align multi omic data across experimental conditions; Combine metabolites and enzymes
#'
#' @inheritParams format_raw_equations
#' @param kineticPars table summarizing which species all non-linear kinetic parameters correspond to
#' @param all_species all_species table summarizing the types of all reaction species
#' @return a list summarizing the abundances and uncertainties in reaction metabolites, enzymes and flux
#' @export
format_omic_data <- function(kineticPars, all_species, rxnSummary, kinetically_differing_isoenzymes){
###
if(kinetically_differing_isoenzymes & length(grep('.[0-9]$', names(rxnSummary$rxnForm))) != 0){
# A single enzyme is applied to multiple expressions (ex. partial constitutive activity)
enzyme_abund <- t(rxnSummary$enzymeComplexes)[,data.table::chmatch(gsub('.[0-9]$', '', names(rxnSummary$rxnForm)), rownames(rxnSummary$enzymeComplexes))]
colnames(enzyme_abund) <- names(rxnSummary$rxnForm)
species_SD <- rxnSummary$all_species_SD
remapping_indices <- data.table::chmatch(gsub('.[0-9]$', '', names(rxnSummary$rxnForm)), colnames(species_SD)[colnames(species_SD) %in% rownames(rxnSummary$enzymeComplexes)])
remapping_table <- data.frame(remap = names(rxnSummary$rxnForm), enzyme = names(remapping_indices))
renamed_enzyme_SD <- species_SD[,colnames(species_SD) %in% rownames(rxnSummary$enzymeComplexes)][,remapping_indices]
colnames(renamed_enzyme_SD) <- names(rxnSummary$rxnForm)
species_SD <- cbind(species_SD[,!(colnames(species_SD) %in% rownames(rxnSummary$enzymeComplexes))], renamed_enzyme_SD)
species_corr <- matrix(0, nrow = ncol(species_SD), ncol = ncol(species_SD))
rownames(species_corr) <- colnames(species_corr) <- colnames(species_SD)
species_corr[!(colnames(species_SD) %in% names(rxnSummary$rxnForm)),!(colnames(species_SD) %in% names(rxnSummary$rxnForm))] <- rxnSummary$all_species_corr[!(rownames(rxnSummary$all_species_corr) %in% rownames(rxnSummary$enzymeComplexes)),!(rownames(rxnSummary$all_species_corr) %in% rownames(rxnSummary$enzymeComplexes))]
remapped_enzyme_corr <- species_corr[(colnames(species_SD) %in% names(rxnSummary$rxnForm)),(colnames(species_SD) %in% names(rxnSummary$rxnForm))]
for(i in 1:nrow(remapped_enzyme_corr)){
for(j in 1:ncol(remapped_enzyme_corr)){
remapped_enzyme_corr[i,j] <- ifelse(remapping_table$enzyme[remapping_table$remap == colnames(remapped_enzyme_corr)[j]] == remapping_table$enzyme[remapping_table$remap == rownames(remapped_enzyme_corr)[i]], 1, 0)
}
}
species_corr[rownames(species_corr) %in% rownames(remapped_enzyme_corr), colnames(species_corr) %in% colnames(remapped_enzyme_corr)] <- remapped_enzyme_corr
}else{
enzyme_abund <- t(rxnSummary$enzymeComplexes); colnames(enzyme_abund) <- all_species$rel_spec[all_species$SpeciesType == "Enzyme"]
species_SD <- rxnSummary$all_species_SD
species_corr <- rxnSummary$all_species_corr
}
met_abund <- rxnSummary$rxnMet
met_abund <- met_abund[,colnames(met_abund) %in% kineticPars$rel_spec]
if(length(kineticPars$rel_spec[kineticPars$SpeciesType == "Metabolite"]) <= 1){
met_abund <- data.frame(met_abund)
colnames(met_abund) <- kineticPars$rel_spec[kineticPars$SpeciesType == "Metabolite"]
kineticPars$measured[kineticPars$SpeciesType == "Metabolite"] <- !all(is.na(met_abund))
}else{
kineticPars$measured[kineticPars$SpeciesType == "Metabolite"] <- unname(sapply(kineticPars$rel_spec[kineticPars$SpeciesType == "Metabolite"], function(x){(apply(is.na(met_abund), 2, sum) == 0)[names((apply(is.na(met_abund), 2, sum) == 0)) == x]}))
}
kineticPars$measured <- as.logical(kineticPars$measured)
met_abund[,!kineticPars$measured[data.table::chmatch(colnames(met_abund), kineticPars$modelName)]] <- 0 # set missing data (unascertained) to invariant across conditions
flux <- rxnSummary$flux/median(abs(rxnSummary$flux$standardQP[rxnSummary$flux$standardQP != 0])) #flux, scaled to a prettier range
# If FVA min flux is greater than max flux, switch them (and print a warning).
if(sum(!(flux$FVAmax >= flux$FVAmin)) != 0){
print(paste("maximum flux is less than minimum flux for", sum(!(flux$FVAmax >= flux$FVAmin)), "conditions"))
}
flux[!(flux$FVAmax >= flux$FVAmin),c('FVAmin', 'FVAmax')] <- flux[!(flux$FVAmax >= flux$FVAmin),c('FVAmax', 'FVAmin')]
# If bounds are exactly equal, then introduce a minute spread so a range can be calculated ###
flux$FVAmax[flux$FVAmax == flux$FVAmin] <- flux$FVAmax[flux$FVAmax == flux$FVAmin] + abs(flux$FVAmax[flux$FVAmax == flux$FVAmin]*10^-4)
# Assign some objects to reaction_equations environment
kineticPars <<- kineticPars
omic_data <- list()
omic_data$enzyme_abund = enzyme_abund
omic_data$met_abund = met_abund
omic_data$species_SD = species_SD
omic_data$species_corr = species_corr
omic_data$flux = flux
return(omic_data)
}
#' Finalize reaction equations
#'
#' Expression combining the log-occupancy equation and scaling of enzyme abundance by activity
#' determined w.r.t. metabolites/enzymes in log-space and linear-space
#'
#' If there are multiple isoenzymes which differ kinetically then their expressions are generated seperately and then concatenated
#' a single equation is returned
#'
#' @inheritParams summarize_kinetic_parameters
#' @inheritParams format_omic_data
#' @return expand rxnEquations to include kcat parameters and partial derivatives of rxnEquations w.r.t. each specie
#' @export
finalize_reaction_equations <- function(rxnEquations, all_species, kinetically_differing_isoenzymes){
if(kinetically_differing_isoenzymes){
# if isoenzymes differ then their occupancy equations are not distributed e.g. Vmax1[O1] + Vmax2[O2] rather than (Vmax1 + Vmax2)*O
KcatEs_log <- mapply(function(E, Kcat){paste(Kcat, E, sep = " * ")}, E = sapply(all_species$rel_spec[all_species$SpeciesType == "Enzyme"], function(x){paste("2^", x, sep = "")}), Kcat = all_species$formulaName[all_species$SpeciesType == "Enzyme"])
KcatEs_linear <- mapply(function(E, Kcat){paste(Kcat, E, sep = " * ")}, E = all_species$rel_spec[all_species$SpeciesType == "Enzyme"], Kcat = all_species$formulaName[all_species$SpeciesType == "Enzyme"])
if(!all(names(rxnEquations$occupancyEq_list) %in% c(names(KcatEs_log), "other"))){
stop(paste("isoenzyme name does not match available complexes for reaction", names(rxnList_form)[rxN],"\nCheck the field \"enzymeInvolved\" in manual_ComplexRegulation.tsv"))
}
# a complex is matched to an enzyme either by being directly specified or otherwise being placed with "other" enzymes
occEqtn_complex_match <- data.frame(complex = names(KcatEs_log), occEqtn = NA)
occEqtn_complex_match$occEqtn[occEqtn_complex_match$complex %in% names(rxnEquations$occupancyEq_list)] <- occEqtn_complex_match$complex[occEqtn_complex_match$complex %in% names(rxnEquations$occupancyEq_list)]
occEqtn_complex_match$occEqtn[is.na(occEqtn_complex_match$occEqtn)] <- "other"
KcatExpressions_linear <- NULL
KcatExpressions_log <- NULL
for(isoenzyme in unique(occEqtn_complex_match$occEqtn)){
# log
KcatExpression <- paste('(', paste(KcatEs_log[names(KcatEs_log) %in% occEqtn_complex_match$complex[occEqtn_complex_match$occEqtn == isoenzyme]], collapse = " + "), ')', sep = "")
isoenzymeO <- rxnEquations$l_occupancyEq_list[names(rxnEquations$occupancyEq_list) == isoenzyme]
isoenzymeO <- sub('^I', paste(KcatExpression, '*', sep = ''), isoenzymeO)
isoenzymeO <- sub(' \\+ 0$', '', isoenzymeO)
KcatExpressions_log <- c(KcatExpressions_log, isoenzymeO)
# linear
KcatExpression <- paste('(', paste(KcatEs_linear[names(KcatEs_linear) %in% occEqtn_complex_match$complex[occEqtn_complex_match$occEqtn == isoenzyme]], collapse = " + "), ')', sep = "")
isoenzymeO <- rxnEquations$occupancyEq_list[names(rxnEquations$occupancyEq_list) == isoenzyme]
isoenzymeO <- sub('^I', paste(KcatExpression, '*', sep = ''), isoenzymeO)
isoenzymeO <- sub(' \\+ 0$', '', isoenzymeO)
KcatExpressions_linear <- c(KcatExpressions_linear, isoenzymeO)
}
rxnEquations[["full_kinetic_form_list"]] <- paste(unlist(KcatExpressions_log), collapse = " + ")
rxnEquations[["elasticity_calc"]] <- paste(unlist(KcatExpressions_linear), collapse = " + ")
}else{
# (Vmax1 + Vmax2)*O
KcatEs <- mapply(function(E, Kcat){paste(Kcat, E, sep = " * ")}, E = sapply(all_species$rel_spec[all_species$SpeciesType == "Enzyme"], function(x){paste("2^", x, sep = "")}), Kcat = all_species$formulaName[all_species$SpeciesType == "Enzyme"])
KcatExpression <- paste('(', paste(KcatEs, collapse = " + "), ')', sep = "")
Kcatpaste <- paste('I(', KcatExpression, ' * ', sep = "")
rxnEquations[["full_kinetic_form_list"]] <- rxnEquations[["l_occupancyEq_list"]]
rxnEquations[["full_kinetic_form_list"]] <- gsub('(I\\()', Kcatpaste, rxnEquations[["full_kinetic_form_list"]])
rxnEquations[["full_kinetic_form_list"]] <- sub('I\\(', '\\(', rxnEquations[["full_kinetic_form_list"]])
# save the same expression in linear space, so that it can be used later on to look at elasticitity
KcatEs <- mapply(function(E, Kcat){paste(Kcat, E, sep = " * ")}, E = all_species$rel_spec[all_species$SpeciesType == "Enzyme"], Kcat = all_species$formulaName[all_species$SpeciesType == "Enzyme"])
KcatExpression <- paste('(', paste(KcatEs, collapse = " + "), ')', sep = "")
Kcatpaste <- paste('I(', KcatExpression, ' * ', sep = "")
rxnEquations[["elasticity_calc"]] <- rxnEquations[["occupancyEq_list"]]
rxnEquations[["elasticity_calc"]] <- gsub('(I\\()', Kcatpaste, rxnEquations[["elasticity_calc"]])
rxnEquations[["elasticity_calc"]] <- sub('I\\(', '\\(', rxnEquations[["elasticity_calc"]])
}
# find the partial derivatives of the kinetic form for each reaction specie
eq <- eval(parse(text = paste('expression(',rxnEquations[["full_kinetic_form_list"]],')')))
D_full_kinetic_form <- list()
for(spec in c(kineticPars$rel_spec[(kineticPars$measured & !is.na(kineticPars$measured)) | kineticPars$modelName == "t_metX" & !is.na(kineticPars$modelName)], all_species$rel_spec[all_species$SpeciesType == "Enzyme"])){
D_full_kinetic_form[[spec]] <- D(eq, spec)
}
rxnEquations[["kinetic_form_partials"]] <- D_full_kinetic_form
rxnEquations <<- rxnEquations
}
#' Build kinetic parameter priors
#'
#' Proposal distributions for each non-linear kinetic parameter are generated: these will be used by \code{\link{par_draw}}.
#'
#' @inheritParams format_omic_data
#' @inheritParams fit_reaction_equations
#' @param omic_data abundances and uncertainties of reaction species and flux: setup in \code{\link{format_omic_data}}
#' @return data.frame containing the parameters for each kinetic parameter's proposal distribution
#' @export
build_kinetic_parameter_priors <- function(rxnSummary, kineticPars, omic_data){
kineticParPrior <- data.frame(distribution = rep(NA, times = nrow(kineticPars)), par_1 = NA, par_2 = NA, par_3 = NA)
# Specify prior for michaelis constants
kineticParPrior$distribution[kineticPars$SpeciesType %in% c("Metabolite", "keq")] <- "unif"
kineticParPrior$par_1[kineticParPrior$distribution == "unif"] <- -15; kineticParPrior$par_2[kineticParPrior$distribution == "unif"] <- 15 # default value to 2^-15:2^15
for(exp_param in kineticPars$formulaName[!is.na(kineticPars$measured) & kineticPars$measured == TRUE]){
kineticParPrior[kineticPars$formulaName == exp_param & !is.na(kineticPars$modelName), c(2:3)] <- median(omic_data$met_abund[,colnames(omic_data$met_abund) == kineticPars$modelName[kineticPars$formulaName == exp_param & !is.na(kineticPars$formulaName)]]) + c(-15,15)
}#priors for measured metabolites (either in absolute or relative space) are drawn about the median
# Prior for keq is centered around sum log(sub) - sum log(prod) - this deals with some species being in absolute space and others being absolute measurements
n_c <- nrow(omic_data$flux)
kineticParPrior[kineticPars$SpeciesType == "keq", c(2:3)] <- median(rowSums(omic_data$met_abund * c(rep(1,n_c)) %*% t(rxnSummary$rxnStoi[data.table::chmatch(colnames(omic_data$met_abund), names(rxnSummary$rxnStoi))]))) + c(-20,20)
# Specify prior for hill constants
kineticParPrior$distribution[kineticPars$SpeciesType == "hillCoefficient"] <- "unif"
kineticParPrior$par_1[kineticPars$SpeciesType == "hillCoefficient"] <- -3
kineticParPrior$par_2[kineticPars$SpeciesType == "hillCoefficient"] <- 3
return(kineticParPrior)
}
#' Fit reaction equation using MCMC and NNLS
#'
#' Run the MCMC-NNLS algorithm by continually updating kinetic parameters and evaluating their likelihood
#'
#' @param markov_pars parameters for MCMC: a list containing 3 values: sample_freq, n_samples and burn_in
#' @inheritParams build_kinetic_parameter_priors
#' @param kineticParPrior parameters for the proposal distribution of each kinetic parameter
#' @inheritParams finalize_reaction_equations
#' @return write draws from the posterior distribution of non-linear kinetic parameters (markov_par_vals)
#' and their log-likelihoods (lik_track) to the global environment
#' @export
fit_reaction_equation_MCMC_NNLS <- function(markov_pars, kineticPars, kineticParPrior, rxnEquations, all_species, omic_data, kinetically_differing_isoenzymes){
### Initialize parameters & setup tracking of likelihood and parameters ###
lik_track <- NULL
markov_par_vals <- matrix(NA, ncol = nrow(kineticParPrior), nrow = markov_pars$n_samples)
colnames(markov_par_vals) <- kineticPars$formulaName
current_pars <- rep(NA, times = nrow(kineticParPrior))
current_pars <- par_draw(1:nrow(kineticParPrior), current_pars, kineticParPrior)
current_lik <- lik_calc_fittedSD(current_pars, kineticPars, all_species, rxnEquations, omic_data, kinetically_differing_isoenzymes)
### Generate markov chain ###
for(i in 1:(markov_pars$burn_in + markov_pars$n_samples*markov_pars$sample_freq)){
for(j in 1:nrow(kineticPars)){#loop over parameters values
proposed_par <- par_draw(j, current_pars, kineticParPrior)
proposed_lik <- lik_calc_fittedSD(proposed_par, kineticPars, all_species, rxnEquations, omic_data, kinetically_differing_isoenzymes)
if(runif(1, 0, 1) < exp(proposed_lik - current_lik) | (proposed_lik == current_lik & proposed_lik == -Inf)){
current_pars <- proposed_par
current_lik <- proposed_lik
}
}
if(i > markov_pars$burn_in){
if((i - markov_pars$burn_in) %% markov_pars$sample_freq == 0){
markov_par_vals[(i - markov_pars$burn_in)/markov_pars$sample_freq,] <- current_pars
lik_track <- c(lik_track, current_lik)
}
}
}
colnames(markov_par_vals) <- kineticPars$rel_spec
markov_par_vals <<- markov_par_vals
lik_track <<- lik_track
}
#' Draw parameters
#'
#' Update parameters using their prior (given by kineticParPrior) - update those those parameters
#' whose index is in "updates"
#'
#' Options for these parameters are:
#' -@-@ unif: uniform in log-space: par_1 = lowerbound, par_2 = upperbound. draw in log2 space and exponentiate back to linear space
#' -@-@ norm: lognormal: in log2 space draw a value from mean = par_1, sd = par_2
#' -@-@ SpSl: spike and slab (In log2 space: the spike is a point mass at zero with p = par_3, the slab is a normal with mean = par_1 and sd = par_2)
#'
#' @param updates the parameters corresponding to indeces of kineticParPrior
#' @param current_pars current kinetic parameters some which will be overwritten
#' @inheritParams fit_reaction_equation_MCMC_NNLS
#' @return updated kinetic parameters
#' @export
par_draw <- function(updates, current_pars, kineticParPrior){
draw <- current_pars
for(par_n in updates){
if(kineticParPrior$distribution[par_n] == "unif"){
draw[par_n] <- runif(1, kineticParPrior$par_1[par_n], kineticParPrior$par_2[par_n])
} else if(kineticParPrior$distribution[par_n] == "norm"){
draw[par_n] <- rnorm(1, kineticParPrior$par_1[par_n], kineticParPrior$par_2[par_n])
} else if(kineticParPrior$distribution[par_n] == "SpSl"){
draw[par_n] <- ifelse(rbinom(1, 1, kineticParPrior$par_3[par_n]) == 0, 0, rnorm(1, kineticParPrior$par_1[par_n], kineticParPrior$par_2[par_n]))
} else {
stop("invalid distribution")
}
}
draw
}
#' Find the likelihood of data given proposed parameters
#'
#' Determine the likelihood of predicted flux as a function of metabolite abundance and
#' kinetics parameters relative to actual flux
#'
#' @param proposed_params non-linear kinetic parameters to be testing using the reaction equation
#' @inheritParams fit_reaction_equation_MCMC_NNLS
#' @return a numeric log-likelihood
#' @export
lik_calc_fittedSD <- function(proposed_params, kineticPars, all_species, rxnEquations, omic_data, kinetically_differing_isoenzymes){
requireNamespace("nnls", quietly = T)
n_c <- nrow(omic_data$met_abund)
par_stack <- rep(1, n_c) %*% t(proposed_params); colnames(par_stack) <- kineticPars$formulaName
par_stack <- 2^par_stack
occupancy_vals <- data.frame(omic_data$met_abund, par_stack)
if(!(kinetically_differing_isoenzymes)){
predOcc <- eval(rxnEquations[["l_occupancyExpression"]], occupancy_vals) #predict occupancy as a function of metabolites and kinetic constants based upon the occupancy equation
enzyme_activity <- (predOcc %*% t(rep(1, sum(all_species$SpeciesType == "Enzyme"))))*2^omic_data$enzyme_abund #occupany of enzymes * relative abundance of enzymes
}else{
enzyme_activity <- NULL
for(isoenzyme in names(rxnSummary$rxnForm)){
predOcc <- eval(rxnEquations[["l_occupancyExpression"]][[isoenzyme]], occupancy_vals)
enzyme_activity <- cbind(enzyme_activity, predOcc %*% t(rep(1, sum(occEqtn_complex_match$occEqtn == isoenzyme)))*2^omic_data$enzyme_abund[,colnames(omic_data$enzyme_abund) %in% occEqtn_complex_match$complex[occEqtn_complex_match$occEqtn == isoenzyme]])
}
}
# fit flux ~ enzyme*occupancy using non-negative least squares (all enzymes have activity > 0, though negative flux can occur through occupancy)
# flux objective is set as the average of the minimal and maximal allowable flux flowing through the reaction at the optimal solution
flux_fit <- nnls::nnls(enzyme_activity, (omic_data$flux$FVAmax + omic_data$flux$FVAmin)/2)
# setting the variance from residual mean-squared error
nparam <- sum(kineticPars$measured[kineticPars$SpeciesType == "Metabolite"]) + # number of measured metabolites
sum(all_species$SpeciesType == "Enzyme") + # number of measured enzyme groups
sum(kineticPars$SpeciesType == "keq") # plus 1 if keq is included
fit_resid_error <- sqrt(mean((flux_fit$resid - mean(flux_fit$resid))^2))*sqrt(n_c/(n_c-nparam))
# integrate over the cdf from FVA_min to FVA_max t
lik <- (pnorm(omic_data$flux$FVAmax, flux_fit$fitted, fit_resid_error) - pnorm(omic_data$flux$FVAmin, flux_fit$fitted, fit_resid_error))/(omic_data$flux$FVAmax - omic_data$flux$FVAmin)
return(sum(log(lik)))
}
#' Populate reaction equations
#'
#' Not meant for users. Setup a subset of reaction forms for package including rMM reaction forms,
#' significant complex regulation (2 regulators or cooperativity) and either significant single regulators
#' (or best regulators for complex regulation)
#' @return generates the reactionForms.Rds file
populate_reactionEqns <- function(){
rxn_forms <- all_rxn_fits %>% arrange(desc(modelType))
write.table(rxn_forms, file = "companionFiles/reactionEqn_fitting/rMech_summary_table.tsv", sep = "\t", row.names = F, quote = F)
load("~/Desktop/Rabinowitz/FBA_SRH/Yeast_genome_scale/paramOptim.Rdata")
reactionForms <- rxnList_form[rxn_forms$rMech]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.