#' Get PSM-protein weights for summarization with shared peptides
#'
#' @inheritParams getWeightedProteinSummary
#' @return data.table
#'
#' @export
getPeptideProteinWeights = function(feature_data,
norm = "p_norm", norm_parameter = 1,
weights_mode = "contributions",
weights_penalty = FALSE,
weights_penalty_param = 0.1) {
weights_design = getWeightsDesign(feature_data)
design_matrix = weights_design[["x"]]
y = weights_design[["y"]]
is_non_missing = !is.na(y)
y = y[is_non_missing]
design_matrix = design_matrix[is_non_missing, ]
cols = colnames(design_matrix)
cols_split = stringr::str_split(cols, "__") # TODO: STRINGR -> STRINGI
psms_cols = sapply(cols_split, function(x) x[1])
protein_cols = sapply(cols_split, function(x) if (length(x) == 1) NA else x[-1])
if (weights_penalty) {
feature_data[, NumPeptidesPerProtein := uniqueN(ProteinName), by = "PSM"]
feature_data[, EqualWeights := 1 / NumPeptidesPerProtein]
equal_weights = unique(feature_data[, .(PSM, ProteinName, EqualWeights)])[, EqualWeights]
names(equal_weights) = unique(feature_data[, .(PSM, ProteinName, EqualWeights)])[, paste(PSM, ProteinName, sep = "__")]
colnames(design_matrix)
equal_weights_vals = equal_weights[colnames(design_matrix)]
equal_weights_vals = ifelse(is.na(equal_weights_vals), 0, equal_weights_vals)
equal_weights_vals = unname(equal_weights_vals)
multiplier = ifelse(equal_weights_vals == 0, 0, 1)
}
params_full = CVXR::Variable(ncol(design_matrix)) # n_params defined earlier and passed to function below?
constraints = getWeightsConstraints(params_full,
design_matrix, weights_mode,
cols, protein_cols, psms_cols)
# TODO: move below to another function?
if (weights_penalty) {
penalty = 0.1 * sum(multiplier * (params_full - equal_weights_vals) ^ 2)
if (norm == "p_norm") {
obj = CVXR::p_norm(design_matrix %*% params_full - y, norm_parameter) + penalty
} else {
obj = sum(CVXR::huber(design_matrix %*% params_full - y, norm_parameter)) + penalty
}
} else {
if (norm == "p_norm") {
obj = CVXR::p_norm(design_matrix %*% params_full - y, norm_parameter)
} else {
obj = sum(CVXR::huber(design_matrix %*% params_full - y, norm_parameter))
}
}
prob_con = CVXR::Problem(CVXR::Minimize(obj), constraints)
sol_con = CVXR::solve(prob_con) # TODO: WHAT IF this throws an error?
alphas = as.vector(sol_con$getValue(CVXR::variables(prob_con)[[1]]))
result = data.table::data.table(
ProteinName = protein_cols,
PSM = psms_cols,
Weight = alphas
)
result = result[!is.na(ProteinName)]
result = result[Weight > 1e-3] # 1e-4?
result[, Weight := Weight / sum(Weight), by = "PSM"]
result
}
#' Get design matrix and response for weights calculation
#' @inheritParams getWeightedProteinSummary
#' @keywords internal
getWeightsDesign = function(feature_data) {
intensities_tbl = unique(feature_data[, .(PSM, Channel, log2IntensityNormalized)])
psms_intercept_tbl = unique(feature_data[, .(PSM, Channel, Present = 1)])
psms_intercept_tbl = data.table::dcast(psms_intercept_tbl,
PSM + Channel ~ PSM, value.var = "Present", fill = 0)
colnames(psms_intercept_tbl)[3:ncol(psms_intercept_tbl)] = paste0("int_", 1:(ncol(psms_intercept_tbl) - 2))
psms_protein_tbl = unique(feature_data[, .(PSM, ProteinName, Channel, CenteredAbundance)])
psms_protein_tbl = data.table::dcast(psms_protein_tbl,
PSM + Channel ~ PSM + ProteinName,
value.var = "CenteredAbundance",
fill = 0, sep = "__")
wide = merge(
merge(psms_intercept_tbl, intensities_tbl,
by = c("PSM", "Channel"), all.x = T, all.y = T),
psms_protein_tbl,
by = c("PSM", "Channel"), all.x = T, all.y = T
)
wide = data.table::as.data.table(wide)
y_full = wide$log2IntensityNormalized
x_full = as.matrix(wide[, -(1:2), with = FALSE])
x_full = x_full[, !(colnames(x_full) == "log2IntensityNormalized")]
x_full = cbind(intercept = 1, x_full)
list(x = x_full,
y = y_full)
}
#' Get constraints for weights
#' @param params_full object generated by CVXR::Variables
#' @param design_matrix design matrix for weights optimization
#' @inheritParams getWeightedProteinSummary
#' @param cols column names of the design matrix
#' @param protein_cols protein names extracted from column names
#' @param psms_cols feature names extracted from column names
getWeightsConstraints = function(params_full, design_matrix, weights_mode,
cols, protein_cols, psms_cols) {
unique_psms = unique(psms_cols)
unique_psms = unique_psms[!grepl("int", unique_psms)]
n_params = ncol(design_matrix)
n_conditions = length(unique_psms)
intercept_conditions = matrix(1, nrow = 1, ncol = n_params)
intercept_conditions[!grepl("int_[0-9]+", colnames(design_matrix))] = 0
constraint_matrix_full = matrix(0, nrow = n_conditions, ncol = n_params)
for (i in seq_along(unique_psms)) {
psm = unique_psms[i]
constraint_matrix_full[i, psms_cols == psm] = 1
}
constraint_matrix_full = rbind(constraint_matrix_full,
intercept_conditions)
n_intercepts = sum(grepl("int", cols))
positive_matrix = diag(1, n_params - n_intercepts)
positive_matrix = rbind(matrix(0, nrow = n_intercepts,
ncol = ncol(positive_matrix)),
positive_matrix)
positive_matrix = cbind(matrix(0, ncol = n_intercepts,
nrow = nrow(positive_matrix)),
positive_matrix)
if (weights_mode == "contributions") {
constraints_full = list(positive_matrix %*% params_full >= rep(0, n_params),
constraint_matrix_full %*% params_full == c(rep(1, n_conditions),
rep(0, nrow(constraint_matrix_full) - n_conditions)))
} else {
constraints_full = list(positive_matrix %*% params_full >= rep(0, n_params),
positive_matrix %*% params_full <= rep(1, n_params))
}
constraints_full
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.