# mage R package
# This file contains functions to:
# 1. Compute the table of scores (both asso. and charac. scores)
# 2. Determine the p-values of MIC Scores
# 3. Adjust p-values using FDR methodology
#' @title compute_scores
#'
#' @description
#' Computes the table of association and characterization scores
#'
#' @param x matrix: a gene expression matrix
#' (rownames corresponding to genes and colnames to cells)
#'
#' @param n.cores integer: nb of cores to be used for parallel processing
#'
#' @return
#' Returns a table with the following columns:
#' \describe{
#' \item{1:2}{Names of the two genes whose assocation is being assessed}
#' \item{3}{MIC score, \emph{i.e} the strength of the association between the 2 genes}
#' \item{4:n}{Characterization scores, informing about the "shape" of the association}
#' }
#'
#' @importFrom utils combn
#' @importFrom stats cor p.adjust
#' @importFrom data.table data.table
compute_scores <- function(x,
n.cores = 1) {
n_genes <- nrow(x);
# Get unique pairs
# ----------------------------------------------------------------------
pairs <- t(combn(nrow(x), 2));
n_pairs <- nrow(pairs);
pairs_name <- paste(rownames(x)[pairs[, 1]],
rownames(x)[pairs[, 2]],
sep = "-");
pairs_ix <- 1:n_pairs;
# Compute MINE scores
# ----------------------------------------------------------------------
cat(" * Compute the MINE scores for all pairs of variables ...\n");
cat(" this may take quite some time ...\n");
MINE_sym_mats <- minerva::mine(t(x), master = 1:n_genes,
n.cores = n.cores,
alpha = 0.6, C = 15);
n_scores <- length(MINE_sym_mats);
scores_name <- names(MINE_sym_mats);
# Aggregate the symetric matrices into a global table
# where each row is a unique association
MINE_mat <- matrix(nrow = n_pairs, ncol = n_scores);
colnames(MINE_mat) <- scores_name;
for(i in 1:n_scores) {
MINE_mat[, i] <- unlist(vapply(pairs_ix,
function(ix)
as.list(MINE_sym_mats[[i]][pairs[ix, 1], pairs[ix, 2]]),
FUN.VALUE = c(numeric)));
}
# Compute Pearson and Spearman Correlations as well
# ----------------------------------------------------------------------
cat(" * Compute the Pearson Correlation for all pairs of variables ...\n");
PEARSON_cor <- unlist(vapply(pairs_ix,
function(ix) as.list(cor(x[pairs[ix, 1], ],
x[pairs[ix, 2], ],
method = "pearson")),
FUN.VALUE = c(numeric)));
cat(" * Compute the Spearman Correlation for all pairs of variables ...\n");
SPEARMAN_cor <- unlist(vapply(pairs_ix,
function(ix) as.list(cor(x[pairs[ix, 1], ],
x[pairs[ix, 2], ],
method = "spearman")),
FUN.VALUE = c(numeric)));
# Sort MINE_mat by decreasing order of MIC Score
# ----------------------------------------------------------------------
MINE_table <- data.frame(MINE_mat);
sort_ix <- sort(unlist(MINE_table[,1]),
decreasing = T, index.return = T)$ix;
# Write final table, assign customed column names
# ----------------------------------------------------------------------
final_table <- data.table(`X gene` = rownames(x)[pairs[sort_ix, 1]],
`Y gene` = rownames(x)[pairs[sort_ix, 2]],
`MIC (strength)` = MINE_table[sort_ix, ]$`MIC`,
`MIC-p^2 (nonlinearity)` = MINE_table[sort_ix, ]$`MICR2`,
`MAS (non-monotonicity)` = MINE_table[sort_ix, ]$`MAS`,
`MEV (functionality)` = MINE_table[sort_ix, ]$`MEV`,
`MCN (complexity)` = MINE_table[sort_ix, ]$`MCN`,
`PEARSON p (linear correlation)` = PEARSON_cor[sort_ix],
`SPEARMAN rho (rank correlation)` = SPEARMAN_cor[sort_ix]);
return(final_table);
}
#' @title assign_pval
#'
#' @description
#' Assigns p-values to MIC scores
#'
#' @param MIC_scores numeric vector of MIC scores
#' @param nb_cells nb of cells in the gene expression matrix
#'
#' @return
#' Returns a vector of p-values
#'
#' @details
#' Since the p-value of a given MIC score cannot be computed
#' by a simple mathemical function, \code{assigns_pval} reads
#' the p-value table generated by the authors of the MINE
#' statistics (see \url{http://www.exploredata.net/Downloads/P-Value-Tables})
#' at the closest \emph{sample_size} to \emph{nb_cells}.
#'
#' For each \emph{MIC score}, \code{assign_pval} will then
#' retrieve the corresponding p-value from this table.
#'
assign_pval <- function(MIC_scores,
nb_cells) {
MIC_scores <- round(MIC_scores, 3);
# Get indexes of each unique value in the MIC score vector
# ----------------------------------------------------------------------
uniq_MIC_scores <- unique(MIC_scores);
uniq_MIC_scores_ix <- list();
for(i in 1:length(uniq_MIC_scores)) {
uniq_MIC_scores_ix[[i]] <- which(MIC_scores == uniq_MIC_scores[i]);
}
# Get appropriate pval_tab:
# sample_size must be as close as possible to nb_cells
# ----------------------------------------------------------------------
pval_data <- load(system.file("R/sysdata.rda", package = "mage"));
closest_sample_size_ix <- which.min(abs(as.integer(sample_size) - nb_cells));
pval_tab <- pval_tables[[closest_sample_size_ix]];
# Print the absolute difference btw nb_cells and sample_size
# ----------------------------------------------------------------------
cat(" * Find pval table with the n closest to nb_cells\n");
cat(sprintf(" abs(sample_size - nb_cells) = %d\n",
abs(as.integer(sample_size[closest_sample_size_ix]) - nb_cells)));
cat(" If this difference is too big, consider ignoring these p-values ...\n");
# Get the boundaries of the table
# ----------------------------------------------------------------------
pval_tab_max_MIC <- pval_tab[1, 1];
pval_tab_min_MIC <- pval_tab[nrow(pval_tab), 1];
most_signif_MIC_ix <- which(uniq_MIC_scores > c(pval_tab_max_MIC));
least_signif_MIC_ix <- which(uniq_MIC_scores < c(pval_tab_min_MIC));
# Get the p-value assigned to any MIC written in pval_tab
# ----------------------------------------------------------------------
pval_tab_MIC_scores <- round(pval_tab[,1], 3);
pval_tab_uniq_MIC_scores <- unique(pval_tab_MIC_scores);
pval_tab_uniq_MIC_scores_pval <- rep(0, length(pval_tab_uniq_MIC_scores));
for(i in 1:length(pval_tab_uniq_MIC_scores)) {
pval_tab_uniq_MIC_scores_pval[i] <-
round(pval_tab[which(pval_tab_MIC_scores == pval_tab_uniq_MIC_scores[i])[1], 2], 3);
}
pval_tab_uniq_MIC_scores_pval <- unlist(pval_tab_uniq_MIC_scores_pval);
# Use these references to assign a p-value to each MIC score
# present in the MIC_scores vector
# ----------------------------------------------------------------------
p_values <- rep(1, length(MIC_scores)); # Any unassigned MIC will have a p-value of 1
first_i = length(most_signif_MIC_ix) + 1;
last_i = length(uniq_MIC_scores) - length(least_signif_MIC_ix) + 1;
for(i in first_i:last_i) {
cur_ix <- which.min(
abs(pval_tab_uniq_MIC_scores
- uniq_MIC_scores[i]));
p_values[uniq_MIC_scores_ix[[i]]] <- pval_tab_uniq_MIC_scores_pval[cur_ix];
}
# Assign p-value = 0 to any MIC > max(MIC) present in pval_tab
# ----------------------------------------------------------------------
if(length(most_signif_MIC_ix > 0)) {
for(i in 1:length(most_signif_MIC_ix)) {
p_values[uniq_MIC_scores_ix[[most_signif_MIC_ix[i]]]] <- 0;
}
}
return(p_values);
}
#' @title adjust_pval
#'
#' @description
#' Adjust p-values using FDR method (Benjamini & Hochberg, 1995)
#'
#' @param pval numeric vector of p-values
#'
#' @return
#' Returns a numeric vector of adjusted p-values
#'
adjust_pval <- function(pval) {
adjusted_pval <- p.adjust(pval, method = "BH");
return(adjusted_pval);
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.