Nothing
#' Stability selection metrics
#'
#' Computes the stability score (see \code{\link{StabilityScore}}) and
#' upper-bounds of the \code{\link{PFER}} and \code{\link{FDP}} from selection
#' proportions of models with a given parameter controlling the sparsity of the
#' underlying algorithm and for different thresholds in selection proportions.
#'
#' @inheritParams GraphicalModel
#' @param selprop array of selection proportions.
#' @param PFER_thr_blocks vector of block-specific thresholds in PFER for
#' constrained calibration by error control. If \code{PFER_thr=Inf} and
#' \code{FDP_thr=Inf}, unconstrained calibration is used.
#' @param FDP_thr_blocks vector of block-specific thresholds in the expected
#' proportion of falsely selected features (or False Discovery Proportion,
#' FDP) for constrained calibration by error control. If \code{PFER_thr=Inf}
#' and \code{FDP_thr=Inf}, unconstrained calibration is used.
#' @param Sequential_template logical matrix encoding the type of procedure to
#' use for data with multiple blocks in stability selection graphical
#' modelling. For multi-block estimation, the stability selection model is
#' constructed as the union of block-specific stable edges estimated while the
#' others are weakly penalised (\code{TRUE} only for the block currently being
#' calibrated and \code{FALSE} for other blocks). Other approaches with joint
#' calibration of the blocks are allowed (all entries are set to \code{TRUE}).
#' @param graph logical indicating if stability selection is performed in a
#' regression (if \code{FALSE}) or graphical (if \code{TRUE})
#' framework.
#' @param group vector encoding the grouping structure among predictors. This
#' argument indicates the number of variables in each group and only needs to
#' be provided for group (but not sparse group) penalisation.
#'
#' @return A list with: \item{S}{a matrix of the best (block-specific) stability
#' scores for different (sets of) penalty parameters. In multi-block stability
#' selection, rows correspond to different sets of penalty parameters, (values
#' are stored in the output "Lambda") and columns correspond to different
#' blocks.} \item{Lambda}{a matrix of (block-specific) penalty parameters. In
#' multi-block stability selection, rows correspond to sets of penalty
#' parameters and columns correspond to different blocks.} \item{Q}{a matrix
#' of average numbers of (block-specific) edges selected by the underlying
#' algorithm for different (sets of) penalty parameters. In multi-block
#' stability selection, rows correspond to different sets of penalty
#' parameters, (values are stored in the output "Lambda") and columns
#' correspond to different blocks.} \item{Q_s}{a matrix of calibrated numbers
#' of (block-specific) stable edges for different (sets of) penalty
#' parameters. In multi-block stability selection, rows correspond to
#' different sets of penalty parameters, (values are stored in the output
#' "Lambda") and columns correspond to different blocks.} \item{P}{a matrix of
#' calibrated (block-specific) thresholds in selection proportions for
#' different (sets of) penalty parameters. In multi-block stability selection,
#' rows correspond to different sets of penalty parameters, (values are stored
#' in the output "Lambda") and columns correspond to different blocks.}
#' \item{PFER}{a matrix of computed (block-specific) upper-bounds in PFER of
#' calibrated graphs for different (sets of) penalty parameters. In
#' multi-block stability selection, rows correspond to different sets of
#' penalty parameters, (values are stored in the output "Lambda") and columns
#' correspond to different blocks.} \item{FDP}{a matrix of computed
#' (block-specific) upper-bounds in FDP of calibrated stability selection
#' models for different (sets of) penalty parameters. In multi-block stability
#' selection, rows correspond to different sets of penalty parameters, (values
#' are stored in the output "Lambda") and columns correspond to different
#' blocks.} \item{S_2d}{an array of (block-specific) stability scores obtained
#' with different combinations of parameters. Rows correspond to different
#' (sets of) penalty parameters and columns correspond to different thresholds
#' in selection proportions. In multi-block stability selection, indices along
#' the third dimension correspond to different blocks.} \item{PFER_2d}{an
#' array of computed upper-bounds of PFER obtained with different combinations
#' of parameters. Rows correspond to different penalty parameters and columns
#' correspond to different thresholds in selection proportions. Not available
#' in multi-block stability selection graphical modelling.} \item{FDP_2d}{an
#' array of computed upper-bounds of FDP obtained with different combinations
#' of parameters. Rows correspond to different penalty parameters and columns
#' correspond to different thresholds in selection proportions. Not available
#' in multi-block stability selection graphical modelling.}
#'
#' @family stability metric functions
#'
#' @references \insertRef{ourstabilityselection}{sharp}
#'
#' \insertRef{stabilityselectionMB}{sharp}
#'
#' \insertRef{stabilityselectionSS}{sharp}
#'
#' @examples
#' ## Sparse or sparse group penalisation
#'
#' # Simulating set of selection proportions
#' set.seed(1)
#' selprop <- matrix(round(runif(n = 20), digits = 2), nrow = 2)
#'
#' # Computing stability scores for different thresholds
#' metrics <- StabilityMetrics(
#' selprop = selprop, pi = c(0.6, 0.7, 0.8),
#' K = 100, graph = FALSE
#' )
#'
#'
#' ## Group penalisation
#'
#' # Simulating set of selection proportions
#' set.seed(1)
#' selprop <- matrix(round(runif(n = 6), digits = 2), nrow = 2)
#' selprop <- cbind(
#' selprop[, 1], selprop[, 1],
#' selprop[, 2], selprop[, 2],
#' matrix(rep(selprop[, 3], each = 6), nrow = 2, byrow = TRUE)
#' )
#'
#' # Computing stability scores for different thresholds
#' metrics <- StabilityMetrics(
#' selprop = selprop, pi = c(0.6, 0.7, 0.8),
#' K = 100, graph = FALSE, group = c(2, 2, 6)
#' )
#' @export
StabilityMetrics <- function(selprop, pk = NULL, pi_list = seq(0.6, 0.9, by = 0.01),
K = 100, n_cat = NULL,
PFER_method = "MB", PFER_thr_blocks = Inf, FDP_thr_blocks = Inf,
Sequential_template = NULL, graph = TRUE, group = NULL) {
if (graph) {
nlambda <- dim(selprop)[3]
} else {
nlambda <- nrow(selprop)
}
# Extracting pk
if (is.null(pk)) {
pk <- ncol(selprop)
}
if (is.null(Sequential_template)) {
Sequential_template <- matrix(TRUE, nrow = nlambda, ncol = 1)
}
# Creating matrix with block indices
nblocks <- 1
if (graph) { # to avoid memory issues in high dimensional variable selection
bigblocks <- BlockMatrix(pk)
nblocks <- length(pk) * (length(pk) + 1) / 2
bigblocks_vect <- factor(bigblocks[upper.tri(bigblocks)], levels = seq_len(nblocks))
N_blocks <- unname(table(bigblocks_vect))
blocks <- levels(bigblocks_vect)
names(N_blocks) <- blocks
}
# Initialising objects to be filled
Q <- Q_s <- P <- matrix(NA, nrow = nlambda, ncol = nblocks)
best_loglik <- best_PFER <- best_FDP <- matrix(NA, nrow = nlambda, ncol = nblocks)
if (nblocks == 1) {
loglik <- PFER <- FDP <- matrix(NA, ncol = length(pi_list), nrow = nlambda)
} else {
loglik <- array(NA, dim = c(nlambda, length(pi_list), nblocks))
}
# Computing the metrics for each value of lambda
for (k in seq_len(nlambda)) {
# Extracting corresponding selection proportions
if (graph) {
stab_iter <- selprop[, , k]
} else {
stab_iter <- selprop[k, ]
}
# Computing stability score with block-specific pi
for (block_id in seq_len(nblocks)) {
if (Sequential_template[k, block_id]) {
if (graph) {
stab_iter_block <- stab_iter[(bigblocks == block_id) & (upper.tri(bigblocks))] # selection proportions in the block
} else {
stab_iter_block <- stab_iter
}
# Using group penalisation (extracting one per group)
if (!is.null(group)) {
stab_iter_block <- stab_iter_block[cumsum(group)]
}
q_block <- round(sum(stab_iter_block, na.rm = TRUE)) # average number of edges selected by the original procedure in the block
Q[k, block_id] <- q_block
N_block <- length(stab_iter_block) # maximum number of edges in the block
tmp_loglik <- tmp_PFERs <- tmp_FDPs <- rep(NA, length(pi_list))
# Computing error rates and stability score for different values of pi
for (j in seq_len(length(pi_list))) {
pi <- pi_list[j]
tmp_PFERs[j] <- PFER(q = q_block, pi = pi, N = N_block, K = K, PFER_method = PFER_method)
tmp_FDPs[j] <- FDP(selprop = stab_iter_block, PFER = tmp_PFERs[j], pi = pi)
if ((tmp_PFERs[j] <= PFER_thr_blocks[block_id]) & (tmp_FDPs[j] <= FDP_thr_blocks[block_id])) {
# Computing stability score (group penalisation is accounted for above so no need here)
if (is.null(n_cat)) {
theta <- ifelse(stab_iter_block >= pi, yes = 1, no = 0)
tmp_loglik[j] <- ConsensusScore(prop = stab_iter_block, K = K, theta = theta)
} else {
tmp_loglik[j] <- StabilityScore(selprop = stab_iter_block, pi_list = pi, K = K, n_cat = n_cat, group = NULL)
}
}
}
# Storing stability score in a matrix if only one block
if (nblocks == 1) {
loglik[k, ] <- tmp_loglik
PFER[k, ] <- tmp_PFERs
FDP[k, ] <- tmp_FDPs
} else {
loglik[k, , block_id] <- tmp_loglik
}
# Keeping best stability score and other parameters at the max
if (any(!is.na(tmp_loglik))) {
tmp_loglik[is.na(tmp_loglik)] <- 0
# Extracting the best pi (highest threshold if multiple choices)
myid <- which(tmp_loglik == max(tmp_loglik, na.rm = TRUE))
myid <- max(myid)
# Extracting corresponding metrics
tmp_loglik[which(tmp_loglik == 0)] <- NA
best_loglik[k, block_id] <- tmp_loglik[myid]
P[k, block_id] <- pi_list[myid]
Q_s[k, block_id] <- sum(stab_iter_block >= pi_list[myid], na.rm = TRUE)
best_PFER[k, block_id] <- tmp_PFERs[myid]
best_FDP[k, block_id] <- tmp_FDPs[myid]
}
}
}
}
best_loglik_blocks <- best_loglik
best_loglik <- matrix(apply(best_loglik, 1, sum), ncol = 1)
if (nblocks == 1) {
return(list(
S = best_loglik_blocks,
Q = Q, Q_s = Q_s, P = P,
PFER = best_PFER, FDP = best_FDP,
S_2d = loglik, PFER_2d = PFER, FDP_2d = FDP
))
} else {
return(list(
S = best_loglik_blocks,
Q = Q, Q_s = Q_s, P = P,
PFER = best_PFER, FDP = best_FDP,
S_2d = loglik
))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.