#' Multi-omic integration via sparse singular value decomposition.
#'
#' This function integrates omic blocks to perform sparse singular
#' value decomposition (SVD), non-linear embedding, and/or cluster
#' analysis. Both supervised and unsupervised methods are supported.
#' In both cases, if multiple omic blocks are used as predictors, they
#' are concatenated and normalized to form an 'extended' omic matrix 'X'
#' (Gonzalez-Reymundez and Vazquez, 2020). Supervised analysis can be
#' obtained by indicating which omic block defines a multivariate
#' response 'Y'. Each method within MOSS returns a matrix 'B',
#' whose form depends on the technique used
#' (e.g. B = X for pca; B = X'Y for pls; B = (X'X)^-X'Y for lrr).
#' A sparse SVD of matrix B is then obtained to summarize the variability
#' among samples and features in terms of latent factors.
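#'
#' For intuition, the construction of B for 'pls' can be sketched in
#' plain R as follows (an illustrative sketch, not the internal code;
#' 'x.blocks' and 'y.block' are hypothetical objects):
#' \preformatted{
#' X <- scale(do.call(cbind, x.blocks)) # extended predictors matrix.
#' Y <- scale(y.block)                  # response block.
#' B <- crossprod(X, Y)                 # B = X'Y for pls.
#' }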
#'
#' Once 'dense' solutions are found (the result of SVD on a matrix B),
#' the function ssvdEN_sol_path is called to perform sparse SVD (sSVD)
#' on a grid of possible degrees of sparsity (nu),
#' for a given value of the elastic net parameter (alpha).
#' The sSVD is performed using the algorithm of Shen and Huang (2008),
#' extended to include Elastic Net type of regularization.
#' For one latent factor (rank 1 case), the algorithm finds vectors u
#' and v' and scalar d that minimize:
#' \tabular{rl}{
#' ||B - d uv'||^2 +
#' lambda(nu_v)(alpha_v||v||_1 +
#' (1-alpha_v)||v||^2) + lambda(nu_u)(alpha_u||u||_1 +
#' (1-alpha_u)||u||^2)
#' }
#'
#' such that ||u|| = 1.
#' The right Eigenvector is obtained from v / ||v|| and the
#' corresponding d from u'Bv.
#' The element lambda(nu_.) is a monotonically decreasing function of
#' nu_. (the desired number of non-zero elements)
#' onto the positive real numbers, and alpha_. is any number between
#' zero and one
#' balancing shrinkage and variable selection.
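#'
#' For the lasso case (alpha_u = alpha_v = 1), one rank 1 fit can be
#' sketched as below (a minimal illustration with a fixed penalty and
#' a simulated B; the actual fitting is done by ssvdEN_sol_path):
#' \preformatted{
#' soft <- function(x, l) sign(x) * pmax(abs(x) - l, 0)
#' B <- matrix(rnorm(200), 20, 10)
#' u <- svd(B, nu = 1, nv = 1)$u[, 1]  # dense solution as start.
#' for (i in 1:100) {
#'   v <- soft(crossprod(B, u), 0.1)   # soft-threshold right vector.
#'   u <- drop(crossprod(t(B), v))     # u proportional to B v.
#'   u <- u / sqrt(sum(u^2))           # keep ||u|| = 1.
#' }
#' v <- v / sqrt(sum(v^2))
#' d <- drop(crossprod(u, crossprod(t(B), v)))
#' }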
#' Selecting the degree of sparsity: The function allows tuning the
#' degree of sparsity using an ad hoc method based on
#' the one presented in Shen & Huang (2008, see references),
#' generalized to tune both nu_v and nu_u.
#' This is done by exploring the proportion of
#' explained variance (PEV) on a grid of possible values.
#' Drastic and/or steep
#' changes in the PEV trajectory across degrees of sparsity are
#' used for automatic selection
#' (see help for the function ssvdEN_sol_path).
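#'
#' As a toy illustration of this criterion (a hypothetical PEV
#' trajectory; the real one is computed by ssvdEN_sol_path):
#' \preformatted{
#' nu <- seq(1, 100, by = 5)           # grid of degrees of sparsity.
#' pev <- 1 - exp(-0.1 * nu)           # toy PEV trajectory.
#' d2 <- c(0, diff(c(0, diff(pev))))   # second empirical derivative.
#' nu[which.max(abs(d2))]              # steepest change -> chosen nu.
#' }
#'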
#' By imposing the additional assumption of omic blocks
#' being conditionally independent, each multivariate technique can
#' be extended using a 'multi-block' approach, where the
#' contribution of each omic block to the total (co)variance is
#' addressed.
#' When response Y is a character column matrix,
#' with classes or categories by subject,
#' each multivariate technique can be extended to perform
#' linear discriminant analysis.
#'
#' @note
#' \enumerate{
#' \item The function does not return PEV for the EN parameters
#' (alpha_v and/or alpha_u); the user needs to provide
#' a single value for each.
#' \item When more than one PC index is used,
#' the columns of T might not be orthogonal.
#' \item Although the user is encouraged to
#' perform data projection and
#' clustering separately, MOSS can do both automatically.
#' However, both tasks might require more tuning than the
#' defaults provide, and computations could become cumbersome.
#' \item Tuning of the degrees of sparsity is done heuristically
#' on the training set. In our experience, this results in
#' high specificity but rather low sensitivity
#' (i.e. cutoffs that are too liberal compared with
#' extensive cross-validation on a testing set).
#' \item When 'method' is an unsupervised technique,
#' 'K.X' is the number of
#' latent factors returned and used in further analysis.
#' When 'method' is a supervised technique,
#' 'K.X' is used to perform an SVD
#' to facilitate the product of large matrices and inverses.
#' \item If 'K.X' (or 'K.Y') equals 1, no plots are returned.
#' \item Although the degree of sparsity maps onto the number of
#' features/subjects for lasso, the user needs to be aware that this
#' conceptual correspondence
#' is lost for the full EN (alpha within (0, 1));
#' e.g. the number of features selected with alpha < 1 will
#' eventually be larger than the optimal degree of sparsity.
#' This allows the number of non-zero elements
#' to increase rapidly
#' when tuning the degrees of sparsity.
#' To get exact values for the degrees of sparsity
#' at the subjects
#' or features level, the user needs to
#' set the 'exact.dg' parameter from 'FALSE'
#' (the default) to 'TRUE'.
#' }
#' @param data.blocks List containing omic blocks of class 'matrix' or
#' 'FBM'. In
#' each block, rows represent subjects and columns features.
#' @param method Multivariate method. Character.
#' Defaults to 'pca'. Possible options are pca, mbpca, pca-lda,
#' mbpca-lda, pls,
#' mbpls, pls-lda, mbpls-lda, lrr, mblrr, lrr-lda, mblrr-lda.
#' @param resp.block
#' What block should be used as response? Integer.
#' Only used when the specified
#' method is supervised.
#' @param K.X Number of principal components for predictors.
#' Integer. Defaults to 5.
#' @param K.Y Number of principal components for the response block
#' when the method is supervised. Integer. Defaults to K.X.
#' @param verbose Should we print messages? Logical.
#' Defaults to TRUE.
#' @param nu.parallel Should the degrees of sparsity be tuned in
#' parallel? Logical. Defaults to FALSE.
#' @param nu.u A grid with
#' increasing integers representing degrees of sparsity for
#' left Eigenvectors.
#' Defaults to NULL.
#' @param nu.v Same but for right Eigenvectors. Defaults to NULL.
#' @param alpha.u Elastic Net parameter for left Eigenvectors.
#' Numeric between 0
#' and 1. Defaults to 1.
#' @param alpha.v Elastic Net parameter for right
#' Eigenvectors. Numeric between 0 and 1. Defaults to 1.
#' @param cluster Arguments
#' passed to the function tsne2clus as a list. Defaults to FALSE.
#' If cluster=TRUE,
#' default parameters are used (eps_range=c(0,4), eps_res=100).
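#' For example, custom settings can be passed as
#' cluster = list(eps_range = c(0, 4), eps_res = 100, min_clus_size = 2).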
#' @param clus.lab A
#' vector of the same length as the number of subjects, with labels
#' used to visualize clusters. Factor.
#' Defaults to NULL.
#' When sparsity is imposed on the left
#' Eigenvectors, the association between non-zero loadings and
#' labels' groups is shown by a chi-squared statistic for each PC.
#' When sparsity is not imposed, the association between
#' labels and PCs
#' is assessed with a Kruskal-Wallis statistic.
#' @param tSNE Arguments passed to the function pca2tsne as a list.
#' Defaults to
#' FALSE. If tSNE=TRUE, default parameters are used
#' (perp=50, n.samples=1, n.iter=1e3).
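#' For example, custom settings can be passed as
#' tSNE = list(perp = 30, n.samples = 1, n.iter = 1e3).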
#' @param axes.pos PC indexes used for tSNE.
#' Defaults to 1:K.Y. Used only when tSNE
#' is not FALSE.
#' @param covs Covariates whose effects we wish to adjust for. Accepts matrix,
#' data.frame, numeric, or character vectors.
#' @param scale.arg Should the omic blocks be centered and
#' scaled? Logical. Defaults to TRUE.
#' @param norm.arg Should omic blocks be
#' normalized? Logical. Defaults to TRUE.
#' @param plot Should results be plotted?
#' Logical. Defaults to FALSE.
#' @param approx.arg Should we use standard SVD or
#' random approximations? Defaults to FALSE.
#' If TRUE and at least one block is of
#' class 'matrix', irlba is called. If TRUE & is(O,'FBM')==TRUE,
#' big_randomSVD is called.
#' @param exact.dg Should we compute exact degrees of sparsity? Logical.
#' Defaults to FALSE. Only relevant when alpha.u or alpha.v lies in
#' the (0,1)
#' interval.
#' @param use.fbm Should we treat omic blocks as
#' File-Backed Matrices (FBM)? Logical. Defaults to FALSE.
#' @param lib.thresh Should we use a liberal or conservative
#' threshold to tune degrees of sparsity? Logical. Defaults to TRUE.
#' @return Returns a list with the results of the sparse SVD.
#' If \emph{plot}=TRUE, a series of plots is generated as well.
#' \itemize{
#' \item \emph{\strong{B:}} The object of the (sparse) SVD.
#' Depending on the method used,
#' B can be an extended matrix of normalized omic blocks,
#' a variance-covariance matrix,
#' or a matrix of regression coefficients.
#' If at least one of the blocks in 'data.blocks' is of class FBM,
#' is(B,'FBM') is TRUE.
#' Otherwise, is(B,'matrix') is TRUE.
#' \item \emph{\strong{Q:}} Matrix with the SVD projections at the
#' level of subjects.
#' \item \emph{\strong{selected_items:}} List containing the position, name, and loadings
#' of selected features and subjects by latent dimension.
#' If 'plot=TRUE', a scatterplot is displayed, where the x-axis
#' represents the latent dimensions, the y-axis the total number
#' of features selected on a log scale, and each point is a pie chart
#' showing the relative contribution of each omic to the number of
#' features selected. The radius of each pie chart represents the
#' coefficient of variation among squared loadings
#' (mean of squared loadings divided by their standard deviation).
#' \item \emph{\strong{dense:}} A list containing the results of the
#' dense SVD.\itemize{
#' \item \strong{u:} Matrix with left Eigenvectors.
#' \item \strong{v:} Matrix with right Eigenvectors.
#' \item \strong{d:} Matrix with singular values.
#' }
#' \item \emph{\strong{sparse:}} A list containing the results of the
#' sparse SVD.
#' \itemize{
#' \item \strong{u:} Matrix with left Eigenvectors.
#' \item \strong{v:} Matrix with right Eigenvectors.
#' \item \strong{d:} Matrix with singular values.
#' \item \strong{opt_dg_v:} Selected degrees of sparsity for
#' right Eigenvectors.
#' \item \strong{opt_dg_u:} Selected degrees of sparsity for
#' left Eigenvectors.
#' }
#' \item Graphical displays: Depending on the values in 'plot',
#' 'tSNE','cluster',
#' and 'clus.lab' arguments, the following ggplot objects can be
#' obtained.
#' They contain:\itemize{
#' \item \strong{scree_plot:} Plots of Eigenvalues and their
#' first and
#' second order empirical derivatives along PC indexes.
#' \item \strong{tun_dgSpar_plot:} Plots with the PEV trajectory,
#' as well as
#' its first and second empirical derivatives along the degrees of
#' sparsity path.
#' \item \strong{PC_plot:} Plot of the first principal
#' components according to axes.pos. By default the first two
#' are plotted.
#' \item \strong{tSNE_plot:} Plot with the tSNE mapping onto
#' two dimensions.
#' \item \strong{clus_plot:} The output of function tsne2clus.
#' \item \strong{subLabels_vs_PCs:} Plot of the Kruskal-Wallis
#' (or Chi-square)
#' statistics of the association test between PC
#' (or selected subjects) and
#' pre-established subjects groups.
#' \item \strong{clusters_vs_PCs:} Plot of the Kruskal-Wallis
#' (or Chi-square)
#' statistics of the association test between PC
#' (or selected subjects) and
#' detected clusters.
#' }
#'
#' }
#' @references \itemize{
#' \item Gonzalez-Reymundez and Vazquez. 2020. Multi-omic signatures
#' identify pan-cancer classes of tumors beyond tissue of origin.
#' Scientific Reports 10 (1): 8341.
#' \item Shen, Haipeng, and Jianhua Z. Huang. 2008.
#' Sparse Principal Component
#' Analysis via Regularized Low Rank Matrix Approximation.
#' Journal of Multivariate Analysis 99 (6): 1015-1034.
#' \item Baglama, Jim, Lothar Reichel, and B W Lewis. 2018.
#' Irlba: Fast Truncated Singular Value Decomposition and
#' Principal Components Analysis for Large Dense and
#' Sparse Matrices.
#' \item Taskesen, Erdogan, Sjoerd M. H. Huisman, Ahmed Mahfouz,
#' Jesse H. Krijthe, Jeroen de Ridder, Anja van de Stolpe,
#' Erik van den Akker, Wim Verhaegh, and Marcel J. T. Reinders. 2016.
#' Pan-Cancer Subtyping in a 2D-Map Shows Substructures
#' That Are Driven by Specific Combinations of Molecular
#' Characteristics. Scientific Reports 6 (1):24949.
#' \item van der Maaten, L., and G. Hinton. 2008. Visualizing Data
#' Using t-SNE. Journal of Machine Learning Research 9: 2579-2605.
#' }
#' @export
#' @examples
#' # Example 1: sparse PCA of a list of omic blocks.
#' library("MOSS")
#' sim_data <- simulate_data()
#' set.seed(43)
#'
#' # Extracting simulated omic blocks.
#' sim_blocks <- sim_data$sim_blocks
#'
#' # Extracting subjects and features labels.
#' lab.sub <- sim_data$labels$lab.sub
#' lab.feat <- sim_data$labels$lab.feat
#' out <- moss(sim_blocks[-4],
#' method = "pca",
#' nu.v = seq(1, 200, by = 100),
#' nu.u = seq(1, 100, by = 50),
#' alpha.v = 0.5,
#' alpha.u = 1
#' )
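#'
#' # Selected degrees of sparsity for right and left Eigenvectors
#' # (see the 'sparse' element in the Value section).
#' out$sparse$opt_dg_v
#' out$sparse$opt_dg_u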
#' \donttest{
#' library(ggplot2)
#' library(ggthemes)
#' library(viridis)
#' library(cluster)
#' library(fpc)
#'
#' set.seed(43)
#'
#' # Example 2: sparse PCA with t-SNE, clustering, and association with
#' # predefined groups of subjects.
#' out <- moss(sim_blocks[-4],axes.pos=c(1:5),
#' method = "pca",
#' nu.v = seq(1, 200, by = 10),
#' nu.u = seq(1, 100, by = 2),
#' alpha.v = 0.5,
#' alpha.u = 1,
#' tSNE = TRUE,
#' cluster = TRUE,
#' clus.lab = lab.sub,
#' plot = TRUE
#' )
#'
#' # This shows clusters obtained with labels from pre-defined groups
#' # of subjects.
#' out$clus_plot
#'
#' # This shows the statistical overlap between PCs and the pre-defined
#' # groups of subjects.
#' out$subLabels_vs_PCs
#'
#' # This shows the contribution of each omic to the features
#' # selected by PC index.
#' out$selected_items
#'
#' # This shows features forming signatures across clusters.
#' out$feat_signatures
#'
#' # Example 3: Multi-block PCA with sparsity.
#' out <- moss(sim_blocks[-4],axes.pos=1:5,
#' method = "mbpca",
#' nu.v = seq(1, 200, by = 10),
#' nu.u = seq(1, 100, by = 2),
#' alpha.v = 0.5,
#' alpha.u = 1,
#' tSNE = TRUE,
#' cluster = TRUE,
#' clus.lab = lab.sub,
#' plot = TRUE
#' )
#' out$clus_plot
#'
#' # This shows the 'weight' each omic block has on the variability
#' # explained by each PC. Weights in each PC add up to one.
#' out$block_weights
#'
#' # Example 4: Partial least squares (PLS) with sparsity.
#' out <- moss(sim_blocks[-4],axes.pos=1:5,
#' K.X = 500,
#' K.Y = 5,
#' method = "pls",
#' nu.v = seq(1, 100, by = 2),
#' nu.u = seq(1, 100, by = 2),
#' alpha.v = 1,
#' alpha.u = 1,
#' tSNE = TRUE,
#' cluster = TRUE,
#' clus.lab = lab.sub,
#' resp.block = 3,
#' plot = TRUE
#' )
#' out$clus_plot
#'
#' # Get some measurement of accuracy at detecting features with signal
#' # versus background noise.
#' table(out$sparse$u[, 1] != 0, lab.feat[1:2000])
#' table(out$sparse$v[, 1] != 0, lab.feat[2001:3000])
#'
#' # Example 5: PCA-LDA.
#' out <- moss(sim_blocks,
#' method = "pca-lda",
#' cluster = TRUE,
#' resp.block = 4,
#' clus.lab = lab.sub,
#' plot = TRUE
#' )
#' out$clus_plot
#' }
#'
moss <- function(data.blocks, scale.arg = TRUE, norm.arg = TRUE,
method = "pca", resp.block = NULL,covs=NULL,
K.X = 5, K.Y = K.X, verbose = TRUE, nu.parallel = FALSE,
nu.u = NULL, nu.v = NULL,
alpha.u = 1, alpha.v = 1, plot = FALSE, cluster = FALSE,
clus.lab = NULL, tSNE = FALSE,
axes.pos = seq_len(K.Y),approx.arg = FALSE,
exact.dg = FALSE, use.fbm = FALSE,lib.thresh = TRUE) {
# Inputs need to be a list of data matrices.
if (!is.list(data.blocks)) stop("Input has to be a list with omic
blocks.")
# Checking if the right packages are present for plotting.
if (plot == TRUE) {
if (!requireNamespace("viridis", quietly = TRUE)) {
stop("Package 'viridis' needs to be installed for graphical
displays.")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("Package 'ggplot2' needs to be installed for graphical
displays.")
}
if (!requireNamespace("ggpmisc", quietly = TRUE)) {
stop("Package 'ggpmisc' needs to be installed for showing peaks
on the PEV trajectory.")
}
if (!requireNamespace("ggthemes", quietly = TRUE)) {
stop("Package 'ggthemes' need to be installed for graphical
displays.")
}
}
# Only objects of class 'matrix' or 'FBM' are accepted.
if (any(vapply(data.blocks, function(X) {
any(vapply(
c("matrix", "FBM", "array"), function(x) inherits(X, x),
TRUE
))
}, TRUE) == FALSE)) {
stop("Elements in data.blocks
         have to be 'array', 'matrix', or 'FBM' objects.")
}
# Number of data blocks.
M <- length(data.blocks)
  # Only one core for big_svd calculations within moss.
ncores <- 1
# Is tSNE being passed as logical?
if (is.logical(tSNE) && (tSNE == FALSE)) tSNE <- NULL
# Is cluster being passed as logical?
if (is.logical(cluster) && (cluster == FALSE)) cluster <- NULL
# Always plot results if any of these conditions is met.
if (is.null(tSNE) == FALSE | is.null(cluster) == FALSE |
is.null(clus.lab) == FALSE) {
plot <- TRUE
}
# Checking dimnames.
if (grepl(method, pattern = "-lda")) dim.names <-
lapply(data.blocks[-resp.block],dimnames)
else dim.names <-
lapply(data.blocks,dimnames)
# Get features names.
feature.labels <- lapply(dim.names,function(x) x[[2]])
if (Reduce("+", lapply(dim.names, function(x) is.null(x[[1]]))) > 1) {
warning("Row names missing for at least one omic block.")
}
else {
if (M > 1) {
for (l in seq(1, M)) {
      if (!isTRUE(all.equal(dim.names[[1]][[1]], dim.names[[l]][[1]]))) {
warning("Row names across omic blocks are inconsistent.")
}
}
}
}
# Checking number of rows.
n <- unlist(lapply(data.blocks, nrow))
  if (length(unique(n)) > 1) {
    stop("Different number of rows across omics.")
  } else {
    n <- unique(n)
  }
if (sum(vapply(data.blocks, inherits, TRUE, what = "FBM")) == M)
warning("We cannot check consistency among rows names.
Make sure rows across omic blocks represent the same
subjects/samples.")
# Turning omic blocks into FBM
if (use.fbm == TRUE) {
if (!requireNamespace("bigstatsr", quietly = TRUE)) {
stop("Package 'bigstatsr' needs to be installed to
handle FBM objects.")
} else {
if (verbose) message("Turning omic blocks into FBM objects.")
if (grepl(method, pattern = "-lda")) {
data.blocks[-resp.block] <- lapply(
data.blocks[-resp.block],
bigstatsr::as_FBM
)
}
if (!grepl(method, pattern = "-lda")) {
data.blocks <- lapply(data.blocks, bigstatsr::as_FBM)
}
}
}
# Checking the class of each data block.
block.class <- rep("matrix", M)
block.class[vapply(data.blocks, inherits, TRUE, what = "FBM")] <- "FBM"
# Printing method's name.
c(
"Low Rank Regression (LRR)",
"Partial Least Squares (PLS)",
"Multi-Block Low Rank Regression (MBLRR)",
"Multi-Block Partial Least Squares (MBPLS)",
"Principal Components Analysis (PCA)",
"Multi-Block Principal Components Analysis (MBPCA)",
"Principal Components Analysis - Linear Discriminat Analysis
(PCA-LDA)",
"Multi-Block Principal Components Analysis -
Linear Discriminant Analysis
(MBPCA-LDA)",
"Partial Least Squares - Linear Discriminant Analysis (PLS-LDA)",
"Multi-Block Partial Least Squares - Linear Discriminant Analysis
(MBPLS-LDA)",
"Low Rank Regression - Linear Discriminant Analysis (LRR-LDA)",
"Multi-Block Low Rank Regression - Linear Discriminant Analysis
(MBLRR-LDA)"
)[
c(
"lrr", "pls", "mblrr", "mbpls", "pca", "mbpca", "pca-lda", "mbpca-lda",
"pls-lda",
"mbpls-lda", "lrr-lda", "mblrr-lda"
) == method
] -> method.name
title. <- paste0(
"| ",
method.name,
" for ",
M,
ifelse(M == 1,
" data block and ",
" data blocks and "
),
K.Y,
    ifelse(K.Y == 1,
" latent factor |",
" latent factors |"
)
)
if (verbose) {
message(rep("-", nchar(title.)))
message(title.)
message(rep("-", nchar(title.)))
}
# Checking if the right packages are present to handle
# approximated SVDs.
if (approx.arg == TRUE) {
if (any(block.class == "FBM")) {
if (!requireNamespace("bigstatsr", quietly = TRUE)) {
stop("Package 'bigstatsr' needs to be installed to handle
FBM objects.")
}
}
else {
if (!requireNamespace("irlba", quietly = TRUE)) {
stop("Package 'irlba' needs to be installed
to get fast truncated SVD solutions.")
}
}
}
# Checking if the right packages are present for Rtsne.
if (is.null(tSNE) == FALSE) {
if (!requireNamespace("Rtsne", quietly = TRUE)) {
stop("Package 'Rtsne' needs to be installed
to generate t-SNE projections.")
}
}
# Checking if the right packages are present for dbscan.
if (is.null(cluster) == FALSE) {
if (!requireNamespace("dbscan",
quietly = TRUE
)) {
stop("Package 'dbscan' needs to be installed for clustering.")
}
}
# Available methods.
if (!any(method %in%
c(
"pca", "mbpca", "pls", "mbpls", "lrr", "mblrr", "pca-lda",
"mbpca-lda",
"pls-lda", "mbpls-lda", "lrr-lda", "mblrr-lda"
))) {
stop(
"Method ",
method,
" not supported.
Try one of these: pca, mbpca, pls, mbpls, lrr, mblrr, pca-lda,
mbpca-lda, pls-lda, mbpls-lda, lrr-lda, or mblrr-lda."
)
}
  # At least two latent factors for tSNE.
  if (is.null(tSNE) == FALSE & K.Y < 2) {
    stop("Number of latent factors needs to be at least 2
         for tSNE projection.")
}
# Checking for missing data.
if (verbose) message("Checking for missing values
Imputing if necessary.")
if (grepl(method, pattern = "-lda")) data.blocks[-resp.block] <-
lapply(data.blocks[-resp.block], prepro_na)
else data.blocks <- lapply(data.blocks, prepro_na)
# Should we adjust for a list of covariates?
if(!is.null(covs)) {
if (verbose) message("Adjusting for covariates effects.\n")
if (grepl(method, pattern = "-lda")) {
data.blocks[-resp.block] <- cov_adj(data.blocks[-resp.block],
covs,
n,
dim.names)
}
if (!grepl(method, pattern = "-lda")) {
data.blocks <- cov_adj(data.blocks,
covs,
n,
dim.names)
}
}
# If method is a supervised one,
# the first response block is chosen: Y = data.blocks[[1]]
if (!any(method %in% c("pca", "mbpca"))) {
if (is.null(resp.block)) {
warning("If not specified, the first data block in the list
will be used as response!")
resp.block <- 1
}
else {
# The response block needs to correspond to one element in the list
if (!any(resp.block %in% seq_len(length(data.blocks)))) {
stop("resp.block needs to be an integer between 1 and
the total number of data blocks")
} else {
data.blocks <-
data.blocks[c(
resp.block,
seq_len(length(data.blocks))[-resp.block]
)]
message("Block ", resp.block, " used as response.")
}
}
}
# Naming data blocks if necessary.
if (is.null(names(data.blocks))) {
names(data.blocks) <- paste(
"Block",
seq_len(M)
)
} else {
tmp <- names(data.blocks) == ""
names(data.blocks)[tmp] <- paste("Block", seq_len(M))[tmp]
}
# Standardizing/Normalizing data blocks.
  if (scale.arg | norm.arg) {
if (verbose) message("Standardizing/Normalizing data blocks.")
if (grepl(method, pattern = "-lda") |
grepl(method, pattern = "lrr") |
grepl(method, pattern = "pls")) {
if (M == 2) norm.arg <- FALSE
data.blocks[-1] <- lapply(data.blocks[-1],
prepro_sub,
scale.arg = scale.arg,
norm.arg = norm.arg
)
}
else {
if (M == 1) norm.arg <- FALSE
data.blocks <-
lapply(data.blocks,
prepro_sub,
scale.arg = scale.arg,
norm.arg = norm.arg
)
}
}
  # Penalty types by type of Eigenvector. Validation of alpha.v and
  # alpha.u happens regardless of 'verbose'.
  if (alpha.v > 1 | alpha.v < 0) {
    stop("alpha.v must be a number within [0,1]")
  } else {
    if (alpha.v < 1) {
      if (alpha.v == 0) {
        if (verbose) message("Ridge shrinking in RIGHT Eigenvectors (no feature selection).")
      } else {
        if (verbose) message("Elastic net shrinking & selection in RIGHT Eigenvectors.")
      }
    } else if (verbose & is.null(nu.v) == FALSE) {
      message("LASSO in RIGHT Eigenvectors.")
    }
  }
  if (alpha.u > 1 | alpha.u < 0) {
    stop("alpha.u must be a number within [0,1]")
  } else {
    if (alpha.u < 1) {
      if (alpha.u == 0) {
        if (verbose) message("Ridge shrinking in LEFT Eigenvectors (no feature selection).")
      } else {
        if (verbose) message("Elastic net shrinking & selection in LEFT Eigenvectors.")
      }
    } else if (verbose & is.null(nu.u) == FALSE) {
      message("LASSO in LEFT Eigenvectors.")
    }
  }
# No plots displayed for K.X = 1, or K.Y = 1
if ((K.Y == 1 | K.X == 1) & plot == TRUE) {
warning("Plots will only be generated for more than ONE laten factor")
plot <- FALSE
}
# Getting SVD from chosen method.
if (method == "pca") {
SVD_X <- rrr_pca(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
svd0 <- SVD_X$SVD
O <- SVD_X$Z
left.lab <- "Subjects"
right.lab <- "Predictors"
}
if (method == "mbpca") {
SVD_X <- rrr_mb_pca(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
svd0 <- SVD_X$SVD[[1]]
O <- SVD_X$Z
left.lab <- "Subjects"
right.lab <- "Predictors"
}
if (method == "pls") {
SVD_X <- rrr_pls(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
svd0 <- SVD_X$SVD
O <- SVD_X$LR
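    # For supervised methods, left Eigenvectors span predictors and
    # right Eigenvectors responses, so the user-supplied sparsity
    # grids and EN parameters are swapped accordingly (here and in
    # the other supervised branches below).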
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
if (method == "mbpls") {
SVD_X <- rrr_mb_pls(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
svd0 <- SVD_X$SVD[[1]]
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
if (method == "lrr") {
SVD_X <- rrr_rrr(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
svd0 <- SVD_X$SVD
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
if (method == "mblrr") {
SVD_X <- rrr_mb_rrr(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
svd0 <- SVD_X$SVD[[1]]
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
if (method == "pca-lda") {
if (ncol(data.blocks[[1]]) != 1 |
inherits(data.blocks[[1]][, 1], "character") == FALSE) {
stop("Response block needs to be a column with characters
representing different classes.")
}
if (min(table(data.blocks[[1]])) < 2) {
stop("Response block needs at least 2 samples by class")
}
data.blocks[[1]] <- stats::model.matrix(~ -1 + data.blocks[[1]])
SVD_X <- rrr_lda(data.blocks, block.class, K.X, ncores, verbose, M)
svd0 <- SVD_X$SVD
O <- SVD_X$Z
left.lab <- "Subjects"
right.lab <- "Predictors"
}
if (method == "mbpca-lda") {
if (ncol(data.blocks[[1]]) != 1 |
inherits(data.blocks[[1]][, 1], "character") == FALSE) {
stop("Response block needs to be a column with characters
representing
different classes.")
}
if (min(table(data.blocks[[1]])) < 2) {
stop("Response block needs at least 2 samples by class")
}
data.blocks[[1]] <- stats::model.matrix(~ -1 + data.blocks[[1]])
SVD_X <- rrr_mb_lda(data.blocks, block.class, K.X, ncores, verbose, M)
svd0 <- SVD_X$SVD[[1]]
O <- SVD_X$Z
left.lab <- "Subjects"
right.lab <- "Predictors"
}
if (method == "pls-lda") {
if (ncol(data.blocks[[1]]) != 1 |
inherits(data.blocks[[1]][, 1], "character") == FALSE) {
stop("Response block needs to be a column with characters
representing
different classes.")
}
if (min(table(data.blocks[[1]])) < 2) {
stop("Response block needs at least 2 samples by class")
}
data.blocks[[1]] <- stats::model.matrix(~ -1 + data.blocks[[1]])
SVD_X <- rrr_pls(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
SVD_X$SVD$`w.x` <- SVD_X$SVD$u
svd0 <- SVD_X$SVD
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
if (method == "mbpls-lda") {
if (ncol(data.blocks[[1]]) != 1 |
inherits(data.blocks[[1]][, 1], "character") == FALSE) {
stop("Response block needs to be a
column with characters representing different classes.")
}
if (min(table(data.blocks[[1]])) < 2) {
stop("Response block needs at least 2 samples by class")
}
data.blocks[[1]] <- stats::model.matrix(~ -1 + data.blocks[[1]])
SVD_X <- rrr_mb_pls(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
    SVD_X$SVD[[1]]$`w.x` <- SVD_X$SVD[[1]]$u
svd0 <- SVD_X$SVD[[1]]
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Response"
}
if (method == "lrr-lda") {
if (ncol(data.blocks[[1]]) != 1 |
inherits(data.blocks[[1]][, 1], "character") == FALSE) {
stop("Response block needs to be a
column with characters representing different classes.")
}
if (min(table(data.blocks[[1]])) < 2) {
stop("Response block needs at least 2 samples by class")
}
data.blocks[[1]] <- stats::model.matrix(~ -1 + data.blocks[[1]])
SVD_X <- rrr_rrr(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
SVD_X$SVD$`w.x` <- SVD_X$SVD$u
svd0 <- SVD_X$SVD
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
if (method == "mblrr-lda") {
if (ncol(data.blocks[[1]]) != 1 |
inherits(data.blocks[[1]][, 1], "character") == FALSE) {
stop("Response block needs to be a column with characters
representing different classes.")
}
if (min(table(data.blocks[[1]])) < 2) {
stop("Response block needs at least 2 samples by class")
}
data.blocks[[1]] <- stats::model.matrix(~ -1 + data.blocks[[1]])
SVD_X <- rrr_mb_rrr(data.blocks, block.class, K.X, K.Y, ncores, verbose, M)
    SVD_X$SVD[[1]]$`w.x` <- SVD_X$SVD[[1]]$u
svd0 <- SVD_X$SVD[[1]]
O <- SVD_X$LR
aux <- nu.u
nu.u <- nu.v
nu.v <- aux
aux <- alpha.u
alpha.u <- alpha.v
alpha.v <- aux
left.lab <- "Predictors"
right.lab <- "Responses"
}
out <- NULL
out$B <- O
# Get subjects projections.
if (method %in% c("pls","lrr","mbpls","mblrr")) {
if (verbose)
message("Getting subjects projections for response block.")
Q <- data.blocks[[1]] %*% svd0$v
}
else Q <- svd0$u
out$Q <- Q
n <- nrow(Q)
out$dense <- SVD_X$SVD
# Store potential number of dimensions.
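  # Liberal rule: PC index with the steepest drop in singular values.
  # Conservative rule: last PC index whose squared second derivative of
  # the singular values exceeds its mean plus one SD (shifted by the
  # double-differencing lag), capped at K.Y.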
dsvd_d2 <- c(0, diff(c(0, diff(svd0$d))))
th <- which(dsvd_d2 ^ 2 >= mean(dsvd_d2 ^ 2) + stats::sd(dsvd_d2 ^ 2))
nf_dsvd_d2 <- ifelse(th[length(th)] + 2 <= K.Y,
th[length(th)] + 2,
K.Y)
out$opt.num.dim <- c("Liberal"=which.min(c(0, diff(svd0$d))),
"Conservative"= nf_dsvd_d2)
# Scree plot.
if (plot & K.Y > 2) {
aux.scree <- data.frame(
y = c(svd0$d, c(
0, diff(svd0$d),
c(0, diff(c(0, diff(svd0$d))))
)),
x = rep(1:K.Y, times = 3),
type = rep(c(
"Scree plot",
"Eigen Values\n first derivative",
"Eigen Values\n second derivative"
),
each = K.Y
)
)
aux.scree$type <- factor(aux.scree$type,
levels = c(
"Scree plot",
"Eigen Values\n first derivative",
"Eigen Values\n second derivative"
),
    ordered = TRUE
)
suppressWarnings(out$scree_plot <-
ggplot2::ggplot(
aux.scree,
ggplot2::aes_string(
x = "x",
y = "y"
)
) +
ggplot2::geom_line(col = "#21908C80") +
ggplot2::geom_point(col = "#44015480", pch = 15) +
ggplot2::facet_wrap(. ~ type, scales = "free") +
ggplot2::scale_x_continuous("PC index") +
ggplot2::scale_y_continuous("Eigenvalues\n
and derivatives") +
ggplot2::theme_minimal())
}
  if (!is.null(nu.u) | !is.null(nu.v)) {
# Sparsity constraints.
if (verbose) message("Imposing sparsity constraints.")
if (nu.parallel) {
aux.svd <- ssvdEN_sol_path_par(
O = O, svd.0 = svd0,
scale = scale.arg, center = scale.arg,
dg.grid.right = nu.v, dg.grid.left = nu.u,
n.PC = K.Y, alpha.f = alpha.v, alpha.s = alpha.u,
plot = plot, approx = approx.arg,
verbose = verbose,
left.lab = left.lab,
right.lab = right.lab,
exact.dg = exact.dg,
lib.thresh = lib.thresh
)
}
else {
aux.svd <- ssvdEN_sol_path(
O = O, svd.0 = svd0,
scale = scale.arg, center = scale.arg,
dg.grid.right = nu.v, dg.grid.left = nu.u,
n.PC = K.Y, alpha.f = alpha.v, alpha.s = alpha.u,
plot = plot, approx = approx.arg,
verbose = verbose,
left.lab = left.lab,
right.lab = right.lab,
exact.dg = exact.dg,
lib.thresh = lib.thresh
)
}
out$sparse <- aux.svd$SVD
# Get selected features.
out$selected_items <- moss_select(data.blocks = data.blocks,
resp.block = resp.block,
SVD = out$sparse,
K = axes.pos,
plot = plot)
if (plot) out$tun_dgSpar_plot <- aux.svd$plot
}
# Get tSNE and/or clusters displays.
if (plot == TRUE) {
# No tSNE.
if (is.null(tSNE)) {
if (is.null(clus.lab)) {
aux.name <- rep("Subjects", n)
} else {
aux.name <- clus.lab
}
# Eigenvectors obtained from method-LDA.
if (grepl(method, pattern = "-lda")) {
if (is.null(cluster)) {
out$PC1_2_plot <- tsne2clus(list(Y = scale(svd0$w.x[, 1:2])),
labels = aux.name,
aest = aest.f(aux.name),
xlab = paste0("LDF",1),
ylab = paste0("LDF",2),
clus = FALSE
)
}
else {
if (is.list(cluster) == FALSE) {
cluster <- NULL
cluster$eps_range <- c(0, 4)
cluster$eps_res <- 100
cluster$min_clus_size <- 2
}
if (verbose) message("Getting clusters via DBSCAN.")
out$clus_plot <- tsne2clus(list(Y = scale(svd0$w.x[,1:2])),
labels = aux.name,
aest = aest.f(aux.name),
eps_range = cluster$eps_range,
eps_res = cluster$eps_res,
xlab = paste0("LDF",1),
ylab = paste0("LDF",2),
clus = TRUE,
min.clus.size = cluster$min_clus_size
)
}
}
# Eigenvectors obtained without LDA.
else {
if (is.null(cluster)) {
out$PC1_2_plot <- tsne2clus(list(Y = scale(Q[, axes.pos[1:2]])),
labels = aux.name,
aest = aest.f(aux.name),
xlab = paste0("PC",axes.pos[1]),
ylab = paste0("PC",axes.pos[2]),
clus = FALSE
)
}
else {
if (is.list(cluster) == FALSE) {
cluster <- NULL
cluster$eps_range <- c(0, 4)
cluster$eps_res <- 100
cluster$min_clus_size <- 2
}
if (verbose) message("Getting clusters via DBSCAN.")
out$clus_plot <- tsne2clus(list(Y = scale(Q[, axes.pos[1:2]])),
labels = aux.name,
aest = aest.f(aux.name),
eps_range = cluster$eps_range,
eps_res = cluster$eps_res,
xlab = paste0("PC",axes.pos[1]),
ylab = paste0("PC",axes.pos[2]),
clus = TRUE,
min.clus.size = cluster$min_clus_size
)
}
}
}
# With tSNE
else {
if (verbose) message("Calculating a tSNE map")
# Eigenvectors obtained from method-LDA.
if (grepl(method, pattern = "-lda")) {
# If tSNE isn't a list, create it for default arguments.
if (is.list(tSNE)) {
tSNE$"Z" <- svd0$w.x
tSNE$parallel <- nu.parallel
tSNE <- do.call(pca2tsne, tSNE)
}
else {
tSNE <- do.call(pca2tsne, list(
"Z" = svd0$w.x,
"perp" = 50,
"n.samples" = 1,
"n.iter" = 1e3, "parallel"=nu.parallel
))
}
# Plot TSNE.
if (is.null(clus.lab)) {
aux.name <- rep("Subjects", n)
} else {
aux.name <- clus.lab
}
out$tSNE_plot <- tsne2clus(tSNE,
labels = aux.name,
aest = aest.f(aux.name),
xlab = paste0(
"tSNE_x{LDF", 1, "-",
ncol(data.blocks[[1]]),
"}"
),
ylab = paste0(
"tSNE_y{LDF", 1, "-",
ncol(data.blocks[[1]]),
"}"
),
clus = FALSE
)
# Should we cluster left factors after tSNE?
if (is.null(cluster) == FALSE) {
if (is.logical(cluster) & cluster) {
cluster <- NULL
cluster$eps_range <- c(1, 4)
cluster$eps_res <- 100
cluster$min_clus_size <- 2
}
if (is.null(clus.lab)) {
aux.name <- rep("Subjects", n)
} else {
aux.name <- clus.lab
}
if (verbose) message("Getting clusters via DBSCAN.")
out$clus_plot <- tsne2clus(tSNE,
labels = aux.name,
aest = aest.f(aux.name),
eps_range = cluster$eps_range,
eps_res = cluster$eps_res,
xlab = paste0(
"tSNE_x{LDF",
paste0(range(seq_len(K.Y)[axes.pos]),
collapse = "-"
), "}"
),
ylab = paste0(
"tSNE_y{LDF",
paste0(range(seq_len(K.Y)[axes.pos]),
collapse = "-"
), "}"
),
clus = TRUE,
min.clus.size = cluster$min_clus_size
)
}
}
# Eigenvectors obtained without LDA.
else {
# If tSNE isn't a list, create it for default arguments.
if (is.list(tSNE)) {
tSNE$"Z" <- Q[, axes.pos]
tSNE$parallel <- nu.parallel
tSNE <- do.call(pca2tsne, tSNE)
}
else {
tSNE <- do.call(pca2tsne, list(
"Z" = Q[, axes.pos],
"perp" = 50, "n.samples" = 1,
"n.iter" = 1e3,"parallel" = nu.parallel
))
}
# Plot tSNE.
if (is.null(clus.lab)) {
aux.name <- rep("Subjects", n)
} else {
aux.name <- clus.lab
}
out$tSNE_plot <- tsne2clus(tSNE,
labels = aux.name,
aest = aest.f(aux.name),
xlab = paste0(
"tSNE_x{PC",
paste0(range(seq_len(K.Y)[axes.pos]),
collapse = "-"
),
"}"
),
ylab = paste0(
"tSNE_y{PC",
paste0(range(seq_len(K.Y)[axes.pos]),
collapse = "-"
),
"}"
),
clus = FALSE
)
# Should we cluster left factors after tSNE?
        if (!is.null(cluster)) {
if (is.list(cluster) == FALSE) {
cluster <- NULL
cluster$eps_range <- c(0, 4)
cluster$eps_res <- 100
cluster$min_clus_size <- 2
}
if (is.null(clus.lab)) {
aux.name <- rep("Subjects", n)
} else {
aux.name <- clus.lab
}
if (verbose) message("Getting clusters via DBSCAN.")
out$clus_plot <- tsne2clus(tSNE,
labels = aux.name,
aest = aest.f(aux.name),
eps_range = cluster$eps_range,
eps_res = cluster$eps_res,
xlab = paste0(
"tSNE_x{PC",
paste0(range(seq_len(K.Y)[axes.pos]),
collapse = "-"
),
"}"
),
ylab = paste0(
"tSNE_y{PC",
paste0(range(seq_len(K.Y)[axes.pos]),
collapse = "-"
),
"}"
),
clus = TRUE,
min.clus.size = cluster$min_clus_size
)
}
}
}
}
# Multi-block: contribution by omic block.
if (grepl(method, pattern = "mb")) {
if (method == "mbpca") {
M1 <- M + 1
block.names1 <- names(data.blocks)
}
    else {
      M1 <- M
      block.names1 <- names(data.blocks)[-1]
    }
names(out$dense) <- c("Global", block.names1)
if (plot) {
block.w <- data.frame(
b = do.call(
"c",
lapply(
2:M1,
function(i) out$dense[[i]]$b[1:K.Y]
)
),
Omic = rep(block.names1, each = K.Y),
LX.m = rep(1:K.Y, times = M1 - 1)
)
out$block_weights <- ggplot2::ggplot(
block.w,
ggplot2::aes_string(
x = "LX.m",
y = "b",
col = "Omic",
shape = "Omic"
)
) +
ggplot2::scale_shape_manual(values = c(14:(13 + M1))) +
ggplot2::scale_color_manual(values = viridis::viridis(M1 - 1)) +
ggplot2::geom_point(size = 2.5) +
ggplot2::scale_x_continuous("PC index") +
ggplot2::scale_y_continuous("Weights") +
ggplot2::geom_line() +
ggplot2::theme_minimal() +
ggplot2::theme(legend.position = "top")+
ggplot2::guides("col"=ggplot2::guide_legend(title = "Omic",
title.position = "top",
title.hjust = 0.5),
"shape"=ggplot2::guide_legend(title = "Omic",
title.position = "top",
title.hjust = 0.5) )
}
pc.aux <- Q
}
else {
pc.aux <- Q
}
# Evaluating the overlap between principal components and pre-defined
# groups of subjects.
if (plot == TRUE & is.null(clus.lab) == FALSE) {
if (is.null(nu.u) == TRUE | alpha.u == 0) {
if (verbose) {
message("Evaluating association between PCs selected
and pre-established labels.")
}
}
if (is.null(nu.u) == FALSE & alpha.u > 0) {
if (verbose) {
message("Evaluating overlap between groups of selected subjects
and pre-established labels.")
}
}
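    # One-hot encode the pre-established labels (no intercept),
    # keeping NAs, so each label group can be tested against every PC.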
forml <- ~ -1 + clus.lab
Z_mf <- stats::model.frame(forml, na.action = "na.pass")
Z <- stats::model.matrix(forml, data = Z_mf)
colnames(Z) <- sort(unique(clus.lab[!is.na(clus.lab)]))
# Creating a data.frame to store association between clusters and
# labels.
clus.w <- do.call("rbind", lapply(seq_len(ncol(Z)), function(i) {
if (is.null(nu.u) == FALSE & alpha.u > 0 &
!grepl(method, pattern = "pls")) {
x2.res <- apply(as.matrix(Q), 2, function(x) {
suppressWarnings(test <- stats::chisq.test(table(x != 0, Z[, i])))
return(c("Est" = test$statistic))
})
}
else {
x2.res <- apply(as.matrix(Q), 2, function(x) {
test <- stats::kruskal.test(x, Z[, i])
return(c("Est" = test$statistic))
})
}
x2.res <- as.data.frame(x2.res)
x2.res <- cbind(x2.res, 1:K.Y, colnames(Z)[i])
colnames(x2.res) <- c("Est", "PC", "Label")
return(x2.res)
}))
if (is.null(nu.u) == FALSE & alpha.u > 0) {
test.name <- "Chi-square statistics"
} else {
test.name <- "Kruskal-Wallis statistics"
}
out$subLabels_vs_PCs <-
ggplot2::ggplot(
data = clus.w,
ggplot2::aes_string(
x = "PC",
y = "Est",
col = "Label",
shape = "Label"
)
) +
ggplot2::scale_shape_manual(values = seq(8, 8 + ncol(Z), by = 1)) +
ggplot2::scale_color_manual(values = viridis::viridis(ncol(Z) + 1,
option = "A"
)[-(ncol(Z) + 1)]) +
ggplot2::geom_point(size = 2.5) +
ggplot2::scale_x_continuous("PC index") +
ggplot2::scale_y_continuous(test.name) +
ggplot2::geom_line() +
ggplot2::theme_minimal() +
ggplot2::theme(legend.position = "top")+
ggplot2::guides("col"=ggplot2::guide_legend(title = "Label",
title.position = "top",
title.hjust = 0.5),
"shape"=ggplot2::guide_legend(title = "Label",
title.position = "top",
title.hjust = 0.5) )
}
# Evaluating the overlap between principal components and
# clusters of subjects.
if (plot == TRUE &
is.null(cluster) == FALSE &
length(unique(out$clus_plot$dbscan.res$cluster[
out$clus_plot$dbscan.res$cluster != 0
])) > 1) {
if (is.null(nu.u) == TRUE | alpha.u == 0) {
if (verbose) message("Evaluating association between
PCs and detected clusters.")
}
if (is.null(nu.u) == FALSE & alpha.u > 0) {
if (verbose) message("Evaluating overlap between groups of
selected subjects and detected clusters.")
}
clus.lab <- out$clus_plot$dbscan.res$cluster
    # Getting signatures if sparsity was imposed.
    if (!is.null(nu.u) | !is.null(nu.v)) {
if (all(unlist(lapply(feature.labels,
is.null)))) feature.labels <- NULL
out$feat_signatures <-
moss_signatures(data.blocks,
out$selected_items,
clus.lab,
plot,
feature.labels)
}
clus.lab[clus.lab == 0] <- NA
forml <- ~ -1 + as.factor(clus.lab)
Z_mf <- stats::model.frame(forml, na.action = "na.pass")
Z <- stats::model.matrix(forml, data = Z_mf)
colnames(Z) <- paste("Cluster", seq_len(ncol(Z)))
# Creating a data.frame to store association between PCs and clusters.
clus.w <- do.call("rbind", lapply(seq_len(ncol(Z)), function(i) {
if (is.null(nu.u) == FALSE & alpha.u > 0) {
x2.res <- apply(as.matrix(Q), 2, function(x) {
suppressWarnings(test <- stats::chisq.test(table(
x != 0,
Z[, i]
)))
return(c("Est" = test$statistic))
})
}
else {
x2.res <- apply(as.matrix(Q), 2, function(x) {
test <- stats::kruskal.test(x, Z[, i])
return(c("Est" = test$statistic))
})
}
x2.res <- as.data.frame(x2.res)
x2.res <- cbind(x2.res, 1:K.Y, colnames(Z)[i])
colnames(x2.res) <- c("Est", "PC", "Cluster")
return(x2.res)
}))
if (is.null(nu.u) == FALSE & alpha.u > 0 &
!grepl(method, pattern = "pls")) {
test.name <- "Chi-square statistics"
} else {
test.name <- "Kruskal-Wallis statistics"
}
out$clusters_vs_PCs <- ggplot2::ggplot(
data = clus.w,
ggplot2::aes_string(
x = "PC",
y = "Est",
col = "Cluster",
shape = "Cluster"
)
) +
ggplot2::scale_shape_manual(values = seq(8, 8 + ncol(Z), by = 1)) +
ggplot2::scale_color_manual(values = viridis::viridis(ncol(Z) + 1,
option = "D"
)[-(ncol(Z) + 1)]) +
ggplot2::geom_point(size = 2.5) +
ggplot2::scale_x_continuous("PC index") +
ggplot2::scale_y_continuous(test.name) +
ggplot2::geom_line() +
ggplot2::theme_minimal() +
ggplot2::theme(legend.position = "top")+
ggplot2::guides("col"=ggplot2::guide_legend(title = "Cluster",
title.position = "top",
title.hjust = 0.5),
"shape"=ggplot2::guide_legend(title = "Cluster",
title.position = "top",
title.hjust = 0.5) )
}
else {
    # Getting signatures if sparsity was imposed.
    if (!is.null(nu.u) | !is.null(nu.v)) {
if (all(unlist(lapply(feature.labels,
is.null)))) feature.labels <- NULL
out$feat_signatures <-
moss_signatures(data.blocks,
out$selected_items,
NULL,
plot,
feature.labels)
}
}
return(out)
}