################################
## Functions to do subsetting ##
################################
#' @title Subset groups
#' @name subset_groups
#' @description Method to subset (or sort) groups
#' @param object a \code{\link{MOFA}} object.
#' @param groups character vector with the group names, numeric vector with the group indices,
#' or logical vector with the groups to be kept as TRUE.
#' @return A \code{\link{MOFA}} object
#' @export
#' @examples
#' # Using an existing trained model on simulated data
#' file <- system.file("extdata", "model.hdf5", package = "MOFA2")
#' model <- load_model(file)
#'
#' # Subset the first group
#' model <- subset_groups(model, groups = 1)
subset_groups <- function(object, groups) {
# Sanity checks
if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA")
stopifnot(length(groups) <= object@dimensions[["G"]])
# Define groups
groups <- .check_and_get_groups(object, groups)
# Subset expectations
if (length(object@expectations)>0) {
if ("Z" %in% names(object@expectations) & length(object@expectations$Z)>0)
object@expectations$Z <- object@expectations$Z[groups]
if ("Y" %in% names(object@expectations) & length(object@expectations$Y)>0)
object@expectations$Y <- sapply(object@expectations$Y, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE)
}
# Subset data
if (length(object@data)>0) {
object@data <- sapply(object@data, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE)
}
# Subset imputed data
if (length(object@imputed_data)>0) {
object@imputed_data <- sapply(object@imputed_data, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE)
}
# Subset intercepts
if (length(object@intercepts[[1]])>0) {
object@intercepts <- sapply(object@intercepts, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE)
}
# Update dimensionality
object@dimensions[["G"]] <- length(groups)
object@dimensions[["N"]] <- object@dimensions[["N"]][groups]
# Subset sample metadata
stopifnot(all(groups %in% unique(object@samples_metadata$group)))
object@samples_metadata <- object@samples_metadata[object@samples_metadata$group %in% groups,]
object@samples_metadata$group <- factor(object@samples_metadata$group, levels=groups)
# Re-order samples
samples <- unname(unlist(lapply(object@data[[1]],colnames)))
object@samples_metadata <- object@samples_metadata[match(samples, object@samples_metadata$sample),]
# Sanity checks
stopifnot(all(object@samples_metadata$sample == unlist(lapply(object@data[[1]],colnames))))
stopifnot(all(object@samples_metadata$sample == unlist(lapply(object@expectations$Z,rownames))))
stopifnot(all(object@samples_metadata$sample == unlist(lapply(object@expectations$Y[[1]],colnames))))
# Update groups names
# groups_names(object) <- groups # don't need to run this
object@data_options$groups <- groups
# Subset variance explained
if ((methods::.hasSlot(object, "cache")) && ("variance_explained" %in% names(object@cache))) {
object@cache[["variance_explained"]]$r2_per_factor <- object@cache[["variance_explained"]]$r2_per_factor[groups]
object@cache[["variance_explained"]]$r2_total <- object@cache[["variance_explained"]]$r2_total[groups]
}
return(object)
}
#' @title Subset views
#' @name subset_views
#' @description Method to subset (or sort) views
#' @param object a \code{\link{MOFA}} object.
#' @param views character vector with the view names, numeric vector with the view indices,
#' or logical vector with the views to be kept as TRUE.
#' @return A \code{\link{MOFA}} object
#' @export
#' @examples
#' # Using an existing trained model on simulated data
#' file <- system.file("extdata", "model.hdf5", package = "MOFA2")
#' model <- load_model(file)
#'
#' # Subset the first view
#' model <- subset_views(model, views = 1)
subset_views <- function(object, views) {
# Sanity checks
if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA")
stopifnot(length(views) <= object@dimensions[["M"]])
# warning("Removing views a posteriori is fine for an exploratory analysis, but you should removing them before training!")
# Define views
views <- .check_and_get_views(object, views)
# Subset relevant slots
if (length(object@expectations)>0) {
object@expectations$W <- object@expectations$W[views]
object@expectations$Y <- object@expectations$Y[views]
}
# Subset data
if (length(object@data)>0) {
object@data <- object@data[views]
}
# Subset imputed data
if (length(object@imputed_data)>0) {
object@imputed_data <- object@imputed_data[views]
}
# Subset intercepts
if (length(object@intercepts[[1]])>0) {
object@intercepts <- object@intercepts[views]
}
# Subset feature metadata
if (length(object@features_metadata)>0) {
object@features_metadata <- object@features_metadata[object@features_metadata$view %in% views,]
}
# Subset likelihoods
object@model_options$likelihoods <- object@model_options$likelihoods[views]
# Update dimensionality
object@dimensions[["M"]] <- length(views)
object@dimensions[["D"]] <- object@dimensions[["D"]][views]
# Update view names
object@data_options$views <- views
# Subset variance explained
if ((methods::.hasSlot(object, "cache")) && ("variance_explained" %in% names(object@cache))) {
object@cache[["variance_explained"]]$r2_per_factor <- lapply(object@cache[["variance_explained"]]$r2_per_factor, function(x) x[,views,drop=FALSE])
object@cache[["variance_explained"]]$r2_total <- lapply(object@cache[["variance_explained"]]$r2_total, function(x) x[views])
}
return(object)
}
#' @title Subset factors
#' @name subset_factors
#' @description Method to subset (or sort) factors
#' @param object a \code{\link{MOFA}} object.
#' @param factors character vector with the factor names, or numeric vector with the factor indices.
#' @param recalculate_variance_explained logical indicating whether to recalculate variance explained values. Default is \code{TRUE}.
#' @export
#' @return A \code{\link{MOFA}} object
#' @examples
#' # Using an existing trained model on simulated data
#' file <- system.file("extdata", "model.hdf5", package = "MOFA2")
#' model <- load_model(file)
#'
#' # Subset factors 1 to 3
#' model <- subset_factors(model, factors = 1:3)
subset_factors <- function(object, factors, recalculate_variance_explained = TRUE) {
# Sanity checks
if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA")
stopifnot(length(factors) <= object@dimensions[["K"]])
# Define factors
factors <- .check_and_get_factors(object, factors)
# Subset expectations
nodes_with_factors <- list(nodes = c("Z", "W", "Sigma", "AlphaZ", "AlphaW", "ThetaZ", "ThetaW"), axes = c(2, 2, 1, 0, 0, 0, 0))
stopifnot(all(nodes_with_factors$axes %in% c(0, 1, 2)))
if (length(object@expectations)>0) {
for (i in seq_len(length(nodes_with_factors$nodes))) {
node <- nodes_with_factors$nodes[i]
axis <- nodes_with_factors$axes[i]
if (node %in% names(object@expectations)) {
if(node != "Sigma") {
if (axis == 1) {
object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[factors,,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE)
} else if (axis == 2) {
object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[,factors,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE)
} else {
object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[factors], simplify = FALSE, USE.NAMES = TRUE)
}
} else {
if (axis == 1) {
object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[factors,,,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE)
} else if (axis == 2) {
object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[,factors,,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE)
} else if (axis == 3) {
object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[,,factors,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE)
}
}
}
}
}
# Subset interpolations
if(length(object@interpolated_Z) > 0) {
object@interpolated_Z <- lapply(object@interpolated_Z, function(g) {
# Guard against missing slots: default to NULL so the list() below does not fail
m <- if (!is.null(g$mean)) g$mean[factors, , drop = FALSE] else NULL
v <- if (!is.null(g$variance)) g$variance[factors, , drop = FALSE] else NULL
list(mean = m, variance = v, new_values = g$new_values)
})
}
# Update total variance explained estimates (r2_total)
if (length(factors)<object@dimensions[["K"]]) {
if (recalculate_variance_explained) {
message("Recalculating total variance explained values (r2_total)...")
object@cache[["variance_explained"]]$r2_total <- calculate_variance_explained(object)[["r2_total"]]
} else {
# Fallback: sum the per-factor estimates of the retained factors
# (an approximation that is not valid for non-orthogonal factors)
object@cache[["variance_explained"]]$r2_total <- lapply(object@cache[["variance_explained"]]$r2_per_factor, function(x) colSums(x[factors,,drop=FALSE]))
}
}
# Subset per-factor variance explained estimates
if ((methods::.hasSlot(object, "cache")) && ("variance_explained" %in% names(object@cache))) {
object@cache[["variance_explained"]]$r2_per_factor <- lapply(object@cache[["variance_explained"]]$r2_per_factor, function(x) x[factors,,drop=FALSE])
}
# Subset lengthscales per factor
if (!is.null(object@training_stats$structural_sig)) {
object@training_stats$structural_sig <- object@training_stats$structural_sig[factors,drop=FALSE]
}
if (!is.null(object@training_stats$length_scales)) {
object@training_stats$length_scales <- object@training_stats$length_scales[factors,drop=FALSE]
}
if (!is.null(object@training_stats$scales)) {
object@training_stats$scales <- object@training_stats$scales[factors,drop=FALSE]
}
# Update dimensionality
object@dimensions[["K"]] <- length(factors)
# Update factor names
factors_names(object) <- paste0("Factor", as.character(seq_len(object@dimensions[["K"]])))
return(object)
}
#' @title Subset samples
#' @name subset_samples
#' @description Method to subset (or sort) samples
#' @param object a \code{\link{MOFA}} object.
#' @param samples character vector with the sample names or numeric vector with the sample indices.
#' @export
#' @return A \code{\link{MOFA}} object
#' @examples
#' # Using an existing trained model on simulated data
#' file <- system.file("extdata", "model.hdf5", package = "MOFA2")
#' model <- load_model(file)
#'
#' # (TO-DO) Remove a specific sample from the model (an outlier)
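#' # As an illustration (sample names depend on the model), one could drop the first sample:
#' # model <- subset_samples(model, samples = unlist(samples_names(model))[-1])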
subset_samples <- function(object, samples) {
# Sanity checks
if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA")
# Define samples
samples <- .check_and_get_samples(object, samples)
# Check if an entire group needs to be removed
groups <- as.character(unique(object@samples_metadata[match(samples, object@samples_metadata$sample),]$group))
if (length(groups)<length(groups_names(object))) object <- subset_groups(object, groups)
# Subset data and expectations
groups <- groups_names(object)
tmp <- lapply(groups, function(g) samples_names(object)[[g]][samples_names(object)[[g]] %in% samples])
names(tmp) <- groups
for (g in groups) {
samples_g <- tmp[[g]]
# Subset expectations
if (length(object@expectations)>0) {
if ("Z" %in% names(object@expectations) & length(object@expectations$Z)>0) {
object@expectations$Z[[g]] <- object@expectations$Z[[g]][samples_g,, drop=FALSE]
}
if ("Y" %in% names(object@expectations) & length(object@expectations$Y)>0) {
for (m in views_names(object)) {
object@expectations$Y[[m]][[g]] <- object@expectations$Y[[m]][[g]][,samples_g,drop=FALSE]
}
}
if ("Tau" %in% names(object@expectations) & length(object@expectations$Tau)>0) {
for (m in views_names(object)) {
object@expectations$Tau[[m]][[g]] <- object@expectations$Tau[[m]][[g]][samples_g, , drop=FALSE]
}
}
if(g == groups[1]) {# only one Sigma node
if ("Sigma" %in% names(object@expectations) & length(object@expectations$Sigma)>0) {
samples <- unique(unlist(tmp)) # TODO - make Sigma live on covariate level or expand to group-level
object@expectations$Sigma[[1]] <- object@expectations$Sigma[[1]][,samples,samples,drop=FALSE]
}
}
}
# Subset data
if (length(object@data)>0) {
for (m in views_names(object)) {
object@data[[m]][[g]] <- object@data[[m]][[g]][,samples_g,drop=FALSE]
}
}
# Subset imputed data
if (length(object@imputed_data)>0) {
for (m in views_names(object)) {
object@imputed_data[[m]][[g]] <- object@imputed_data[[m]][[g]][,samples_g,drop=FALSE]
}
}
}
# Subset sample metadata
object@samples_metadata <- object@samples_metadata[object@samples_metadata$sample %in% samples,]
# Update dimensionality
object@dimensions[["N"]] <- sapply(tmp, length)
# Update sample names
samples_names(object) <- tmp
# Sanity checks
stopifnot(all(object@samples_metadata$sample == unlist(lapply(object@data[[1]],colnames))))
stopifnot(all(object@samples_metadata$sample == unlist(lapply(object@expectations$Z,rownames))))
stopifnot(all(object@samples_metadata$sample == unlist(lapply(object@expectations$Y[[1]],colnames))))
# Remove variance explained estimates
warning("After subsetting the samples the variance explained estimates are not valid anymore, removing them...")
object@cache[["variance_explained"]] <- NULL
return(object)
}
#' @title Subset features
#' @name subset_features
#' @description Method to subset (or sort) features
#' @param object a \code{\link{MOFA}} object.
#' @param view character vector with the view name or integer with the view index
#' @param features character vector with the feature names, numeric vector with the feature indices,
#' or logical vector with the features to be kept as TRUE.
#' @return A \code{\link{MOFA}} object
#' @export
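#' @examples
#' # Using an existing trained model on simulated data
#' file <- system.file("extdata", "model.hdf5", package = "MOFA2")
#' model <- load_model(file)
#'
#' # Illustrative call (the feature indices are assumed; adapt them to your data):
#' # model <- subset_features(model, view = 1, features = 1:10)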
subset_features <- function(object, view, features) {
# Sanity checks
if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA")
warning("Removing features a posteriori is fine for an exploratory analysis, but we recommend removing them before training!")
if (is.numeric(view)) view <- views_names(object)[view]
stopifnot(all(view %in% views_names(object)))
stopifnot(length(features) <= object@dimensions[["D"]][[view]])
# Define features
if (is.character(features)) {
stopifnot(all(features %in% features_names(object)[[view]]))
} else {
features <- features_names(object)[[view]][features]
}
# Subset relevant slots
if (length(object@expectations)>0) {
# Only the weights and data expectations of the selected view are affected
if ("W" %in% names(object@expectations) & length(object@expectations$W)>0)
object@expectations$W[[view]] <- object@expectations$W[[view]][features,, drop=FALSE]
if ("Y" %in% names(object@expectations) & length(object@expectations$Y)>0)
object@expectations$Y[[view]] <- lapply(object@expectations$Y[[view]], function(x) x[features,, drop=FALSE])
}
# Subset data (only the selected view)
if (length(object@data)>0)
object@data[[view]] <- sapply(object@data[[view]], function(y) y[features,, drop=FALSE], simplify = FALSE, USE.NAMES = TRUE)
# Subset intercepts (only the selected view)
if (length(object@intercepts[[1]])>0)
object@intercepts[[view]] <- sapply(object@intercepts[[view]], function(y) y[features], simplify = FALSE, USE.NAMES = TRUE)
# Subsetting of imputed data is not implemented
if (length(object@imputed_data) != 0) {
stop("subset_features does not support models with imputed data")
}
# Update dimensionality
object@dimensions[["D"]][[view]] <- length(features)
# Update features names
features_names(object)[[view]] <- features
# Remove variance explained estimates
warning("After subsetting the features the variance explained estimates are not valid anymore, removing them...")
object@cache[["variance_explained"]] <- NULL
return(object)
}