Nothing
#' Collate several subsets of a melted similarity matrix, required for computing
#' metrics.
#'
#' \code{sim_collate} collates several subsets of a melted similarity matrix,
#' required for computing metrics.
#'
#' @details
#'
#'
#' ## 0. Filter out some rows
#'
#' Filter out pairs that match \code{drop_group} in either right or left indices
#'
#' ## 1. Similarity to reference
#'
#' Fetch similarities between
#'
#' - (a) all rows (except, optionally those containing \code{reference}), and
#' - (b) all rows containing \code{reference}
#'
#' Do so only for those (a, b) pairs that
#'
#' - have *same* values in *all* columns of \code{all_same_cols_ref}
#'
#' ## 2. Similarity to replicates (no references)
#'
#' Fetch similarities between
#' - (a) all rows except \code{reference} rows, and
#' - (b) all rows except \code{reference} rows (i.e. to each other)
#'
#' Do so for only those (a, b) pairs that
#' - have *same* values in *all* columns of \code{all_same_cols_rep}
#' - have *different* values in *all* columns of \code{all_different_cols_rep}
#' (if specified)
#' - have *different* values in *at least one* column of
#' \code{any_different_cols_rep} (if specified)
#'
#' Keep, both, (a, b) and (b, a)
#'
#' ## 3. Similarity to replicates (only references)
#'
#' Fetch similarities between
#' - (a) all rows containing \code{reference}, and
#' - (b) all rows containing \code{reference} (i.e. to each other)
#'
#' Do so for only those (a, b) pairs that
#' - have *same* values in *all* columns of \code{all_same_cols_rep_ref}.
#'
#' Keep, both, (a, b) and (b, a)
#'
#' ## 4. Similarity to non-replicates
#'
#' Fetch similarities between
#' - (a) all rows (except, optionally, \code{reference} rows), and
#' - (b) all rows except \code{reference} rows
#'
#' Do so for only those (a, b) pairs that
#' - have *same* values in *all* columns of \code{all_same_cols_non_rep}
#' - have *different* values in *all* columns \code{all_different_cols_non_rep}
#' - have *different* values in *at least one* column of
#' \code{any_different_cols_non_rep}
#'
#' Keep, both, (a, b) and (b, a)
#'
#' ## 5. Similarity to group
#'
#' Fetch similarities between
#' - (a) all rows (except, optionally, \code{reference} rows), and
#' - (b) all rows (except, optionally, \code{reference} rows)
#'
#' Do so for only those (a, b) pairs that
#' - have *same* values in *all* columns of \code{all_same_cols_group}
#' - have *different* values in *at least one* column of
#' \code{any_different_cols_group}
#'
#' Keep, both, (a, b) and (b, a)
#'
#'
#' @param sim_df \code{metric_sim} object.
#' @param annotation_cols character vector specifying which columns from
#' \code{metadata} to annotate the left index of the filtered \code{sim_df}
#' with.
#' @param all_same_cols_rep optional character vector specifying columns.
#' @param any_different_cols_rep optional character vector specifying
#' columns.
#' @param all_different_cols_rep optional character vector specifying
#' columns.
#' @param all_same_cols_ref optional character vector specifying columns.
#' @param all_same_cols_rep_ref optional character vector specifying columns.
#' @param any_different_cols_non_rep optional character vector specifying
#' columns.
#' @param all_same_cols_non_rep optional character vector specifying columns.
#' @param all_different_cols_non_rep optional character vector specifying
#' columns.
#' @param any_different_cols_group optional character vector specifying columns.
#' @param all_same_cols_group optional character vector specifying columns.
#' @param reference optional character string specifying reference.
#' @param drop_reference optional boolean specifying whether to filter (drop)
#' pairs using \code{reference} on the left index.
#' @param drop_group optional tbl; rows that match on \code{drop_group} on the
#' left or right index are dropped.
#'
#' @return \code{metric_sim} object comprising a filtered \code{sim_df} with
#' sets of pairs, preserving the same \code{metric_sim} attributes as
#' \code{sim_df}.
#'
#' @examples
#'
#' sim_df <- matric::sim_calculate(matric::cellhealth)
#'
#' drop_group <-
#' data.frame(Metadata_gene_name = "EMPTY")
#'
#' reference <-
#' data.frame(Metadata_gene_name = c("Chr2"))
#'
#' all_same_cols_ref <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_Plate"
#' )
#'
#' all_same_cols_rep <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_gene_name",
#' "Metadata_pert_name"
#' )
#'
#' all_same_cols_rep_ref <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_gene_name",
#' "Metadata_pert_name",
#' "Metadata_Plate"
#' )
#'
#' any_different_cols_non_rep <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_gene_name",
#' "Metadata_pert_name"
#' )
#'
#' all_same_cols_non_rep <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_Plate"
#' )
#'
#' all_different_cols_non_rep <-
#' c("Metadata_gene_name")
#'
#' all_same_cols_group <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_gene_name"
#' )
#'
#' any_different_cols_group <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_gene_name",
#' "Metadata_pert_name"
#' )
#'
#' annotation_cols <-
#' c(
#' "Metadata_cell_line",
#' "Metadata_gene_name",
#' "Metadata_pert_name"
#' )
#'
#' collated_sim <-
#' matric::sim_collate(
#' sim_df,
#' reference = reference,
#' all_same_cols_rep = all_same_cols_rep,
#' all_same_cols_rep_ref = all_same_cols_rep_ref,
#' all_same_cols_ref = all_same_cols_ref,
#' any_different_cols_non_rep = any_different_cols_non_rep,
#' all_same_cols_non_rep = all_same_cols_non_rep,
#' all_different_cols_non_rep = all_different_cols_non_rep,
#' any_different_cols_group = any_different_cols_group,
#' all_same_cols_group = all_same_cols_group,
#' annotation_cols = annotation_cols,
#' drop_group = drop_group
#' )
#'
#' head(collated_sim)
#'
#' collated_sim %>%
#' dplyr::group_by(type) %>%
#' dplyr::tally()
#' @export
#'
sim_collate <-
function(sim_df,
all_same_cols_rep,
annotation_cols,
any_different_cols_rep = NULL,
all_different_cols_rep = NULL,
all_same_cols_ref = NULL,
all_same_cols_rep_ref = NULL,
all_same_cols_non_rep = NULL,
any_different_cols_non_rep = NULL,
all_different_cols_non_rep = NULL,
any_different_cols_group = NULL,
all_same_cols_group = NULL,
reference = NULL,
drop_reference = FALSE,
drop_group = NULL) {
invisible(sim_validate(sim_df))
sim_df_attr <- attributes(sim_df)
row_metadata <- attr(sim_df, "row_metadata")
sim_cols <- names(sim_df)
fetch_ref <-
!is.null(all_same_cols_ref) &&
!is.null(reference)
fetch_rep_ref <-
!is.null(all_same_cols_ref) &&
!is.null(reference) &&
!is.null(all_same_cols_rep_ref)
fetch_non_rep <-
!is.null(any_different_cols_non_rep) &&
!is.null(all_same_cols_non_rep) &&
!is.null(all_different_cols_non_rep)
fetch_rep_group <-
!is.null(any_different_cols_group) &&
!is.null(all_same_cols_group)
# ---- 0. Filter out some rows ----
# Efficiency notes:
# Efficient because all the component filters are efficient.
# Details:
# - `sim_filter_keep_or_drop_some` is efficient
if (!is.null(drop_group)) {
sim_df <- sim_df %>%
sim_filter_keep_or_drop_some(
row_metadata = row_metadata,
filter_drop = drop_group,
filter_side = "left"
) %>%
sim_filter_keep_or_drop_some(
row_metadata = row_metadata,
filter_drop = drop_group,
filter_side = "right"
)
}
# ---- 1. Similarity to reference ----
# Fetch similarities between
# a. all rows (except, optionally those containing `reference`)
# and
# b. all rows containing `reference`
# Do so only for those (a, b) pairs that
# - have *same* values in *all* columns of `all_same_cols_ref`
# Efficiency notes:
# Efficient because all the component filters are efficient.
# Details:
# - `sim_filter_all_same_keep_some` uses `sim_filter_all_same` and
# `sim_filter_keep_or_drop_some`.
# - `sim_filter_keep_or_drop_some` is efficient
# - `sim_filter_all_same` is efficient (strategically)
if (fetch_ref) {
ref <-
sim_df %>%
sim_filter_all_same_keep_some(
row_metadata = row_metadata,
all_same_cols = all_same_cols_ref,
filter_keep_right = reference,
drop_reference = drop_reference,
annotation_cols = annotation_cols,
sim_cols = sim_cols
)
}
# ---- 2. Similarity to replicates (no references) ----
# Fetch similarities between
# a. all rows except `reference` rows
# and
# b. all rows except `reference` rows (i.e. to each other)
#
# Do so for only those (a, b) pairs that
# - have *same* values in *all* columns of `all_same_cols_rep
# - have *different* values in *all* columns `all_different_cols_rep` (if specified)
# - have *different* values in *at least one* column of
# `any_different_cols_rep` (if specified)
#
# Keep, both, (a, b) and (b, a)
# Efficiency notes:
# Efficient because all the component filters are efficient.
# Details:
# - `sim_filter_keep_or_drop_some` is efficient
# - `sim_filter_all_same` is efficient (strategically)
rep <-
sim_df %>%
sim_filter_keep_or_drop_some(
row_metadata = row_metadata,
filter_drop = reference,
filter_side = "left"
) %>%
sim_filter_keep_or_drop_some(
row_metadata = row_metadata,
filter_drop = reference,
filter_side = "right"
)
if (is.null(any_different_cols_rep) & is.null(all_different_cols_rep)) { # nolint: vector_logic_linter
rep <-
rep %>%
sim_filter_all_same(
row_metadata = row_metadata,
all_same_cols = all_same_cols_rep,
annotation_cols = annotation_cols,
drop_lower = FALSE,
sim_cols = sim_cols
)
} else {
rep <-
rep %>%
sim_filter_some_different_drop_some(
row_metadata = row_metadata,
any_different_cols = any_different_cols_rep,
all_same_cols = all_same_cols_rep,
all_different_cols = all_different_cols_rep,
annotation_cols = annotation_cols,
sim_cols = sim_cols
)
}
# ---- 3. Similarity to replicates (only references) ----
# Fetch similarities between
# a. all rows containing `reference`
# and
# b. all rows containing `reference` (i.e. to each other)
#
# Do so for only those (a, b) pairs that
# - have *same* values in *all* columns of `all_same_cols_rep_ref`.
#
# Keep, both, (a, b) and (b, a)
# Efficiency notes:
# Efficient because all the component filters are efficient.
# Details:
# - `sim_filter_keep_or_drop_some` is efficient
# - `sim_filter_all_same` is efficient (strategically)
if (fetch_rep_ref) {
rep_ref <-
sim_df %>%
sim_filter_keep_or_drop_some(
row_metadata = row_metadata,
filter_keep = reference,
filter_side = "left"
) %>%
sim_filter_keep_or_drop_some(
row_metadata = row_metadata,
filter_keep = reference,
filter_side = "right"
) %>%
sim_filter_all_same(
row_metadata = row_metadata,
all_same_cols = all_same_cols_rep_ref,
annotation_cols = annotation_cols,
drop_lower = FALSE,
sim_cols = sim_cols
)
# Drop tuples in ref that are actually rep_ref
if (!drop_reference) {
ref <-
ref %>%
dplyr::anti_join(
rep_ref %>% dplyr::select(id1, id2),
by = c("id1", "id2")
)
}
}
# ---- 4. Similarity to non-replicates ----
# Fetch similarities between
# a. all rows (except, optionally, `reference` rows)
# and
# b. all rows except `reference` rows
#
# Do so for only those (a, b) pairs that
# - have *same* values in *all* columns of `all_same_cols_non_rep`
# - have *different* values in *all* columns `all_different_cols_non_rep`
# - have *different* values in *at least one* column of
# `any_different_cols_non_rep`
#
# Keep, both, (a, b) and (b, a)
# Efficiency notes:
# INEFFICIENT because one of the component filters is inefficient.
# Details:
# - `sim_filter_some_different_drop_some` is inefficient because of the
# inner join of metadata with itself.
if (fetch_non_rep) {
if (drop_reference) {
reference_left <- reference
} else {
reference_left <- NULL
}
non_rep <-
sim_df %>%
sim_filter_some_different_drop_some(
row_metadata = row_metadata,
any_different_cols = any_different_cols_non_rep,
all_same_cols = all_same_cols_non_rep,
all_different_cols = all_different_cols_non_rep,
filter_drop_left = reference_left,
filter_drop_right = reference,
annotation_cols = annotation_cols,
sim_cols = sim_cols
)
}
# ---- 5. Similarity to group ----
# Fetch similarities between
# a. all rows (except, optionally, `reference` rows)
# and
# b. all rows (except, optionally, `reference` rows)
#
# Do so for only those (a, b) pairs that
# - have *same* values in *all* columns of `all_same_cols_group`
# - have *different* values in *at least one* column of
# `any_different_cols_group`
#
# Keep, both, (a, b) and (b, a)
# Efficiency notes:
# INEFFICIENT because one of the component filters is inefficient.
# Details:
# - `sim_filter_some_different_drop_some` is inefficient because of the
# inner join of metadata with itself.
if (fetch_rep_group) {
if (drop_reference) {
reference_both <- reference
} else {
reference_both <- NULL
}
rep_group <-
sim_df %>%
sim_filter_some_different_drop_some(
row_metadata = row_metadata,
any_different_cols = any_different_cols_group,
all_same_cols = all_same_cols_group,
filter_drop_left = reference_both,
filter_drop_right = reference_both,
annotation_cols = annotation_cols,
sim_cols = sim_cols
)
}
# 6. Combine
combined <-
rep %>% dplyr::mutate(type = "rep")
if (fetch_rep_ref) {
combined <- combined %>%
# same tag as ref
dplyr::bind_rows(rep_ref %>% dplyr::mutate(type = "rep"))
}
if (fetch_non_rep) {
combined <- combined %>%
dplyr::bind_rows(non_rep %>% dplyr::mutate(type = "non_rep"))
}
if (fetch_ref) {
combined <- combined %>%
dplyr::bind_rows(ref %>% dplyr::mutate(type = "ref"))
}
if (fetch_rep_group) {
combined <- combined %>%
dplyr::bind_rows(rep_group %>% dplyr::mutate(type = "rep_group"))
}
# add attributes
attr(combined, "all_different_cols_non_rep") <-
all_different_cols_non_rep
attr(combined, "all_same_cols_group") <- all_same_cols_group
attr(combined, "all_same_cols_non_rep") <- all_same_cols_non_rep
attr(combined, "all_same_cols_ref") <- all_same_cols_ref
attr(combined, "all_same_cols_rep") <- all_same_cols_rep
attr(combined, "all_same_cols_rep_ref") <- all_same_cols_rep_ref
attr(combined, "annotation_cols") <- annotation_cols
attr(combined, "any_different_cols_group") <-
any_different_cols_group
attr(combined, "any_different_cols_non_rep") <-
any_different_cols_non_rep
attr(combined, "drop_group") <- drop_group
attr(combined, "drop_reference") <- drop_reference
attr(combined, "reference") <- reference
combined <- combined %>% as.data.frame()
sim_restore(combined, sim_df_attr)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.