#' Make a list of CoRecMotifs
#'
#' Creates a list of [CoRecMotifs][CoRecMotif-class] using fluorescence data
#' from a CoRec experiment. This is a convenience function that calls
#' [annotate_fluorescence_table()], [fluorescence_to_zscore_table()], and then
#' [zscore_table_to_corecmotifs()].
#'
#' By default no output files are created. To save output files, you must
#' provide `output_directory`, `output_base_name`, or both. The output files are
#' named according to the following rules.
#'
#' * If `output_directory` is provided but `output_base_name` is not, the files
#' will be saved in the provided directory using the base name "output".
#' * If `output_base_name` is provided but `output_directory` is not, the files
#' will be saved in the working directory using the provided base name.
#' * If `array_id` is provided, it will be appended to the output base name.
#' * The individual output file names will be identified by a suffix appended
#' to the output base name after the array ID (if present).
#' * The output of [annotate_fluorescence_table()] will have the suffix
#' "fluorescence.tsv".
#' * The output of [fluorescence_to_zscore_table()] will have the suffix
#' "zscores.tsv"
#' * The output of [zscore_table_to_corecmotifs()] will have the suffix
#' "all_corecmotifs.rds"
#'
#' @inheritParams annotate_fluorescence_table
#' @inheritParams zscore_table_to_corecmotifs
#' @param output_directory `character(1)` or `NULL`. The path to the directory
#' where output files will be saved. No output files will be created unless
#' `output_directory` or `output_base_name` is provided. (Default: NULL)
#' @param output_base_name `character(1)` or `NULL`. The base name for output
#' files. No output files will be created unless `output_directory` or
#' `output_base_name ` is provided. (Default: NULL)
#'
#' @return A list of [CoRecMotifs][CoRecMotif-class], one for each possible
#' combination of the probe sets annotated in `annotation` and the PBM
#' conditions listed in `pbm_conditions`.
#'
#' @seealso [annotate_fluorescence_table()], [fluorescence_to_zscore_table()],
#' [zscore_table_to_corecmotifs()]
#'
#' @export
#'
#' @examples
#' print("FILL THIS IN!")
make_corecmotifs <-
function(
fluorescence_table,
fluorescence_columns,
annotation,
array_id = NULL,
output_directory = NULL,
output_base_name = NULL
) {
# Make sure all the arguments are the right type
assertthat::assert_that(
is.data.frame(fluorescence_table),
is.character(fluorescence_columns),
is.data.frame(annotation),
assertthat::is.string(array_id) || is.null(array_id),
assertthat::is.string(output_directory) || is.null(output_directory),
assertthat::is.string(output_base_name) || is.null(output_base_name)
)
# If no array ID is given, generate a random ID
# CoRecMotifs have to have different names or check_replicates() won't work
if (is.null(array_id)) {
array_id <- create_array_id()
}
# Update the output base name with the output directory and array ID
output_base_name <-
update_output_base_name(output_directory, output_base_name, array_id)
# Do not save any output files by default
fluorescence_output <- NULL
zscore_output <- NULL
corec_output <- NULL
# Create names for the output files if necessary
if (!is.null(output_base_name)) {
fluorescence_output <- paste0(output_base_name, "_fluorescence.tsv")
zscore_output <- paste0(output_base_name, "_zscores.tsv")
corec_output <- paste0(output_base_name, "_all_corecmotifs.rds")
}
# Load and annotate the table of fluorescence values
annotated_fluorescence_table <-
annotate_fluorescence_table(
fluorescence_table = fluorescence_table,
fluorescence_columns = fluorescence_columns,
annotation = annotation,
output_file = fluorescence_output
)
# Convert the fluorescence values into condition-wise z-scores
zscore_table <-
fluorescence_to_zscore_table(
fluorescence_table = annotated_fluorescence_table,
fluorescence_columns = fluorescence_columns,
output_file = zscore_output
)
# Make CoRecMotifs for all the seed/condition combos
corecmotifs <-
zscore_table_to_corecmotifs(
zscore_table = zscore_table,
zscore_columns = fluorescence_columns,
array_id = array_id,
output_file = corec_output
)
# Return the list of CoRecMotifs
return(corecmotifs)
}
#' Filter a list of CoRecMotifs and match them to reference motifs
#'
#' Removes [CoRecMotifs][CoRecMotif-class] with low z-scores or motif strengths
#' and CoRecMotifs that do not replicate, then compares the remaining
#' CoRecMotifs to a database of reference motifs to identify the best match.
#' This is a convenience function that calls [filter_corecmotifs()],
#' [check_replicates()], [find_match()], then [filter_corecmotifs()] and
#' [check_replicates()] again, and finally [summarize_corecmotifs()] and
#' optionally [compare_conditions()].
#'
#' By default no output files are created. To save output files, you must
#' provide `output_directory`, `output_base_name`, or both. The output files are
#' named according to the following rules.
#'
#' * If `output_directory` is provided but `output_base_name` is not, the files
#' will be saved in the provided directory using the base name "output".
#' * If `output_base_name` is provided but `output_directory` is not, the files
#' will be saved in the working directory using the provided base name.
#' * The individual output file names will be identified by a suffix appended
#' to the output base name.
#' * The output after the first call to [filter_corecmotifs()] and
#' [check_replicates()] will have the suffix "filtered_corecmotifs.rds".
#' * The output of [find_match()] will have the suffix
#' "matched_corecmotifs.rds".
#' * The output after the second call to [filter_corecmotifs()] and
#' [check_replicates()] will have the suffix "significant_corecmotifs.rds".
#' * The output of [summarize_corecmotifs()] will have the suffix
#' "significant_corecmotifs_summary.tsv".
#' * The output of [summarize_corecmotifs()] with `by_cluster = TRUE` will have
#' the suffix "significant_corecmotifs_summary_by_cluster.tsv".
#' * The output of [compare_conditions()] will have the suffix
#' "condition_comparisons.tsv".
#'
#' @inheritParams check_replicates
#' @inheritParams find_match
#' @inheritParams make_corecmotifs
#' @param corecmotifs `list`. The [CoRecMotifs][CoRecMotif-class] to process.
#' @param motif_strength `numeric(1)` or `NULL`. The minimum motif strength to
#' keep or NULL not to filter by motif strength. (Default: 1)
#' @param rolling_ic `numeric(1)` or `NULL`. The minimum rolling IC to keep or
#' NULL not to filter by rolling IC. (Default: 1)
#' @param match_pvalue `numeric(1)` or `NULL`. The maximum match p-value to keep
#' or NULL not to filter by match p-value. (Default: 0.01)
#' @param pbm_condition_groups `list(character)` or `NULL`. The names of the PBM
#' conditions to compare. Each element of the list should contain a group of
#' PBM conditions to compare to each other. If the list elements are named,
#' the names will be passed to the `group_name` parameter of
#' [compare_conditions()]. (Default: NULL)
#'
#' @return A filtered list of replicated [CoRecMotifs][CoRecMotif-class] that
#' match a reference motif.
#'
#' @seealso [filter_corecmotifs()], [check_replicates()], [find_match()],
#' [summarize_corecmotifs()], [compare_conditions()]
#'
#' @export
#'
#' @examples
#' print("FILL THIS IN")
process_corecmotifs <-
function(
corecmotifs,
reference_motifs_file,
cluster_assignments = NULL,
meme_path = NULL,
motif_strength = 1,
rolling_ic = 1,
n_replicates = 2,
eucl_distance = 0.4,
min_overlap = 5,
match_pvalue = 0.01,
pbm_condition_groups = NULL,
output_directory = NULL,
output_base_name = NULL
) {
# Make sure all the arguments are the right type
assertthat::assert_that(
assertthat::is.string(reference_motifs_file) &&
file.exists(reference_motifs_file),
is.data.frame(cluster_assignments) || is.null(cluster_assignments),
assertthat::is.string(meme_path) || is.null(meme_path),
assertthat::is.number(motif_strength) || is.null(motif_strength),
assertthat::is.number(rolling_ic) || is.null(rolling_ic),
assertthat::is.count(n_replicates),
assertthat::is.number(eucl_distance) || is.null(eucl_distance),
assertthat::is.count(min_overlap),
assertthat::is.number(match_pvalue) || is.null(match_pvalue),
is.null(pbm_condition_groups) || (
is.list(pbm_condition_groups) &&
all(vapply(pbm_condition_groups, is.character, logical(1)))
),
assertthat::is.string(output_directory) || is.null(output_directory),
assertthat::is.string(output_base_name) || is.null(output_base_name)
)
# Update the output base name with the output directory
output_base_name <-
update_output_base_name(output_directory, output_base_name)
# Do not save any output files by default
filtered_output <- NULL
matched_output <- NULL
final_output <- NULL
summary_output <- NULL
cluster_output <- NULL
comparison_output <- NULL
# Create names for the output files if necessary
if (!is.null(output_base_name)) {
filtered_output <-
paste0(output_base_name, "_filtered_corecmotifs.rds")
matched_output <-
paste0(output_base_name, "_matched_corecmotifs.rds")
final_output <-
paste0(output_base_name, "_significant_corecmotifs.rds")
summary_output <-
paste0(output_base_name, "_significant_corecmotifs_summary.tsv")
cluster_output <-
paste0(
output_base_name,
"_significant_corecmotifs_summary_by_cluster.tsv"
)
comparison_output <-
paste0(output_base_name, "_condition_comparisons.tsv")
}
# Filter out CoRecMotifs with low motif strength and/or rolling IC scores
filtered_corecmotifs <-
filter_corecmotifs(
corecmotifs,
motif_strength = motif_strength,
rolling_ic = rolling_ic,
check_corecmotifs = TRUE
)
# Make sure you didn't filter out all the CoRecMotifs
if (length(filtered_corecmotifs) < 1) {
warning(
"No CoRecMotifs passed the following filters\n",
"\tMotif strength: ", motif_strength, "\n",
"\tRolling IC: ", rolling_ic, "\n",
"Consider setting motif_strength or rolling_ic to a smaller value",
call. = FALSE
)
return()
}
# Filter out CoRecMotifs that don't replicate
replicated_corecmotifs <-
check_replicates(
corecmotifs = filtered_corecmotifs,
n_replicates = n_replicates,
eucl_distance = eucl_distance,
output_file = filtered_output,
check_corecmotifs = FALSE
)
# Make sure you didn't filter out all the CoRecMotifs
if (length(replicated_corecmotifs) < 1) {
warning(
"No CoRecMotifs passed the following filters\n",
"\tNumber of replicates: ", n_replicates, "\n",
"\tMaximum Euclidean distance between replicates: ",
eucl_distance, "\n",
"If your data is unreplicated, set n_replicates = 1\n",
"Consider setting eucl_distance to a larger value",
call. = FALSE
)
return()
}
# Find the best matching reference motif for each CoRecMotif
matched_corecmotifs <-
find_match(
corecmotifs = replicated_corecmotifs,
reference_motifs_file = reference_motifs_file,
cluster_assignments = cluster_assignments,
min_overlap = min_overlap,
meme_path = meme_path,
output_file = matched_output,
check_corecmotifs = FALSE
)
# Filter out CoRecMotifs that don't match any reference motifs well
final_corecmotifs <-
filter_corecmotifs(
matched_corecmotifs,
match_pvalue = match_pvalue,
check_corecmotifs = FALSE
)
# Make sure you didn't filter out all the CoRecMotifs
if (length(final_corecmotifs) < 1) {
warning(
"No CoRecMotifs passed the following filter\n",
"\tMatch p-value: ", match_pvalue, "\n",
"Consider setting match_pvalue to a larger value",
call. = FALSE
)
return()
}
# Make sure at least n_replicates match a reference motif well
final_corecmotifs <-
check_replicates(
final_corecmotifs,
n_replicates = n_replicates,
eucl_distance = NULL,
output_file = final_output,
check_corecmotifs = FALSE
)
# Make sure you didn't filter out all the CoRecMotifs
if (length(final_corecmotifs) < 1) {
warning(
"No CoRecMotifs passed the following filter\n",
"\tNumber of replicates matching a reference motif: ",
n_replicates, "\n",
"Consider setting match_pvalue to a larger value",
call. = FALSE
)
return()
}
# Make and save summary tables if necessary
if (!is.null(summary_output)) {
# Summarize at the level of individual motifs
corecmotif_summary <-
summarize_corecmotifs(final_corecmotifs, check_corecmotifs = FALSE)
try_catch_save_output(corecmotif_summary, summary_output, "tsv")
# Summarize at the level of clusters
cluster_summary <-
summarize_corecmotifs(
final_corecmotifs,
by_cluster = TRUE,
check_corecmotifs = FALSE
)
try_catch_save_output(cluster_summary, cluster_output, "tsv")
# Compare across conditions if necessary
if (!is.null(pbm_condition_groups)) {
condition_comparison_df <-
lapply(1:length(pbm_condition_groups), function(i) {
compare_conditions(
matched_corecmotifs,
pbm_conditions = pbm_condition_groups[[i]],
group_name = names(pbm_condition_groups[i]),
check_corecmotifs = FALSE
)
}) %>%
# Combine all the data frames
dplyr::bind_rows()
try_catch_save_output(
condition_comparison_df, comparison_output, "tsv"
)
}
}
# Return the list of CoRecMotifs
return(final_corecmotifs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.