#' @title Scores transcripts with position weight matrices
#'
#' @description
#' This function is used to count the binding sites in a set of sequences for
#' all or a
#' subset of RNA-binding protein sequence
#' motifs and returns the result in a data frame, which is subsequently used by
#' \code{\link{calculate_motif_enrichment}} to
#' obtain binding site enrichment scores.
#'
#' @param motifs a list of motifs that is used to score the specified sequences.
#' If \code{is.null(motifs)} then all Transite motifs are used.
#' @param n_cores the number of cores that are used
#' @param cache either logical or path to a directory where scores are cached.
#' The scores of each
#' motif are stored in a
#' separate file that contains a hash table with RefSeq identifiers and
#' sequence type
#' qualifiers as keys and the number of putative binding sites as values.
#' If \code{cache} is \code{FALSE}, scores will not be cached.
#' @inheritParams score_transcripts_single_motif
#'
#' @return A list with three entries:
#'
#' (1) df: a data frame with the following columns:
#' \tabular{rl}{
#' \code{motif_id} \tab the motif identifier that is used in the original
#' motif library\cr
#' \code{motif_rbps} \tab the gene symbol of the RNA-binding protein(s)\cr
#' \code{absolute_hits} \tab the absolute frequency of putative binding
#' sites per motif in all
#' transcripts \cr
#' \code{relative_hits} \tab the relative, i.e., absolute divided by total,
#' frequency of
#' binding sites per motif in all transcripts \cr
#' \code{total_sites} \tab the total number of potential binding sites \cr
#' \code{one_hit}, \code{two_hits}, ... \tab number of transcripts with one,
#' two,
#' three, ... putative binding sites
#' }
#' (2) total_sites: a numeric vector with the total number of potential
#' binding sites
#' per transcript
#'
#' (3) absolute_hits: a numeric vector with the absolute (not relative)
#' number of putative
#' binding sites per transcript
#' @examples
#' foreground_set <- c(
#' "CAACAGCCUUAAUU", "CAGUCAAGACUCC", "CUUUGGGGAAU",
#' "UCAUUUUAUUAAA", "AAUUGGUGUCUGGAUACUUCCCUGUACAU",
#' "AUCAAAUUA", "AGAU", "GACACUUAAAGAUCCU",
#' "UAGCAUUAACUUAAUG", "AUGGA", "GAAGAGUGCUCA",
#' "AUAGAC", "AGUUC", "CCAGUAA"
#' )
#' # names are used as keys in the hash table (cached version only)
#' # ideally sequence identifiers (e.g., RefSeq ids) and region labels
#' # (e.g., 3UTR for 3'-UTR)
#' names(foreground_set) <- c(
#' "NM_1_DUMMY|3UTR", "NM_2_DUMMY|3UTR", "NM_3_DUMMY|3UTR",
#' "NM_4_DUMMY|3UTR", "NM_5_DUMMY|3UTR", "NM_6_DUMMY|3UTR",
#' "NM_7_DUMMY|3UTR", "NM_8_DUMMY|3UTR", "NM_9_DUMMY|3UTR",
#' "NM_10_DUMMY|3UTR", "NM_11_DUMMY|3UTR", "NM_12_DUMMY|3UTR",
#' "NM_13_DUMMY|3UTR", "NM_14_DUMMY|3UTR"
#' )
#'
#' # specific motifs, uncached
#' motifs <- get_motif_by_rbp("ELAVL1")
#' scores <- score_transcripts(foreground_set, motifs = motifs, cache = FALSE)
#' \dontrun{
#' # all Transite motifs, cached (writes scores to disk)
#' scores <- score_transcripts(foreground_set)
#'
#' # all Transite motifs, uncached
#' scores <- score_transcripts(foreground_set, cache = FALSE)
#'
#' foreground_df <- transite:::ge$foreground1_df
#' foreground_set <- foreground_df$seq
#' names(foreground_set) <- paste0(foreground_df$refseq, "|",
#' foreground_df$seq_type)
#' scores <- score_transcripts(foreground_set)
#' }
#' @family matrix functions
#' @importFrom parallel makeCluster
#' @importFrom parallel clusterExport
#' @importFrom parallel parLapply
#' @importFrom parallel stopCluster
#' @export
score_transcripts <- function(sequences, motifs = NULL, max_hits = 5,
threshold_method = c("p_value", "relative"),
threshold_value = 0.25^6,
n_cores = 1, cache = paste0(tempdir(), "/sc/")) {
# if threshold_method == "p_value": default threshold_value == 0.25^6
# (0.25^6: lowest p-value that can be achieved by hexamer motifs, the
#shortest supported motifs)
# if threshold_method == "relative": default threshold_value == 0.9
# (0.9: 90% of the maximum PWM score)
threshold_method <- match.arg(threshold_method,
choices = c("p_value", "relative"))
if (max_hits < 1) {
stop("max_hits must not be smaller than 1")
}
if (is.null(motifs)) {
motifs <- get_motifs()
}
if (is.logical(cache)) {
if (cache) {
cache_path <- paste0(tempdir(), "/sc/")
} else {
cache_path <- NULL
}
} else {
cache_path <- as.character(cache)
}
if (!is.null(cache_path)) {
if (is.null(names(sequences))) {
stop("names(sequences) cannot be null if cache is used")
}
names(sequences) <- paste0(names(sequences), "|",
threshold_method, "|", threshold_value)
dir.create(file.path(cache_path), showWarnings = FALSE,
recursive = TRUE)
}
if (n_cores == 1) {
motif_scores <- lapply(motifs, function(motif) {
return(score_transcripts_single_motif(
motif, sequences, max_hits, threshold_method,
threshold_value, cache_path
))
})
} else {
cluster <- parallel::makeCluster(mc <- getOption("cl.cores", n_cores))
parallel::clusterExport(
cl = cluster,
varlist = c(
"score_transcripts_single_motif", "sequences", "max_hits",
"threshold_method", "threshold_value", "cache_path",
"get_id", "get_rbps", "get_motif_matrix", "get_width",
"read_motif_cache", "write_motif_cache", "lock", "unlock",
"get_lock_object", "score_sequences",
"cached_score_sequences_helper", "score_sequences_helper"
),
envir = environment()
)
motif_scores <- parallel::parLapply(
cl = cluster, motifs, function(motif) {
return(score_transcripts_single_motif(motif, sequences,
max_hits,
threshold_method,
threshold_value,
cache_path))})
parallel::stopCluster(cl = cluster)
}
motif_scores_proto_df <- lapply(motif_scores, function(motif_score) {
return(motif_score$df)
})
total_sites <- lapply(motif_scores, function(motif_score) {
return(motif_score$total_sites)
})
absolute_hits <- lapply(motif_scores, function(motif_score) {
return(motif_score$absolute_hits)
})
motif_scores_df <- as.data.frame(do.call("rbind", motif_scores_proto_df),
stringsAsFactors = FALSE
)
motif_scores_df$motif_id <- as.character(motif_scores_df$motif_id)
motif_scores_df$motif_rbps <- as.character(motif_scores_df$motif_rbps)
motif_scores_df$absolute_hits <- as.numeric(motif_scores_df$absolute_hits)
motif_scores_df$relative_hits <- as.numeric(motif_scores_df$relative_hits)
motif_scores_df$total_sites <- as.numeric(motif_scores_df$total_sites)
motif_scores_df$one_hit <- as.numeric(motif_scores_df$one_hit)
if (max_hits > 1) {
motif_scores_df$two_hits <- as.numeric(motif_scores_df$two_hits)
}
if (max_hits > 2) {
motif_scores_df$three_hits <- as.numeric(motif_scores_df$three_hits)
}
if (max_hits > 3) {
motif_scores_df$four_hits <- as.numeric(motif_scores_df$four_hits)
}
if (max_hits > 4) {
motif_scores_df$five_hits <- as.numeric(motif_scores_df$five_hits)
}
if (max_hits > 5) {
motif_scores_df$six_hits <- as.numeric(motif_scores_df$six_hits)
}
if (max_hits > 6) {
motif_scores_df$seven_hits <- as.numeric(motif_scores_df$seven_hits)
}
if (max_hits > 7) {
motif_scores_df$eight_hit <- as.numeric(motif_scores_df$eight_hit)
}
if (max_hits > 8) {
motif_scores_df$nine_hits <- as.numeric(motif_scores_df$nine_hits)
}
motif_scores_df$more_hits <- as.numeric(motif_scores_df$more_hits)
return(list(df = motif_scores_df, total_sites = total_sites,
absolute_hits = absolute_hits))
}
#' @title Scores transadsadscripts with position weight matrices
#'
#' @description
#' This function is used to count the putative binding sites (i.e., motifs)
#' in a set of
#' sequences for the specified RNA-binding protein sequence
#' motifs and returns the result in a data frame, which is aggregated by
#' \code{\link{score_transcripts}} and
#' subsequently used by \code{\link{calculate_motif_enrichment}} to
#' obtain binding site enrichment scores.
#'
#' @param motif a Transite motif that is used to score the specified sequences
#' @param sequences character vector of named sequences
#' (only containing upper case characters A, C, G, T), where the names are
#' RefSeq identifiers
#' and sequence
#' type qualifiers (\code{"3UTR"}, \code{"5UTR"}, \code{"mRNA"}), e.g.
#' \code{"NM_010356|3UTR"}
#' @param max_hits maximum number of putative binding sites per mRNA
#' that are counted
#' @param cache_path the path to a directory where scores are cached.
#' The scores of each
#' motif are stored in a
#' separate file that contains a hash table with RefSeq identifiers
#' and sequence type
#' qualifiers as keys and the number of binding sites as values.
#' If is.null(cache_path), scores will not be cached.
#' @param threshold_method either \code{"p_value"} (default) or
#' \code{"relative"}.
#' If \code{threshold_method} equals \code{"p_value"}, the default
#' \code{threshold_value}
#' is \code{0.25^6}, which is
#' lowest p-value that can be achieved by hexamer motifs, the shortest
#' supported motifs.
#' If \code{threshold_method} equals \code{"relative"}, the default
#' \code{threshold_value}
#' is \code{0.9}, which is 90\% of the maximum PWM score.
#' @param threshold_value semantics of the \code{threshold_value} depend on
#' \code{threshold_method} (default is 0.25^6)
#' @return A list with the following items:
#' \tabular{rl}{
#' \code{motif_id} \tab the motif identifier of the specified motif\cr
#' \code{motif_rbps} \tab the gene symbol of the RNA-binding protein(s)\cr
#' \code{absolute_hits} \tab the absolute frequency of binding sites per
#' motif in all
#' transcripts \cr
#' \code{relative_hits} \tab the relative, i.e., absolute divided by
#' total, frequency of
#' binding sites per motif in all transcripts \cr
#' \code{total_sites} \tab the total number of potential binding sites \cr
#' \code{one_hit}, \code{two_hits}, ... \tab number of transcripts with
#' one, two, three,
#' ... binding sites
#' }
#'
#' @family matrix functions
#' @importFrom TFMPvalue TFMpv2sc
#' @importFrom Biostrings maxScore
score_transcripts_single_motif <- function(motif, sequences, max_hits = 5,
threshold_method = c("p_value",
"relative"),
threshold_value = 0.25^6,
cache_path = paste0(tempdir(),
"/sc/")) {
# if threshold_method == "p_value": default threshold_value == 0.25^6
# (0.25^6: lowest p-value that can be achieved by hexamer motifs,
# the shortest supported motifs)
# if threshold_method == "relative": default threshold_value == 0.9
# (0.9: 90% of the maximum PWM score)
threshold_method <- match.arg(threshold_method,
choices = c("p_value", "relative"))
pwm <- t(get_motif_matrix(motif))
rownames(pwm)[4] <- "T"
if (threshold_method == "p_value") {
threshold_score <- TFMPvalue::TFMpv2sc(
pwm, threshold_value,
c(A = 0.25, C = 0.25, G = 0.25, T = 0.25), "PWM"
)
} else if (threshold_method == "relative") {
threshold_score <- Biostrings::maxScore(pwm) * threshold_value
}
if (threshold_score < 0) {
warning(paste0(
get_id(motif), ": threshold score below zero (",
threshold_score, "), set to 0.1"
))
threshold_score <- 0.1
}
total_sites <- unlist(lapply(sequences, function(sequence) {
if (nchar(sequence) < get_width(motif)) {
return(0)
} else {
return(nchar(sequence) - get_width(motif) + 1)
}
}))
sum_total_sites <- sum(total_sites)
if (!is.null(cache_path)) {
motif_id_file <- gsub("[^[:alnum:]]", "_", get_id(motif))
motif_cache_file <- paste0(cache_path, motif_id_file, ".rds")
if (file.exists(motif_cache_file)) {
absolute_hits <- rep(0, length(sequences))
motif_cache <- read_motif_cache(cache_path, motif_id_file)
cached <- vapply(names(sequences), function(seq_id) {
return(exists(seq_id, envir = motif_cache, inherits = FALSE))
}, logical(1))
cached_ids <- names(sequences)[cached]
if (length(cached_ids) > 0) {
absolute_hits[cached] <- unlist(lapply(
cached_ids,
function(seq_id) {
return(get(seq_id, envir = motif_cache,
inherits = FALSE))
}))
}
uncached_ids <- names(sequences)[!cached]
if (length(uncached_ids) > 0) {
uncached_sequences <- sequences[!cached]
absolute_hits[!cached] <- cached_score_sequences_helper(
uncached_sequences, uncached_ids,
as.matrix(get_motif_matrix(motif)),
threshold_score, motif_cache,
cache_path, motif_id_file
)
}
} else {
motif_cache <- new.env(hash = TRUE, parent = emptyenv(),
size = length(sequences))
absolute_hits <- cached_score_sequences_helper(
sequences, names(sequences),
as.matrix(get_motif_matrix(motif)),
threshold_score, motif_cache,
cache_path, motif_id_file
)
}
} else {
absolute_hits <- score_sequences_helper(
sequences, as.matrix(get_motif_matrix(motif)), threshold_score)
}
if (max_hits > 9) {
max_hits_col <- 9
} else {
max_hits_col <- max_hits
}
max_hits_col <- max_hits_col + 1
multiple_hits <- unlist(lapply(seq_len(max_hits_col), function(hit_count) {
if (hit_count == max_hits_col) {
sum(vapply(absolute_hits, function(x) {
x >= hit_count
}, logical(1)))
} else {
sum(vapply(absolute_hits, function(x) {
x == hit_count
}, logical(1)))
}
}))
absolute_hits[absolute_hits > max_hits] <- max_hits
sum_absolute_hits <- sum(absolute_hits)
# add names to absolute hits
if(is.null(names(sequences))) {
names(absolute_hits) <- sequences
} else {
names(absolute_hits) <- names(sequences)
}
relative_hits <- sum_absolute_hits / sum_total_sites
if (max_hits_col == 2) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], more_hits = multiple_hits[2]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 3) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
more_hits = multiple_hits[3]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 4) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], more_hits = multiple_hits[4]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 5) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], four_hits = multiple_hits[4],
more_hits = multiple_hits[5]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 6) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], four_hits = multiple_hits[4],
five_hits = multiple_hits[5], more_hits = multiple_hits[6]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 7) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], four_hits = multiple_hits[4],
five_hits = multiple_hits[5], six_hits = multiple_hits[6],
more_hits = multiple_hits[7]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 8) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], four_hits = multiple_hits[4],
five_hits = multiple_hits[5], six_hits = multiple_hits[6],
seven_hits = multiple_hits[7], more_hits = multiple_hits[8]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 9) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], four_hits = multiple_hits[4],
five_hits = multiple_hits[5], six_hits = multiple_hits[6],
seven_hits = multiple_hits[7], eight_hit = multiple_hits[8],
more_hits = multiple_hits[9]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
} else if (max_hits_col == 10) {
return(list(
df = list(
motif_id = get_id(motif),
motif_rbps = paste0(get_rbps(motif), collapse = ", "),
absolute_hits = sum_absolute_hits,
relative_hits = relative_hits,
total_sites = sum_total_sites,
one_hit = multiple_hits[1], two_hits = multiple_hits[2],
three_hits = multiple_hits[3], four_hits = multiple_hits[4],
five_hits = multiple_hits[5], six_hits = multiple_hits[6],
seven_hits = multiple_hits[7], eight_hit = multiple_hits[8],
nine_hits = multiple_hits[9], more_hits = multiple_hits[10]
),
total_sites = total_sites,
absolute_hits = absolute_hits
))
}
}
score_sequences_helper <- function(sequences, motif_matrix, threshold_score) {
seq_char_vectors <- lapply(sequences, function(seq) {
unlist(strsplit(seq, ""))
})
scores <- score_sequences(seq_char_vectors, motif_matrix)
absolute_hits <- unlist(lapply(scores, function(scores_per_seq) {
sum(scores_per_seq >= threshold_score)
}))
return(absolute_hits)
}
cached_score_sequences_helper <- function(sequences, seq_ids, motif_matrix,
threshold_score, motif_cache,
cache_path, motif_id_file) {
absolute_hits <- score_sequences_helper(sequences, motif_matrix,
threshold_score)
for (i in seq_len(length(absolute_hits))) {
assign(seq_ids[i], absolute_hits[i], envir = motif_cache)
}
write_motif_cache(motif_cache, cache_path, motif_id_file)
return(absolute_hits)
}
#' @title Binding Site Enrichment Value Calculation
#'
#' @description
#' This function is used to calculate binding site enrichment / depletion scores
#' between predefined foreground and background sequence sets. Significance
#' levels of
#' enrichment values are obtained by Monte Carlo tests.
#'
#' @param foreground_scores_df result of \code{\link{score_transcripts}} on
#' foreground sequence
#' set (foreground sequence sets must be a subset of the background
#' sequence set)
#' @param background_scores_df result of \code{\link{score_transcripts}}
#' on background sequence set
#' @param background_total_sites number of potential binding sites per sequence
#' (returned by \code{\link{score_transcripts}})
#' @param background_absolute_hits number of putative binding sites per sequence
#' (returned by \code{\link{score_transcripts}})
#' @param n_transcripts_foreground number of sequences in the foreground set
#' @param max_fg_permutations maximum number of foreground permutations
#' performed in
#' Monte Carlo test for enrichment score
#' @param min_fg_permutations minimum number of foreground permutations
#' performed in
#' Monte Carlo test for enrichment score
#' @param e integer-valued stop criterion for enrichment score Monte Carlo
#' test: aborting
#' permutation process after
#' observing \code{e} random enrichment values with more extreme values than
#' the actual
#' enrichment value
#' @param p_adjust_method adjustment of p-values from Monte Carlo tests to
#' avoid alpha error
#' accumulation, see \code{\link[stats]{p.adjust}}
#' @return A data frame with the following columns:
#' \tabular{rl}{
#' \code{motif_id} \tab the motif identifier that is used in the original
#' motif library\cr
#' \code{motif_rbps} \tab the gene symbol of the RNA-binding protein(s)\cr
#' \code{enrichment} \tab binding site enrichment between foreground
#' and background sequences \cr
#' \code{p_value} \tab unadjusted p-value from Monte Carlo test \cr
#' \code{p_value_n} \tab number of Monte Carlo test permutations \cr
#' \code{adj_p_value} \tab adjusted p-value from Monte Carlo test
#' (usually FDR)
#' }
#' @examples
#' foreground_seqs <- c("CAGUCAAGACUCC", "AAUUGGUGUCUGGAUACUUCCCUGUACAU",
#' "AGAU", "CCAGUAA")
#' background_seqs <- c(foreground_seqs, "CAACAGCCUUAAUU", "CUUUGGGGAAU",
#' "UCAUUUUAUUAAA", "AUCAAAUUA", "GACACUUAAAGAUCCU",
#' "UAGCAUUAACUUAAUG", "AUGGA", "GAAGAGUGCUCA",
#' "AUAGAC", "AGUUC")
#' foreground_scores <- score_transcripts(foreground_seqs, cache = FALSE)
#' background_scores <- score_transcripts(background_seqs, cache = FALSE)
#' enrichments_df <- calculate_motif_enrichment(foreground_scores$df,
#' background_scores$df,
#' background_scores$total_sites, background_scores$absolute_hits,
#' length(foreground_seqs),
#' max_fg_permutations = 1000
#' )
#' @family matrix functions
#' @importFrom dplyr filter
#' @importFrom stats p.adjust
#' @export
calculate_motif_enrichment <- function(foreground_scores_df,
background_scores_df,
background_total_sites,
background_absolute_hits,
n_transcripts_foreground,
max_fg_permutations = 1000000,
min_fg_permutations = 1000,
e = 5, p_adjust_method = "BH") {
# avoid CRAN note
motif_id <- NULL
if (!setequal(foreground_scores_df$motif_id,
background_scores_df$motif_id)) {
stop("foreground and background scores used different motifs")
}
i <- 1
enrichments <- lapply(foreground_scores_df$motif_id, function(id) {
f <- dplyr::filter(foreground_scores_df, motif_id == id)
b <- dplyr::filter(background_scores_df, motif_id == id)
if (b$absolute_hits == 0) {
enrichment <- 1
p_value <- 1
p_value_n <- 0
} else {
enrichment <- (f$absolute_hits / f$total_sites) / (b$absolute_hits / b$total_sites)
mc_result <- calculate_transcript_mc(
background_absolute_hits[[i]], background_total_sites[[i]],
f$absolute_hits / f$total_sites, n_transcripts_foreground,
max_fg_permutations, min_fg_permutations, e
)
p_value <- mc_result$p_value
p_value_n <- mc_result$n
}
i <<- i + 1
return(list(
motif_id = id, motif_rbps = f$motif_rbps,
enrichment = enrichment, p_value = p_value, p_value_n = p_value_n
))
})
enrichment_df <- as.data.frame(do.call("rbind", enrichments),
stringsAsFactors = FALSE)
enrichment_df$motif_id <- as.character(enrichment_df$motif_id)
enrichment_df$motif_rbps <- as.character(enrichment_df$motif_rbps)
enrichment_df$enrichment <- as.numeric(enrichment_df$enrichment)
enrichment_df$p_value <- as.numeric(enrichment_df$p_value)
enrichment_df$p_value_n <- as.numeric(enrichment_df$p_value_n)
enrichment_df$adj_p_value <- stats::p.adjust(enrichment_df$p_value,
method = p_adjust_method)
return(enrichment_df)
}
read_motif_cache <- function(cache_path, motif_id) {
motif_cache_file <- paste0(cache_path, motif_id, ".rds")
succeeded <- FALSE
while (!succeeded) {
tryCatch({
motif_cache_lock <- lock(paste0(cache_path, motif_id, ".lock"))
if (motif_cache_lock$success) {
motif_cache <- readRDS(motif_cache_file)
if (!unlock(motif_cache_lock)) {
warning(paste0("cannot unlock cache file lock of ",
motif_id, " motif"))
}
succeeded <- TRUE
}
}, finally = {
unlock(motif_cache_lock)
})
Sys.sleep(0.05)
}
return(motif_cache)
}
write_motif_cache <- function(motif_cache, cache_path, motif_id) {
motif_cache_file <- paste0(cache_path, motif_id, ".rds")
succeeded <- FALSE
while (!succeeded) {
tryCatch({
motif_cache_lock <- lock(paste0(cache_path, motif_id, ".lock"))
if (motif_cache_lock$success) {
saveRDS(motif_cache, file = motif_cache_file)
if (!unlock(motif_cache_lock)) {
warning(paste0("cannot unlock cache file lock of ",
motif_id, " motif"))
}
succeeded <- TRUE
}
}, finally = {
unlock(motif_cache_lock)
})
Sys.sleep(0.05)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.