#' Find Instances of Motifs Across Regions
#'
#' Calls \code{findMotifsGenome.pl} with the \code{-find} option set
#' to run a search for instances of specified motifs across a given
#' set of regions. Results are then saved to a specified file. This
#' function does not do motif finding, but rather uses a motif file
#' to search for and return the exact regions in which motifs are found.
#' Motifs are provided as matrices as generated by \code{find_motifs_genome}
#' and read in by \code{read_*_results}.
#'
#' @param x \code{data.frame} with the first three columns being
#' chromosome, start, and end coordinates, with a fourth column
#' corresponding to the region identifier; extra columns may be kept;
#' x may alternately be a path to an existing bed file
#' @param path path of file to save motif instances results to
#' @param genome ID of installed genome; check installed genomes using
#' \code{list_homer_packages()}; examples include "hg38" and "mm10";
#' add an 'r' at the end to mask repeats, e.g. "mm10r"
#' @param motif_file path to file containing all instances of motifs
#' to be scanned for; can be written be \code{write_homer_motif}
#' @param scan_size size of sequence to scan; this can be a numeric to
#' specify the number of bases to scan centered on the region, or
#' alternately can be set to "given" to scan the entire region;
#' if using "given", will use the "-chopify" option to cut large
#' background sequences to average of target sequence size
#' [default: \code{given}]
#' @param cores number of cores to use [default: all cores available]
#' @param cache number in MB to use as cache to store sequences in memory
#' [default: calculates free memory and divides by 4]
#' @return Nothing; called for its side-effect of producing HOMER results
#'
#' @export
#' @seealso \code{\link{read_known_results}}, \code{\link{read_denovo_results}}
## @param motif_pwm vector of motif_pwm's either derived from \code{find_motifs_genome}
## #' and read in by \code{read_*_results}, formatted with columns
## #' 'A', 'C', 'G', 'T', and rows as nucleotide probabilities such that
## #' each row sum equal to 1
## find_instances("inst/extdata/test_regions.bed", "inst/extdata/test_inst.txt", "mm10r",
## "inst/extdata/test.motif",
## scan_size = "given")
find_motifs_instances <- function(x, path, genome,
motif_file,
scan_size = 'given',
cores = parallel::detectCores(),
cache = .calc_free_mem() / 4) {
## Error checking -----------------------------------------------------
## File format for `x` - write bed files if data.frame; assign if a character
if ("data.frame" %in% class(x)) {
target_bed <- tempfile("target_")
.write_bed(x, path = target_bed)
} else {
if (file.exists(x) != TRUE) {
stop("Check that your bed file for `x` exists")
}
target_bed <- x
}
## Run findMotifsGenome.pl ---------------------------------------------
## Make HOMER results output dir
## Construct and run command
homer_base <- get_homer_bin()
cmd <- paste(
paste0(homer_base, "findMotifsGenome.pl"),
target_bed, genome, tempfile(pattern = "fi_"),
"-find", motif_file,
"-size", scan_size,
"-p", cores,
"-cache", cache,
">", path
)
system(cmd)
if (scan_size == "given") {
cmd <- paste(cmd, "-chopify")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.