utils-sequence | R Documentation |
Sequence-related utility functions.
calc_complexity(string, complexity.method = c("WoottonFederhen",
"WoottonFederhenFast", "Trifonov", "TrifonovFast", "DUST"), alph = NULL,
trifonov.max.word.size = 7)
calc_windows(n, window = 1, overlap = 0, return.incomp = TRUE)
count_klets(string, k = 1, alph)
get_klets(lets, k = 1)
mask_ranges(seqs, ranges, letter = "-")
mask_seqs(seqs, pattern, RC = FALSE, letter = "-")
meme_alph(core, file = stdout(), complements = NULL, ambiguity = NULL,
like = NULL, alph.name = NULL, letter.names = NULL, colours = NULL)
shuffle_string(string, k = 1, method = c("euler", "linear", "markov"),
rng.seed = sample.int(10000, 1))
slide_fun(string, FUN, FUN.VALUE, window = 1, overlap = 0,
return.incomp = TRUE)
window_string(string, window = 1, overlap = 0, return.incomp = TRUE,
nthreads = 1)
string |
|
complexity.method |
|
alph |
|
trifonov.max.word.size |
|
n |
|
window |
|
overlap |
|
return.incomp |
|
k |
|
lets |
|
seqs |
|
ranges |
|
letter |
|
pattern |
|
RC |
|
core |
|
file |
Output file. |
complements |
|
ambiguity |
|
like |
|
alph.name |
|
letter.names |
|
colours |
|
method |
|
rng.seed |
|
FUN |
|
FUN.VALUE |
The expected return type for FUN. |
nthreads |
|
For calc_complexity(): A vector of numeric values.
For calc_windows(): A data.frame with columns start and stop.
For count_klets(): A data.frame with columns lets and counts.
For get_klets(): A character vector of k-lets.
For mask_ranges(): The masked XStringSet object.
For mask_seqs(): The masked XStringSet object.
For meme_alph(): NULL, invisibly.
For shuffle_string(): A single character string.
For slide_fun(): A vector with type FUN.VALUE.
For window_string(): A character vector.
Benjamin Jean-Marie Tremblay, benjamin.tremblay@uwaterloo.ca
create_sequences(), get_bkg(), sequence_complexity(), shuffle_sequences()
#######################################################################
## calc_complexity
## Calculate complexity for arbitrary strings, once per available method.
## The argument name is written out in full instead of relying on partial
## matching of 'c' to 'complexity.method'.
calc_complexity("GTGCCCCGCGGGAACCCCGC",
  complexity.method = "WoottonFederhen")
calc_complexity("GTGCCCCGCGGGAACCCCGC",
  complexity.method = "WoottonFederhenFast")
calc_complexity("GTGCCCCGCGGGAACCCCGC", complexity.method = "Trifonov")
calc_complexity("GTGCCCCGCGGGAACCCCGC", complexity.method = "TrifonovFast")
calc_complexity("GTGCCCCGCGGGAACCCCGC", complexity.method = "DUST")

#######################################################################
## calc_windows
## Calculate window coordinates for any value 'n'.
calc_windows(100, window = 10, overlap = 5)

#######################################################################
## count_klets
## Count k-lets for any string of characters
count_klets("GCAAATGTACGCAGGGCCGA", k = 2)
## The default 'k' value (1) counts individual letters
count_klets("GCAAATGTACGCAGGGCCGA")

#######################################################################
## get_klets
## Generate all possible k-lets for a set of characters
get_klets(c("A", "C", "G", "T"), k = 3)
## Note that each element in 'lets' is considered a single unit;
## see:
get_klets(c("AA", "B"), k = 2)

#######################################################################
## mask_ranges
## Mask arbitrary ranges
## GenomicRanges is optional, so the example is guarded. Note that the
## argument of requireNamespace() is 'quietly'; because it comes after
## '...', an abbreviated 'quiet' would not be matched to it.
if (requireNamespace("GenomicRanges", quietly = TRUE)) {
  ranges <- GenomicRanges::GRanges("A", IRanges::IRanges(1, 5))
  ## Named 'dna_seq' rather than 'seq' to avoid masking base::seq().
  dna_seq <- Biostrings::DNAStringSet(c(A = "ATGACTGATTACTTATA"))
  mask_ranges(dna_seq, ranges, letter = "-")
}

#######################################################################
## mask_seqs
## Mask repetitive sequences
data(ArabidopsisPromoters)
mask_seqs(ArabidopsisPromoters, "AAAAAA")

#######################################################################
## meme_alph
## Create MEME custom alphabet definition files
meme_alph("ACm", complements = "TGM", alph.name = "MethDNA",
  letter.names = c(A = "Adenine", C = "Cytosine", G = "Guanine",
    T = "Thymine", m = "Methylcytosine", M = "mC:Guanine"),
  like = "DNA", ambiguity = c(N = "ACGTmM"))

#######################################################################
## shuffle_string
## Shuffle any string of characters
shuffle_string("ASDADASDASDASD", k = 1)

#######################################################################
## slide_fun
## Apply a function to a character vector along sliding windows
## FUN reports, for each window, whether it contains a G or a C.
FUN <- function(x) grepl("[GC]", x)
data.frame(
  Window = window_string("ATGCATCTATGCA", window = 2, overlap = 1),
  HasGC = slide_fun("ATGCATCTATGCA", FUN, logical(1),
    window = 2, overlap = 1)
)

#######################################################################
## window_string
## Get sliding windows for a string of characters
window_string("ABCDEFGHIJ", window = 2, overlap = 1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.