Description Usage Arguments Details Value Author(s) See Also Examples
Methods for annotating the query GenomicRanges with various properties on the subject regions, and the subject regions could be either GenomicRanges or GRangesList objects.
The extractRegionProperty
function extract the annotations on each query ranges from the properties of their overlapping/nearest subject regions.
Other functions are specific implementations of extractRegionProperty
,
and their purpose is to summarize the genomic features frequently involved in functional genomics analysis.
extractRegionOverlap
computes the overlapping of each query ranges with the subject regions.
Likewise, the extractRegionLength
, extractRegionLetterFrequency
, and extractRegionScores
functions
compute the Lengths, Sequence Contents, and GenomicScores of the overlapped subjects.
extractRegionYCount
and extractRegionNearestDistToY
quantify the density/distance of an annotation (Y)
over the subject regions, the resulting properties can capture the clustering effect between the subject region and the annotation object.
The functions extractDistToRegion5end
& extractDistToRegion3end
retrieve the distance between the query and ends of the subject, and
the extractRegionRelativePosition
function extract the relative position of the query in the matched regions.
Specifically, the relative position is calculated by dividing the distance of the query to the 5'-end of the region by the length of the region.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | extractRegionProperty(
x,
region,
property,
ambiguityMethod = c("auto", "mean", "sum", "min", "max"),
maxgap = 0L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "FALSE", "nearest"),
ignore.strand = FALSE
)
extractRegionOverlap(
x,
region = NULL,
ambiguityMethod = c("auto", "mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
ignore.strand = FALSE,
output.logical = TRUE
)
extractRegionLength(
x,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
ignore.strand = FALSE
)
extractRegionLetterFrequency(
x,
sequence,
region = NULL,
letters = "GC",
as.prob = TRUE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
ignore.strand = FALSE,
efficient = TRUE,
...
)
extractRegionScores(
x,
gscores,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
missingScores = c("zero", "mean", "none"),
ignore.strand = FALSE,
efficient = TRUE,
...
)
extractRegionYCount(
x,
y,
region = NULL,
normalize = FALSE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
ignore.strand = FALSE,
efficient = TRUE
)
extractRegionNearestDistToY(
x,
y,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
maxDist = 3e+06,
ignore.strand = FALSE
)
extractRegionRelativePosition(
x,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
nomapValue = c("NA", "0"),
ignore.strand = FALSE
)
extractDistToRegion5end(
x,
region = NULL,
ignore.strand = FALSE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest")
)
extractDistToRegion3end(
x,
region = NULL,
ignore.strand = FALSE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest")
)
## S4 method for signature 'GRanges'
extractRegionProperty(
x,
region,
property,
ambiguityMethod = c("auto", "mean", "sum", "min", "max"),
maxgap = 0L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "FALSE", "nearest"),
ignore.strand = FALSE
)
## S4 method for signature 'GRanges'
extractRegionOverlap(
x,
region = NULL,
ambiguityMethod = c("auto", "mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
ignore.strand = FALSE,
output.logical = TRUE
)
## S4 method for signature 'GRanges'
extractRegionLength(
x,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
ignore.strand = FALSE
)
## S4 method for signature 'GRanges'
extractRegionLetterFrequency(
x,
sequence,
region = NULL,
letters = "GC",
as.prob = TRUE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
ignore.strand = FALSE,
efficient = TRUE,
...
)
## S4 method for signature 'GRanges'
extractRegionScores(
x,
gscores,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
missingScores = c("zero", "mean", "none"),
ignore.strand = FALSE,
efficient = TRUE,
...
)
## S4 method for signature 'GRanges'
extractRegionYCount(
x,
y,
region = NULL,
normalize = FALSE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
ignore.strand = FALSE,
efficient = TRUE
)
## S4 method for signature 'GRanges'
extractRegionNearestDistToY(
x,
y,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest"),
maxDist = 3e+06,
ignore.strand = FALSE
)
## S4 method for signature 'GRanges'
extractRegionRelativePosition(
x,
region = NULL,
ambiguityMethod = c("mean", "sum", "min", "max"),
nomapValue = c("NA", "0"),
ignore.strand = FALSE
)
## S4 method for signature 'GRanges'
extractDistToRegion5end(
x,
region = NULL,
ignore.strand = FALSE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest")
)
## S4 method for signature 'GRanges'
extractDistToRegion3end(
x,
region = NULL,
ignore.strand = FALSE,
ambiguityMethod = c("mean", "sum", "min", "max"),
maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within", "equal"),
nomapValue = c("NA", "0", "nearest")
)
|
x |
A GRanges object for the query. |
region |
A GRanges or GRangesList object for the subject.
If not provided ( |
property |
A vector specifying the properties/attributes of the |
ambiguityMethod |
By default, for |
maxgap, minoverlap, type |
See |
nomapValue |
When |
ignore.strand |
When set to |
output.logical |
If |
sequence |
A BSgenome or XStringSet object. See the available.genomes function for how to install a genome. |
as.prob, letters |
See |
efficient |
|
... |
Additional arguments, it passes to letterFrequency in method |
gscores |
A GScores object.
See the vignette for more details: |
missingScores |
Specifying the imputation methods on missing scores in |
y |
A GRanges or GRangesList object for the clustering annotation. |
normalize |
If |
maxDist |
A value of the maximum nucleotide distance returned in |
Please check the documentation of extractRegionProperty for details.
For specific extractor functions, when only x is provided with region = NULL
,
the functions directly use the query ranges to calculate the corresponding properties.
In case the region ranges are provided, the extractor functions first find the index where x overlaps with the region and then matches the property of the region to the query GRanges according to the overlapping index.
For nomapValue = "nearest"
,
when ranges in x
do not overlap with ranges in regions
, the extractor functions will additionally match the unmapped x with its nearest region.
For extractRegionNearestDistToY
, if ranges in y
overlap the ranges in x
,
the overlapped ranges in y
are skipped and the ranges precede or follow x
are used to calculate the distances.
A logical or a numeric vector with the same length as x
.
For extractRegionProperty
, the vector type depends on the type of property
.
For extractRegionOverlap
either logical when output.logical = TRUE
or a numeric otherwise.
Zhen Wei
The genomeDerivedFeatures function for the extraction of genome-derived features.
The sequenceDerivedFeatures function for the extraction of sequence-derived features.
The topologyOnTranscripts function for generating the topology of annotations on transcripts of genes.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | ## ---------------------------------------------------------------------
## BASIC EXAMPLES
## ---------------------------------------------------------------------
library(GenomicRanges)
## Build the query GRanges object:
x_gr <- GRanges(rep(c("chr1", "chr2"), c(5, 15)),
IRanges(c(sample(11874:12127, 5), sample(38814:41527,15)), width=100),
strand=Rle(c("+", "-"), c(5, 15)))
x_gr
## The region GRanges or GRangesList object:
exons_gr <- GRanges(c("chr1","chr2","chr2"),
IRanges(start=c(11874,38814,45440),end=c(12227,41627,46588)),
strand=c("+","-","-"))
genes_grl <- GRangesList(gene1=exons_gr[1],gene2=exons_gr[c(2,3)])
## Extract lengths of the query:
extractRegionLength(x_gr)
## Extract length of the exon overlapped by the query:
extractRegionLength(x_gr, exons_gr)
## Extract exonic length of the genes overlapped by the query:
extractRegionLength(x_gr, genes_grl)
## Exract self defined property on exons overlapped by the query:
exons_property <- c(1,6,8)
extractRegionProperty(x_gr, exons_gr, exons_property)
## ---------------------------------------------------------------------
## MORE FEATURE EXTRACTORS
## ---------------------------------------------------------------------
## Quantifying clustering effect with annotation y:
## Self clustering
extractRegionYCount(x_gr, x_gr)
## Self clustering on overlapping exons/genes
extractRegionYCount(x_gr, x_gr, exons_gr)
extractRegionYCount(x_gr, x_gr, genes_grl)
## Clustering with another annotation:
y_gr <- GRanges(rep(c("chr1", "chr2"), c(50, 50)),
IRanges(c(sample(11874:12127, 50),
sample(38814:41527,50)), width=1),
strand=Rle(c("+", "-"), c(50, 50)))
extractRegionYCount(x_gr, y_gr)
extractRegionYCount(x_gr, y_gr, exons_gr)
extractRegionYCount(x_gr, y_gr, genes_grl)
extractRegionNearestDistToY(x_gr, y_gr)
## Relative position on exons/genes:
extractRegionRelativePosition(x_gr, exons_gr)
extractRegionRelativePosition(x_gr, genes_grl)
## Distance to 5'end/3'end of exons/genes:
extractDistToRegion5end(x_gr, exons_gr)
extractDistToRegion5end(x_gr, genes_grl)
extractDistToRegion3end(x_gr, exons_gr)
extractDistToRegion3end(x_gr, genes_grl)
## Extract features that depends on sequence/annotation packages:
## Not run:
library(BSgenome.Hsapiens.UCSC.hg19)
bsgenome <- BSgenome.Hsapiens.UCSC.hg19
## GC content of the query:
extractRegionLetterFrequency(x_gr, bsgenome, letters="GC")
## GC contents of exons/genes containing the query:
extractRegionLetterFrequency(x_gr, bsgenome, exons_gr, letters="GC")
extractRegionLetterFrequency(x_gr, bsgenome, genes_grl, letters="GC")
library(phastCons100way.UCSC.hg19)
gscores <- phastCons100way.UCSC.hg19
## PhastCons scores of the query:
extractRegionScores(x_gr, gscores)
## PhastCons scores of exons/genes containing the query:
extractRegionScores(x_gr, gscores, exons_gr)
extractRegionScores(x_gr, gscores, genes_grl)
## End(Not run)
## ---------------------------------------------------------------------
## VISUALIZE FEATURE IN PREDICTION MODEL
## ---------------------------------------------------------------------
## Load the GRanges of the m6A miCLIP dataset prepared for the classification model:
GSE63753_abcam <- readRDS(system.file("extdata", "GSE63753_abcam.rds", package = "WhistleR"))
## The metadata column of the GRanges is a 0/1 vector, 1 for the positive m6A site, 0 is the negative DRACH:
GSE63753_abcam
table(GSE63753_abcam$target)
## Extract exon length overlapped by the DRACH sites:
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
GSE63753_abcam$exon_length <- extractRegionLength(GSE63753_abcam, exons(TxDb.Hsapiens.UCSC.hg19.knownGene))
## Plot the logistic regression fit with cubic splines:
library(ggplot2)
ggplot(na.omit(as.data.frame(mcols(GSE63753_abcam))), aes(log(exon_length), target)) +
geom_smooth(formula = y ~ splines::ns(x, 3), method = "glm", method.args = list(family = "binomial")) +
geom_hline(yintercept = 0.5, linetype = 3) +
scale_x_continuous(limits = c(4,9)) +
scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1)) +
theme_classic() + labs(x = "log(length of exon)", y = "prob of m6A = 1", title = "LR fit with cubic splines")
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.