Nothing
# We define two classes for genome intervals. The basic class extends Interval_full with:
# a seq_name factor to represent chromosomes and more generally any sequence origin. The second also stores strand orientation.
# a logical vector "inter_base" (to flag zero-length features such as restriction sites)
######## Class definitions
#### Genome_intervals
# extends Intervals_full of intervals package with a seqName factor
setClass(
"Genome_intervals",
representation = representation( annotation = "data.frame" ),
prototype ( type ="Z"),
contains = "Intervals_full",
validity = function( object ) {
# check 'annotation' data.frame
if(!is.data.frame(object@annotation) || nrow( object@.Data ) != nrow( object@annotation ) )
return("The 'annotation' slot must be a data.frame with as many rows as the endpoint matrix.")
# Check 'seq_name' column within annotation data.frame slot
if ( !('seq_name' %in% names(object@annotation)) || !is.factor( object@annotation$seq_name ) || any(is.na(object@annotation$seq_name)) )
return( "The 'annotation' slot should have a column named 'seq_name' that is a factor and does not contain missing values." )
# Check 'inter_base' column within annotation data.frame slot
if ( !('inter_base' %in% names(object@annotation)) || !is.logical( object@annotation$inter_base ) || any(is.na(object@annotation$inter_base)) )
return( "The 'annotation' slot should have a column named 'inter_base' that is logical and does not contain missing values." )
if( object@type!='Z')
return( "The intervals 'type' should be 'Z'." )
return( TRUE )
}
)
#### Genome_intervals_stranded
setClass(
"Genome_intervals_stranded",
contains = "Genome_intervals",
validity = function( object ) {
# Check 'strand' column within annotation data.frame slot
if ( !('strand' %in% names(object@annotation)) || !is.factor( object@annotation$strand ) )
return( "The 'annotation' slot should have column named 'strand' that is a factor." )
if ( !nlevels(object@annotation$strand)==2 )
return( "The 'strand' slot should be a factor with exactly two levels." )
return( TRUE )
}
)
######## Accessors and replacement methods
#### annotation
## implements the generics of BiocGenerics
setMethod(
f="annotation",
signature="Genome_intervals",
definition= function(object){
object@annotation
})
setReplaceMethod(
f="annotation",
signature="Genome_intervals",
definition=function( object, value ) {
object@annotation <- value
return(object)
})
#### inter_base
setGeneric( "inter_base", function(x) standardGeneric( "inter_base" ) )
setMethod(
"inter_base",
signature( "Genome_intervals" ),
function( x ) x@annotation$inter_base
)
setGeneric( "inter_base<-", function( x, value ) standardGeneric( "inter_base<-" ) )
setReplaceMethod(
"inter_base", "Genome_intervals",
function( x, value ) {
if(!( length( value ) %in% c(1,nrow(x)) ) | !is.logical(value) )
stop( "The 'inter_base' argument should be a logical vector length equal to 1 or to the number of rows of the end point matrix." )
if(length(value)==1) value = rep(value, nrow(x))
x@annotation$inter_base <- value
return(x)
}
)
#### seq_name
setGeneric( "seq_name", function(x) standardGeneric( "seq_name" ) )
setMethod(
"seq_name",
signature( "Genome_intervals" ),
function( x ){
.Deprecated(new = "seqnames")
seqnames(x)
}
)
setGeneric( "seq_name<-", function( x, value ) standardGeneric( "seq_name<-" ) )
setReplaceMethod(
"seq_name", "Genome_intervals",
function( x, value ) {
.Deprecated(new = "seqnames<-")
return(seqnames(x)<-value)
}
)
#### seqnames
# TODO ask for seqnames to be BiocGeneric
setMethod(
"seqnames",
signature( "Genome_intervals" ),
function( x ){
x@annotation$seq_name
}
)
setReplaceMethod(
f="seqnames", "Genome_intervals",
function( x, value ) {
if ( is.vector( value ) )
value = factor(value)
if(!( length( value ) %in% c(1,nrow(x)) ) )
stop( "The 'seqnames' argument should be a vector or a factor of length equal to 1 or to the number of rows of the end point matrix." )
if(length(value)==1) value = rep(value, nrow(x))
x@annotation$seq_name <- value
return(x)
}
)
#### strand
## setGeneric( "strand", function(x) standardGeneric( "strand" ) )
setMethod(
"strand",
signature( "Genome_intervals_stranded" ),
function( x ) x@annotation$strand
)
## setGeneric( "strand<-", function( x, value ) standardGeneric( "strand<-" ) )
setReplaceMethod(
"strand", "Genome_intervals_stranded",
function( x, value ) {
if ( is.vector( value ) )
value = factor(value)
if(nlevels(value)!=2)
stop( "The 'strand' argument should be a vector with exactly 2 distinct values or a factor with 2 levels." )
if(!( length( value ) %in% c(1,nrow(x)) ) )
stop( "The 'strand' argument should be a vector or a factor of length equal to 1 or to the number of rows of the end point matrix." )
if(length(value)==1) value = rep(value, nrow(x))
x@annotation$strand <- value
return(x)
}
)
#### type
setReplaceMethod(
"type", "Genome_intervals",
function( x, value ) {
if ( length( value ) != 1 || value!= "Z" )
stop( "The 'type' slot should be 'Z'." )
x@type <- value
return(x)
}
)
######## Subsetting
setMethod(
"[",
signature( "Genome_intervals" ),
function( x, i, j, ..., drop ) {
if ( missing(i) ) i <- rep( TRUE, nrow(x) )
if ( missing(j) ) {
# Preserve class. Note that both [i,] and [i] syntax subset rows.
if ( is.character(i) ) i <- match( i, rownames( x ) )
x@annotation <- x@annotation[i,,drop=FALSE]
}
callNextMethod( x, i, j, ..., drop )
}
)
setMethod(
"[<-",
signature( x = "Genome_intervals", i = "ANY", j = "missing", value = "Genome_intervals" ),
function( x, i, j, value ) {
#### Error checking
if ( is.character(i) ) i <- match( i, rownames( x ) )
if ( any( is.na( i ) ) )
stop( "Cannot assign to NA indices or row names which do not exist." )
n <- length( (1:nrow(x))[i] )
if ( n != nrow( value ) )
stop( "Replacement object is of the wrong size." )
if( length(annotation(value)) != length(annotation(x)) )
stop("Number of columns of annotation slots do not match. Check if you're assigning a Genome_intervals_stranded into rows of a Genome_intervals or vice-versa.")
if( any( names(annotation(value)) != names(annotation(x)) ) )
stop("Names of annotation do not match. Check if you're assigning a Genome_intervals_stranded into rows of a Genome_intervals or vice-versa.")
#### Intervals
x@.Data[i,] <- value@.Data
x@closed[i,] <- value@closed
## Annotation
annotation(x)[i,] <- annotation(value)
#### Rownames
has_names_x <- !is.null( rownames(x) )
has_names_value <- !is.null( rownames(value) )
if ( has_names_x ) {
if ( has_names_value ) rownames(x)[i] <- rownames(value)
else rownames(x)[i] <- ""
}
else {
if ( has_names_value ) {
rownames(x) <- rep( "", nrow(x) )
rownames(x)[i] <- rownames(value)
}
}
return(x)
}
)
setMethod("$", "Genome_intervals", function(x, name) {
eval(substitute(annotation(x)$NAME_ARG, list(NAME_ARG=name)))
})
setReplaceMethod("$", "Genome_intervals", function(x, name, value) {
x[[name]] <- value
x
})
setMethod("[[", "Genome_intervals", function(x, i, j, ...) annotation(x)[[i]] )
setReplaceMethod("[[",
signature=signature(x="Genome_intervals"),
function(x, i, j, ..., value) {
annotation(x)[[i]] <- value
x
})
######## Coercion
setMethod(
"coerce",
signature( from = "Genome_intervals", to = "Intervals_full" ),
function( from, to, strict ) {
new(
"Intervals_full",
from@.Data,
type = type( from ),
closed = closed(from)
)
}
)
setMethod(
"coerce",
signature( from = "Genome_intervals", to = "character" ),
function( from, to, strict ) {
if ( nrow( from ) == 0 )
return( character() )
else {
# call to Intervals coercion method for the intervals
ints <- as( as(from, "Intervals_full"), "character")
# add seq_name in first column, inter_base in last column
result <- paste(seqnames(from), ints, ifelse( inter_base(from), "*", ""))
return( result )
}
}
)
setMethod(
"coerce",
signature( from = "Genome_intervals_stranded", to = "character" ),
function( from, to, strict ) {
if ( nrow( from ) == 0 )
return( character() )
else {
# call to Intervals coercion method for the intervals
ints <- as( as(from, "Intervals_full"), "character")
# add seq_name and strand in first columns, inter_base in last column
result <- paste(seqnames(from), strand(from), ints, ifelse( inter_base(from), "*", "") )
return( result )
}
}
)
## more user-friendly constructor function:
"GenomeIntervals" <- function(chromosome="",
start=0, end=0,
strand=NULL,
inter.base=NULL, leftOpen=NULL,
rightOpen=NULL, ...)
{
stopifnot(length(chromosome)==length(start),
length(chromosome)==length(end))
## any inter-base intervals specified
if (is.null(inter.base)){
inter.base <- vector("logical", length(chromosome))
} else {
stopifnot(is.logical(inter.base), length(inter.base)==length(chromosome))
}
## by default, all intervals are assumed to be closed,
## unless specified otherwise:
if (!is.null(leftOpen)){
stopifnot(is.logical(leftOpen), length(leftOpen)==length(chromosome))
} else {
leftOpen <- vector("logical", length(chromosome))
}
if (!is.null(rightOpen)){
stopifnot(is.logical(rightOpen), length(rightOpen)==length(chromosome))
} else {
rightOpen <- vector("logical", length(chromosome))
}
## prepare annotation data.frame for object:
annoDf <- data.frame("seq_name"=factor(chromosome),
"inter_base"=inter.base)
## additional annotation columns?
further.args <- as.list(match.call(expand.dots=FALSE)[["..."]])
if (length(further.args)>0){
for (i in 1:length(further.args))
annoDf[[names(further.args)[i]]] <- eval(further.args[[i]])
}
## create object depending on whether or not the strand was specified
if (!is.null(strand)){
## create object of class "Genome_intervals_stranded"
stopifnot(length(strand)==length(chromosome),
all(strand %in% c("+", "-")))
annoDf$strand <- factor(strand, levels=c("+", "-"))
obj <- new("Genome_intervals_stranded",
cbind(start, end),
closed=cbind(!leftOpen, !rightOpen),
annotation=annoDf)
} else {
### non-stranded intervals
obj <- new("Genome_intervals",
cbind(start, end),
closed=cbind(!leftOpen, !rightOpen),
annotation=annoDf)
}
stopifnot(validObject(obj))
return(obj)
}# GenomeIntervals
## coercion to data.frame
setAs("Genome_intervals","data.frame",function(from){
## change depending on whether we have a gff3 or not
gff3 <- ifelse(ncol(annotation(from))==8,
all(colnames(annotation(from)) == c("seq_name", "strand", "inter_base", "source", "type", "score", "phase", "gffAttributes")),
FALSE)
## create the df
if(gff3){
df <- cbind(annotation(from),from[,1:2])[,c(1,4,5,9,10,6,2,7,8)]
names(df) <- c("seqname","source","feature","start","end","score","strand","frame","attribute")
} else {
df <- cbind(annotation(from),from[,1:2])[,-grep("inter_base",colnames(annotation(from)))]
}
## convert factors to characters
df[,sapply(df,is.factor)] <- apply(df[,sapply(df,is.factor),drop=FALSE],2,as.character)
## to avoid introducing warnings when replacing NA values by dots
df[is.na(df)] <- "."
return(df)
})
## extract the width as for GRanges
setMethod(f = "width",
signature = "Genome_intervals",
definition=function(x){
# the width
x[,2] - x[,1] +
# right end (we add one if right closed)
x@closed[,2] -
# left end (we subtract one if left open)
!x@closed[,1]
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.