#' GenomeRepeatTrack Class
#'
#' S4 container for a genome-wide repeat annotation.
#'
#' @slot genome character; the genome assembly identifier the annotation
#'   was built for.
#' @slot repInstances GRangesList; the initializer in this file stores one
#'   element per repeat name, holding the genomic instances of that repeat.
setClass(
  "GenomeRepeatTrack",
  slots = c(
    genome = "character",
    repInstances = "GRangesList"
  ),
  package = "epiRepeatR"
)
#' initialize method for GenomeRepeatTrack
#'
#' Builds the repeat annotation either from a tab-separated RepeatMasker
#' table on disk or, when no file is given, from the UCSC table browser.
#'
#' @param .Object   the object being initialized (supplied by \code{new}).
#' @param genome    character; genome assembly identifier. It is used for the
#'   UCSC session, passed on to the muRtools helpers and stored in the
#'   \code{genome} slot.
#' @param trackFile path to a tab-separated file with a header line that
#'   contains the columns listed in \code{columnMatching} below. If
#'   \code{NULL}, the "rmsk" table of the "RepeatMasker" track is downloaded
#'   via rtracklayer.
#' @return the initialized \code{GenomeRepeatTrack} object.
setMethod("initialize", "GenomeRepeatTrack",
  function(.Object,
    genome,
    trackFile=NULL
  ){
    # role of a column -> name of that column in the repeat table
    columnMatching <- c(
      chrom = "genoName",
      start = "genoStart",
      end = "genoEnd",
      strand = "strand",
      repName = "repName",
      repClass = "repClass",
      repFamily = "repFamily"
    )
    if (is.null(trackFile)){
      # require() only returns FALSE when the package is unavailable;
      # fail here with a clear message instead of a later
      # "could not find function" error
      if (!require(rtracklayer)){
        stop("Package 'rtracklayer' is required to download the repeat track annotation")
      }
      session <- browserSession()
      genome(session) <- genome
      tt <- getTable(ucscTableQuery(session, track="RepeatMasker", table="rmsk"))
    } else {
      # comment.char="" so that '#' characters in the table are read as data
      tt <- read.table(trackFile, sep="\t", header=TRUE, stringsAsFactors=FALSE, comment.char="")
    }
    missingCols <- !(columnMatching %in% colnames(tt))
    if (any(missingCols)){
      stop(paste0("The following columns were not found in the repeat track annotation: ", paste(columnMatching[missingCols], collapse=",")))
    }
    # NOTE(review): UCSC database tables usually store 0-based start
    # coordinates -- confirm that coord.format="B1RI" is the intended setting
    repeatTrackGr <- muRtools::df2granges(tt, ids=NULL, chrom.col=columnMatching["chrom"], start.col=columnMatching["start"], end.col=columnMatching["end"], strand.col=columnMatching["strand"], coord.format="B1RI", assembly=genome, doSort=TRUE, adjNumChromNames=TRUE)
    repeatTrackGr <- muRtools::setGenomeProps(repeatTrackGr, genome, onlyMainChrs=TRUE, dropUnknownChrs=TRUE) #drop chromosomes except for the main ones
    reps <- as.character(elementMetadata(repeatTrackGr)[, columnMatching[["repName"]]])
    # Group the row indices by repeat name in a single pass. split() orders
    # the groups by the sorted unique names, and the indices within each
    # group stay in ascending order, so every element keeps the order of
    # repeatTrackGr.
    idxByRep <- split(seq_along(reps), reps)
    repInstances <- lapply(idxByRep, FUN=function(idx){
      repeatTrackGr[idx]
    })
    repInstances <- GRangesList(repInstances)
    .Object@genome <- genome
    .Object@repInstances <- repInstances
    .Object
  }
)
#' Constructor for GenomeRepeatTrack objects
#'
#' Thin wrapper around \code{new("GenomeRepeatTrack", ...)}.
#'
#' @param genome    genome assembly identifier, forwarded to the initializer.
#' @param trackFile optional path to a repeat annotation table, forwarded to
#'   the initializer. Defaults to \code{NULL}.
#' @return a \code{GenomeRepeatTrack} object.
GenomeRepeatTrack <- function(genome, trackFile=NULL){
  new("GenomeRepeatTrack", genome=genome, trackFile=trackFile)
}
# Define the generic only if it does not already exist.
if (!isGeneric("getRepeatInstances")) {
  setGeneric(
    "getRepeatInstances",
    function(.obj) standardGeneric("getRepeatInstances")
  )
}

#' getRepeatInstances
#'
#' Accessor for the repeat instances stored in a GenomeRepeatTrack.
#'
#' @param .obj a \code{GenomeRepeatTrack} object.
#' @return the content of the \code{repInstances} slot.
setMethod(
  "getRepeatInstances",
  signature(.obj="GenomeRepeatTrack"),
  function(.obj){
    .obj@repInstances
  }
)
# Define the generic only if it does not already exist.
if (!isGeneric("getRepeatGenomeCovg")) {
  setGeneric(
    "getRepeatGenomeCovg",
    function(.obj) standardGeneric("getRepeatGenomeCovg")
  )
}

#' getRepeatGenomeCovg
#'
#' Number of bases covered by each element of the \code{repInstances} slot.
#'
#' @param .obj a \code{GenomeRepeatTrack} object.
#' @return an integer vector with one entry per element of
#'   \code{repInstances}, carrying the same names. The values are raw base
#'   counts; they are not divided by the genome length.
setMethod("getRepeatGenomeCovg", signature(.obj="GenomeRepeatTrack"),
  function(.obj){
    repInstances <- .obj@repInstances
    # vapply() keeps the result an integer vector even when there are no
    # repeat elements, where sapply() would return an empty list
    covgBases <- vapply(repInstances, FUN=function(x){
      # merge overlapping instances on either strand first, so that a base
      # covered more than once is counted once
      sum(width(reduce(x, min.gapwidth=0L, ignore.strand=TRUE)))
    }, FUN.VALUE=integer(1))
    names(covgBases) <- names(repInstances)
    covgBases
  }
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.