R/BED2GRangesSeq.R

BED2GRangesSeq <- function (data.BED, header=FALSE, upstream.seq.ind=7, 
                               downstream.seq.ind=8, withSeq) 
{
    if (missing(data.BED) || (class(data.BED) != "data.frame") || 
            dim(data.BED)[2] < 3) {
        stop("No valid data.BED passed in, which is a data frame as BED format 
             file with at least 3 fields in the order of: chromosome, start and 
             end. Optional fields are name, score and strand etc. Please refer 
             to http://genome.ucsc.edu/FAQ/FAQformat#format1 for details.")
    }
    if (header == TRUE) {
        myPeak <- data.BED[-1, ]
    }
    else {
        myPeak <- data.BED
    }
    if (dim(myPeak)[2] >= 4) {
        names <- as.character(myPeak[, 4])
        if (length(unique(names)) == 1) {
            names <- formatC(1:dim(myPeak)[1], width=nchar(dim(myPeak)[1]), 
                            flag="0")
        }
    }
    else {
        names <- formatC(1:dim(myPeak)[1], width=nchar(dim(myPeak)[1]), 
                        flag="0")
    }
    if (dim(myPeak)[2] >= 5) {
        score <- as.numeric(as.character(myPeak[, 5]))
    }
    else {
        score <- rep(1, dim(myPeak)[1])
    }
    if (dim(myPeak)[2] >= 6) {
        strand <- as.character(myPeak[, 6])
        strand[strand == -1] <- "-"
        strand[strand == 1] <- "+"
    }
    else {
        strand <- rep(1, dim(myPeak)[1])
    }
    if(withSeq)
    {
        upstream.seq <- as.character(myPeak[,upstream.seq.ind])
        downstream.seq <- as.character(myPeak[,downstream.seq.ind])
        GRanges(seqnames=sub("chr", "", as.character(myPeak[, 1])), 
                ranges=IRanges(start=as.numeric(as.character(myPeak[, 2])), 
                               end=as.numeric(as.character(myPeak[, 3])), 
                               names=names), 
                strand=strand,
                score=score, 
                upstream.seq=upstream.seq, 
                downstream.seq=downstream.seq
                )
    }
    else
    {
        GRanges(seqnames=sub("chr", "", as.character(myPeak[, 1])), 
                ranges=IRanges(start=as.numeric(as.character(myPeak[, 2])), 
                               end=as.numeric(as.character(myPeak[, 3])), 
                               names=names), 
                strand=strand,
                score=score
        )
    }
}

Try the cleanUpdTSeq package in your browser

Any scripts or data that you put into this service are public.

cleanUpdTSeq documentation built on Nov. 8, 2020, 8:30 p.m.