R/bintable_functions.R

Defines functions return_chrominfo_df Split_genomic_coordinates get_chrom_info CheckContinuousRanges Validate_table Read_bintable ._ColIndexError_

._ColIndexError_ <- function(x){
    Error.col <- c("chr","start",NA,NA,NA,"more")
    ColClasses <- list(is.character,is.numeric,is.numeric,
        is.character,is.character)
    ColNames <- c("chr","start","end","strand","names")
    Index.type.error <- c("index is missing.")
    Norm.x <- (x - min(x))+1
    Len.x <- length(Norm.x)

    if(Len.x < 3 | Len.x > 5){
        if(Len.x > 5){
            Index.type.error <- "indices were provided."
        }
        stop(paste("col.index expects as bare minimum chr,start,end.",
            Error.col[x],Index.type.error,"\n"))
    }
    Alist <- list("Names" = ColNames[Norm.x], "Classes" = ColClasses[Norm.x])
    return(Alist)
}
Read_bintable = function(Filename = NULL, read.delim = " ", 
    col.index = c(1,2,3), impose.discontinuity=TRUE){
    ColMetrics <- ._ColIndexError_(col.index)
    Colnames<-ColMetrics[["Names"]]
    ColClasses<- ColMetrics[["Classes"]]
    
    Table <- Filename
    if(is.character(Filename)){
        Table <- fread(file = Filename, sep=read.delim, stringsAsFactors=FALSE, 
            verbose=FALSE, showProgress=FALSE, data.table=FALSE)
    }
    colnames(Table) <- Colnames
    Ranges.table <- Table[,col.index]
    is.stranded <- col.index[4] != NA
    has.names <- col.index[5] != NA
    Validate_table(Table=Table, colClasses=ColClasses, colnames = Colnames, 
        col.index=col.index)
    if(impose.discontinuity){
        CheckContinuousRanges(Table=Table,StartCol=c("start"),EndCol=c("end"))
    }
    # Ranges.table <- Table[order(Table[,'chr'],Table[,'start']),]
    Table.list <- list('main.tab' = Ranges.table, 'stranded' = is.stranded, 
        'named' = has.names)
    return(Table.list)
}
Validate_table = function(Table = NULL, colnames = NULL, colClasses = NULL, 
    col.index = NULL) {
    for (i in seq_len(length(colnames))) {
        if(!colClasses[[i]](Table[,i])){
            stop(paste("Values expected for",colnames[i],"at col",col.index[i],"
                found values of class",class(Table[,i])))
        }
    }
    if(any(!(Table[,'start'] %% 1 == 0)) | any(!(Table[,'end'] %% 1 == 0))) {
        stop("Genomic coordinates at col",col.index[2],"and",col.index[3],
            "cannot have float values")
    }
    if( any( Table[,'start'] > Table[,'end'] ) ){
        stop("start coordinates cannot be greater than end coordinates")
    }
    # if( is.unsorted(Table[,'chr']) ){
    #     stop("Table must be sorted by chromosome!")
    # }
}
CheckContinuousRanges = function(Table = NULL, StartCol = NULL, EndCol = NULL){
    Starts<-Table[,StartCol]
    Starts<-Starts[seq_len(length(Starts))[-1]]
    End<-Table[,EndCol]
    End<-End[seq_len(length(End)-1)]
    if( any(Starts==End) ){
        stop("Found continuous ranges in file! Cannot proceed further!
            Use impose.discontinuity = FALSE to load continuous ranges.")
    }
}
get_chrom_info <- function(bin.table = NULL, chrom = NULL, FUN = NULL, 
    col.name = NULL){
    if(is.null(chrom)){
        chrom <- unique(bin.table[,"chr"])
    }
    Info <- sapply(chrom,function(x){
        FUN(bin.table[bin.table[,'chr']==x,col.name])
    })
    names(Info) <- chrom
    return(Info)
}
Split_genomic_coordinates = function(Coordinate = NULL){
    Reference.object <- GenomicMatrix$new()
    Sep <- Reference.object$Ranges.separator
    Coord.Split<-stringr::str_split(pattern=Sep,string=Coordinate)
    if(length(Coord.Split[[1]])!=3 | length(Coord.Split[[1]])!=3){
        stop("Coordinate must be separated by :")
    }
    Chrom<-Coord.Split[[1]][1]
    start<-as.numeric(Coord.Split[[1]][2])
    stop<-as.numeric(Coord.Split[[1]][3])
    if(any(!is.character(Chrom) | !is.numeric(start) | 
        !is.numeric(stop))){
        stop("Provided chromosome,start,end do not match expected ",
            "class definitions of character, numeric, numeric")
    }
    return(Coord.Split)
}

return_chrominfo_df <- function(bintable_df, chromosomes){
    Chrom.lengths <- get_chrom_info(bin.table = bintable_df,
        chrom = chromosomes, FUN = length, col.name = 'chr')
    Chrom.sizes <- get_chrom_info(bin.table = bintable_df,
        chrom = chromosomes, FUN = max, col.name = 'end')
    Chrom.info.df <- data.frame(chr = names(Chrom.lengths),
        nrow = as.vector(Chrom.lengths),
        size = as.vector(Chrom.sizes), stringsAsFactors = FALSE)
    return(Chrom.info.df)
}
koustav-pal/HiCBricks documentation built on Oct. 25, 2022, 12:06 a.m.