R/collapseBins.R
In chromstaR: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

Documented in collapseBins

#' Collapse consecutive bins
#'
#' The function will collapse consecutive bins which have, for example, the same combinatorial state.
#'
#' The following tables illustrate the principle of the collapsing:
#'
#' Input data:
#' \tabular{rrrrrr}{
#' seqnames \tab start \tab end \tab column2collapseBy \tab moreColumns \tab columns2sumUp \cr
#' chr1     \tab     0 \tab 199 \tab                 2 \tab        1 10 \tab           1 3 \cr
#' chr1     \tab   200 \tab 399 \tab                 2 \tab        2 11 \tab           0 3 \cr
#' chr1     \tab   400 \tab 599 \tab                 2 \tab        3 12 \tab           1 3 \cr
#' chr1     \tab   600 \tab 799 \tab                 1 \tab        4 13 \tab           0 3 \cr
#' chr1     \tab   800 \tab 999 \tab                 1 \tab        5 14 \tab           1 3 \cr
#' }
#' Output data:
#' \tabular{rrrrrr}{
#' seqnames \tab start \tab end \tab column2collapseBy \tab moreColumns \tab columns2sumUp \cr
#' chr1     \tab     0 \tab 599 \tab                 2 \tab        1 10 \tab           2 9 \cr
#' chr1     \tab   600 \tab 999 \tab                 1 \tab        4 13 \tab           1 6 \cr
#' }
#' 
#' @param data A data.frame containing the genomic coordinates in the first three columns.
#' @param column2collapseBy The number of the column which will be used to collapse all other inputs. If a set of consecutive bins has the same value in this column, they will be aggregated into one bin with adjusted genomic coordinates.
#' @param columns2sumUp Column numbers that will be summed during the aggregation process.
#' @param columns2average Column numbers that will be averaged during the aggregation process.
#' @param columns2getMax Column numbers where the maximum will be chosen during the aggregation process.
#' @param columns2drop Column numbers that will be dropped after the aggregation process.
#' @return A data.frame.
#' @author Aaron Taudt
#' @export
#' @examples
#'## Load example data
#'## Get an example multiHMM
#'file <- system.file("data","multivariate_mode-combinatorial_condition-SHR.RData",
#'                     package="chromstaR")
#'model <- get(load(file))
#'df <- as.data.frame(model$bins)
#'shortdf <- collapseBins(df, column2collapseBy='state', columns2sumUp='width', columns2average=6:9)
#'
collapseBins = function(data, column2collapseBy=NULL, columns2sumUp=NULL, columns2average=NULL, columns2getMax=NULL, columns2drop=NULL) {

    ## Name to index
    if (is.character(column2collapseBy)) {
        column2collapseBy <- which(column2collapseBy == names(data))
    }
    if (is.character(columns2sumUp)) {
        columns2sumUp <- unlist(lapply(columns2sumUp, function(x) { which(x == names(data)) }))
    }
    if (is.character(columns2average)) {
        columns2average <- unlist(lapply(columns2average, function(x) { which(x == names(data)) }))
    }
    if (is.character(columns2getMax)) {
        columns2getMax <- unlist(lapply(columns2getMax, function(x) { which(x == names(data)) }))
    }
    if (is.character(columns2drop)) {
        columns2drop <- unlist(lapply(columns2drop, function(x) { which(x == names(data)) }))
    }
    ## Indices
    ind_coords <- 1:3
    ind_morecols <- setdiff(1:ncol(data), c(ind_coords, columns2sumUp, columns2average, columns2getMax, columns2drop))
    ind_sumcols <- columns2sumUp
    ind_meancols <- columns2average
    ind_maxcols <- columns2getMax

    ## Make the comparison vector
    ptm <- startTimedMessage('Making comparison vector ...')
    if (is.null(column2collapseBy)) {
        c <- data$start
        cShift1 <- rep(NA,length(c))
        cShift1[2:length(cShift1)] <- data$end[-length(c)] + 1
    } else {
        if (is(data[,column2collapseBy], "factor")) {
            c <- as.integer(data[,column2collapseBy])
        } else {
            c <- data[,column2collapseBy]
        }
        cShift1 <- rep(NA,length(c))
        cShift1[-1] <- c[-length(c)]
    }
    compare_custom <- c != cShift1
    ## Make the comparison vector to separate chromosomes
    c <- as.integer(data[,1])
    cShift1 <- rep(NA,length(c))
    cShift1[-1] <- c[-length(c)]
    compare_chrom <- c != cShift1
    ## Combine the vectors
    compare <- compare_custom | compare_chrom
    compare[1] <- TRUE
    numcollapsedbins <- length(which(compare==TRUE))
    numbins <- nrow(data)
    stopTimedMessage(ptm)
    if (any(is.na(compare))) {
        stop("NAs in vector 'compare'")
    }

    ## Select the collapsed rows
    ptm <- startTimedMessage('Selecting rows ...')
    collapsed.bins <- list()
    collapsed.bins[[names(data)[1]]] <- data[which(compare),1] #which to remove NAs which shouldn't be there in the first place
    collapsed.bins[[names(data)[2]]] <- data[which(compare),2]
    collapsed.bins[[names(data)[3]]] <- data[c((which(compare)-1)[-1],numbins), 3]
    if (length(ind_morecols)==1) {
        collapsed.bins[[names(data)[ind_morecols]]] <- data[which(compare), ind_morecols]
    } else if (length(ind_morecols)>1) {
        lcb <- length(collapsed.bins)
        lmc <- length(ind_morecols)
        collapsed.bins[(lcb+1):(lcb+lmc)] <- data[which(compare), ind_morecols]
        names(collapsed.bins)[(lcb+1):(lcb+lmc)] <- names(data)[ind_morecols]
    }
    stopTimedMessage(ptm)

    ## Sum up columns
    xfuns <- list(sum, mean, max)
    xstrings <- list('sum', 'mean', 'max')
    columns2xs <- list(columns2sumUp, columns2average, columns2getMax)
    inds_xcols <- list(ind_sumcols, ind_meancols, ind_maxcols)
    for (ix in 1:length(xfuns)) {
        xfun <- xfuns[[ix]]
        xstring <- xstrings[[ix]]
        columns2x <- columns2xs[[ix]]
        ind_xcols <- inds_xcols[[ix]]
        if (!is.null(columns2x)) {
            ptm <- startTimedMessage('Calculating ',xstring,' ...')
            xcols <- as.matrix(data[,columns2x])
            collapsed.xcols <- matrix(NA, nrow=numcollapsedbins, ncol=length(columns2x))
            icount <- 1
            i1_lasttrue <- 1
            for (i1 in 1:length(compare)) {
                if (compare[i1]==TRUE) {
                    if (length(columns2x)==1) {
                        collapsed.xcols[icount-1] <- xfun(xcols[i1_lasttrue:(i1-1),])
                    } else if (length(columns2x) > 1) {
                        if (i1_lasttrue==i1-1 | i1==1) {
                            collapsed.xcols[icount-1,] <- as.numeric(xcols[i1_lasttrue,])
                        } else {
                            collapsed.xcols[icount-1,] <- apply(xcols[i1_lasttrue:(i1-1),],2,xfun)
                        }
                    }
                    icount <- icount+1
                    i1_lasttrue <- i1
                }
            }
            i1 = i1+1
            if (length(columns2x)==1) {
                collapsed.xcols[icount-1] <- xfun(xcols[i1_lasttrue:(i1-1),])
            } else if (length(columns2x) > 1) {
                if (i1_lasttrue==i1-1 | i1==1) {
                    collapsed.xcols[icount-1,] <- as.numeric(xcols[i1_lasttrue,])
                } else {
                    collapsed.xcols[icount-1,] <- apply(xcols[i1_lasttrue:(i1-1),],2,xfun)
                }
            }
            if (length(ind_xcols) > 0) {
                lcb <- length(collapsed.bins)
                lsc <- length(ind_xcols)
                collapsed.bins[(lcb+1):(lcb+lsc)] <- as.data.frame(collapsed.xcols)
                names(collapsed.bins)[(lcb+1):(lcb+lsc)] <- paste(xstring, names(data)[ind_xcols], sep='.')
            }
            stopTimedMessage(ptm)
        }
    }

    return(as.data.frame(collapsed.bins))

}

Any scripts or data that you put into this service are public.

chromstaR documentation built on Nov. 8, 2020, 8:29 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

chromstaR
Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

R/collapseBins.R
In chromstaR: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

Defines functions collapseBins

Documented in collapseBins

Try the chromstaR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chromstaR Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

R/collapseBins.R In chromstaR: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

Defines functions collapseBins

Documented in collapseBins

Try the chromstaR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chromstaR
Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data

R/collapseBins.R
In chromstaR: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data