R/util.R
In recoup: An R package for the creation of complex genomic profile plots

Documented in getr kmeansDesign mergeRuns removeData setr sliceObj

#splitBySeqname <- function(gr,rc=NULL) {
#    #message("Splitting input regions by seqname...")
#    grList <- cmclapply(levels(seqnames(gr)),function(x,lib) {
#        #message("  ",x)
#        tmp <- lib[seqnames(lib)==x]
#        if (length(tmp)>0) return(tmp) else return(NULL)
#    },gr,rc=rc)
#    names(grList) <- levels(seqnames(gr))
#    null <- which(sapply(grList,is.null))
#    if (length(null)>0)
#        grList <- grList[-null]
#    return(grList)
#}

contVector <- function(x,size=NULL,flank=NULL,where=NULL) {
    if (!is.null(flank)) {
        switch(where,
            upstream = {
                size <- flank[1]
                if (is(x,"Rle") && !is.na(runValue(x)))
                    x <- x[1:flank[1]]
                else
                    return(rep(0,size))
            },
            downstream = {
                size <- flank[2]
                if (is(x,"Rle") && !is.na(runValue(x)))
                    x <- x[(length(x)-flank[2]+1):length(x)]
                else
                    return(rep(0,size))
            }
        )
    }
    else {
        if (!is(x,"Rle") || all(is.na(runValue(x))))
            return(rep(0,size))
    }
    return(as.numeric(x))
}

splitVector <- function(x,n,flank,where,interp,stat) {
    isRle <- ifelse(is(x,"Rle"),TRUE,FALSE)
    if (!is.null(flank)) {
        switch(where,
            center = {
                if (is(x,"Rle") && all(!is.na(runValue(x))))
                    x <- x[(flank[1]+1):(length(x)-flank[2])]
                else
                    return(rep(0,n))
            },
            upstream = {
                if (is(x,"Rle") && all(!is.na(runValue(x))))
                    x <- x[1:flank[1]]
                else
                    return(rep(0,n))
            },
            downstream = {
                if (is(x,"Rle") && all(!is.na(runValue(x))))
                    x <- x[(length(x)-flank[2]+1):length(x)]
                else
                    return(rep(0,n))
            }
        )
    }
    else {
        if (!is(x,"Rle") || all(is.na(runValue(x))))
            return(rep(0,n))
    }
    if (length(x)<n) {
        if (isRle)
            x <- as.numeric(x)
        x <- interpolateSignal(x,n,interp)
        if (isRle)
            x <- Rle(x)
    }
    binSize <- floor(length(x)/n)
    dif <- length(x) - binSize*n 
    binFac <- rep(binSize,n)
    # Random bin increase size to avoid problems
    add <- sample(1:n,dif)
    binFac[add] <- binFac[add]+1
    f <- factor(rep(1:n,binFac))
    #S <- split(x,f)
    #return(llply(S,stat))
    S <- split(1:length(x),f)
    s <- vapply(S,function(x) x[1],integer(1))
    return(aggregate(x,FUN=stat,start=s,width=binFac))
}

interpolateSignal <- function(x,n,interp) {
    switch(interp,
        auto = {
            d <- (n-length(x))/n
            if (d < 0.2) { # Then quite safe for neighborhood method
                y <- rep(NA,n)
                y[1:2] <- x[1:2]
                y[(n-1):n] <- x[(length(x)-1):length(x)]
                orig.pos <- sort(sample(3:(n-2),length(x)-4))
                y[orig.pos] <- x[3:(length(x)-2)]
                na <- which(is.na(y))
                avinds <- lapply(na,function(z) {
                    return(c(z-2,z-1,z+1,z+2))
                })
                xx <- unlist(lapply(avinds,function(ii,yy) {
                    return(mean(yy[ii],na.rm=TRUE))
                },y))
                y[na] <- xx
                x <- y
            }
            else { # Spline is the safest
                x <- spline(x,n=n)$y
                x[x<0] <- 0
            }
        },
        spline = {
            x <- spline(x,n=n)$y
            x[x<0] <- 0
        },
        linear = {
            x <- approx(x,n=n)$y
            x[x<0] <- 0
        },
        neighborhood = {
            y <- rep(NA,n)
            y[1:2] <- x[1:2]
            y[(n-1):n] <- x[(length(x)-1):length(x)]
            orig.pos <- sort(sample(3:(n-2),length(x)-4))
            y[orig.pos] <- x[3:(length(x)-2)]
            na <- which(is.na(y))
            avinds <- lapply(na,function(z) {
                return(c(z-2,z-1,z+1,z+2))
            })
            xx <- unlist(lapply(avinds,function(ii,yy) {
                return(mean(yy[ii],na.rm=TRUE))
            },y))
            y[na] <- xx
            x <- y
        }
    )
    return(x)
}

covLengthsEqual <- function(rl) {
    if (is(rl,"RleList") || is(rl,"list"))
        len <- lengths(rl)
    else if (is(rl,"GRanges"))
        len <- width(rl)
    else if (is(rl,"GRangesList"))
        len <- vapply(width(rl),sum,integer(1))
    z <- which(len==1)
    if (length(z)>0)
        len <- len[-z]
    return(ifelse(all(len==len[1]),TRUE,FALSE))
}

readConfig <- function(input) {
    if (missing(input) || !file.exists(input))
        stop("File to read sample info from should be a valid existing text ",
            "file!")
    tab <- read.delim(input)
    if (is.null(tab$id))
        stop("Sample id column not found in ",input,"!")
    if (is.null(tab$file))
        stop("Sample file path column not found in ",input,"!")
    if (is.null(tab$format))
        stop("Sample file format column not found in ",input,"!")
    samples <- as.character(tab$id)
    if (length(samples) != length(unique(samples)))
        stop("Sample identifiers must be unique for each sample!")
    if (length(grep(" ",samples))>0)
        stop("White space is not allowed in sample ids!")
    if (is.null(tab$name))
        nams <- as.character(tab$name)
    else
        nams <- samples
    files <- as.character(tab$file)
    if (any(!file.exists(files))) {
        bi <- which(!file.exists(files))
        stop("Input file ",files[bi]," does not exist! Please check paths...")
    }
    formats <- as.character(tab$format)
    if (!all(formats %in% c("bam","bed","bigwig")))
        stop("Input formats must be one of \"bam\", \"bed\", \"bigwig\"")
    cls <- NULL
    if (!is.null(tab$color)) {
        cls <- as.character(tab$color)
        chkcls <- areColors(cls)
        if (!all(chkcls)) {
            warning("Invalid colors found in color column in config file ",
                input,"! Will use automatic colors...")
            cls <- NULL
        }
    }
    output <- vector("list",nrow(tab))
    for (i in 1:nrow(tab)) {
        output[[i]]$id <- samples[i]
        output[[i]]$name <- nams[i]
        output[[i]]$file <- files[i]
        output[[i]]$format <- formats[i]
        output[[i]]$color <- cls[i]
        output[[i]]$ranges <- NULL
        output[[i]]$coverage <- NULL
        output[[i]]$profile <- NULL
    }
    names(output) <- samples
    return(output)
}

kmeansDesign <- function(input,design=NULL,kmParams) {
    if (missing(kmParams)) {
        if (!is.null(input$data))
            kmParams <- getr(input,"kmParams")
        else
            kmParams <- getDefaultListArgs("kmParams")
    }
    else {
        kmParamsDefault <- getDefaultListArgs("kmParams")
        kmParams <- setArg(kmParamsDefault,kmParams)
        kmParams <- validateListArgs("kmParams",kmParams)
    }
    if (!is.null(input$data)) # Fed with recoup object
        input <- input$data
    hasProfile <- vapply(input,function(x) is.null(x$profile),logical(1))
    if (any(hasProfile))
        stop("Profile matrices for k-means clustering are missing from the ",
            "input object. Have you called the profileMatrix function?")
    if (kmParams$k>0) {
        if (is.null(kmParams$reference)) {
            # Merge matrices to one and perform k-means. As normally coverages 
            # are normalized (the user is responsible to tell recoup how to do 
            # this and has been done at this point), we are legalized to do that
            message("Performing k-means (k=",kmParams$k,") clustering on ",
                "total profiles")
            theBigMatrix <- do.call("cbind",lapply(input,function(x) {
                return(as.matrix(x$profile))
            }))
            kcl <- kmeans(theBigMatrix,centers=kmParams$k,
                iter.max=kmParams$iterMax,nstart=kmParams$nstart,
                algorithm=kmParams$algorithm)
        }
        else {
            message("Performing k-means (k=",kmParams$k,") clustering using ",
                "the ",input[[kmParams$reference]]$name," sample profile as ",
                "reference")
            #set.seed(kmParams$seed)
            kcl <- kmeans(input[[kmParams$reference]]$profile,
                centers=kmParams$k,iter.max=kmParams$iterMax,
                nstart=kmParams$nstart,algorithm=kmParams$algorithm)
        }
        kmorder <- kcl$cluster
        #names(kmorder) <- rownames(input[[1]]$profile)
        if (!is.null(design)) {
            kmorder <- kmorder[rownames(design)]
            card <- table(kmorder)[kmorder]
            design$kcluster <- as.factor(paste("Cluster ",kmorder," (",card,
                ")",sep=""))
        }
        else {
            card <- table(kmorder)[kmorder]
            design <- data.frame(kcluster=paste("Cluster ",kmorder," (",card,
                ")",sep=""))
            rownames(design) <- names(kmorder)
        }
    }
    return(design)
}

# Experimental! Not working properly at the moment.
# In the final version, the design variable should be replaced by an "obj"
# variable. The obj can be a recoup list object (with design stored) or a design
# data frame
reorderClusters <- function(design,newOrder) {
    # Check if newOrder is numeric
    if (!is.numeric(newOrder))
        stop("newOrder must be numeric")
    # Check if design is a data frame
    if (!is.data.frame(design))
        stop("design must be a recoup design data frame!")
    # Check if design is a proper clutering design
    if (!("kcluster" %in% names(design)))
        stop("design must hold clustering information! Are you sure you run ",
            "kmeansDesign first?")
    # Check correct number of clusters in new order
    if (length(newOrder) != length(unique(as.character(design$kcluster))))
        stop("The number of clusters in the new order vector does not match ",
            "with the existing one!")
    
    # Do job
    cur <- as.character(design$kcluster)
    # They are of the form Cluster X (YYY)
    spl <- strsplit(cur," ")
    num <- as.numeric(vapply(spl,function(x) return(x[2]),character(1)))
    
    # Before going furher, check that newOrder contains the same clusters
    unum <- unique(num)
    poff <- intersect(unum,newOrder)
    if (length(poff) != length(unique(num)))
        stop("The provided clusters are not the same as in design! Cluster ",
            setdiff(newOrder,unum)," not found in design.")
    
    # Proceed
    car <- vapply(spl,function(x) return(x[3]),character(1))
    
    names(newOrder) <- 1:length(newOrder)
    
    # Finally, reorder
    newClus <- num
    for (i in unum)
        newClus[num==i] <- newOrder[as.character(i)]
    
    design$kcluster <- paste("Cluster",newClus,car)
    
    return(design)
}

sliceObj <- function(obj,i=NULL,j=NULL,k=NULL,dropPlots=FALSE,rc=NULL) {
    if (is.null(obj$data))
        stop("No data slot found in obj! Are you sure it's an output from",
            "recoup?")
    if (!is.null(obj$callopts$selector))
        obj$callopts$selector <- NULL
    if (!is.null(i)) {
        if (!is.numeric(i) && !is.character(i))
            stop("Horizontal indexing must be numeric or character!")
        for (s in 1:length(obj$data)) {
            if (!is.null(obj$data[[s]]$coverage))
                obj$data[[s]]$coverage <- obj$data[[s]]$coverage[i]
            if (!is.null(obj$data[[s]]$profile))
                obj$data[[s]]$profile <- obj$data[[s]]$profile[i,,drop=FALSE]
        }
        if (!is.null(obj$design))
            obj$design <- obj$design[i,,drop=FALSE]
    }
    if (!is.null(j)) {
        if (!is.numeric(j))
            stop("Vertical indexing must be numeric!")
        n <- ncol(obj$data[[1]]$profile)
        if (obj$callopts$region %in% c("tss","tes")
            || obj$callopts$customIsBase) {
            if (obj$callopts$binParams$regionBinSize==0)
                obj$callopts$flank <- c(min(j),n-max(j))
            else {
                f <- obj$callopts$flank/obj$callopts$binParams$regionBinSize
                obj$callopts$flank <- c(round(min(j)*f[1]),
                    round((n-max(j))*f[2]))
            }
        }
        else {
            if (obj$callopts$binParams$flankBinSize==0) {
                if (min(j)<obj$callopts$flank[1])
                    obj$callopts$flank[1] <- min(j)
                else
                    obj$callopts$flank[1] <- 0
                if ((n-max(j))<obj$callopts$flank[2])
                    obj$callopts$flank[2] <- n-max(j)
                else
                    obj$callopts$flank[2] <- 0
            }
            else {
                flankSpace <- 2*obj$callopts$binParams$flankBinSize
                oFlank <- obj$callopts$flank
                r <- obj$callopts$flank/sum(obj$callopts$flank)
                original.start.index <- round(r[1]*flankSpace)
                original.end.index <- round(n - r[2]*flankSpace)
                nonZeroFlank <- FALSE
                if (min(j)<original.start.index) {
                    obj$callopts$flank[1] <- 
                        round((original.start.index - min(j))*
                            obj$callopts$flank[1]/original.start.index)
                    nonZeroFlank <- TRUE
                }
                else
                    obj$callopts$flank[1] <- 0
                if (max(j)>original.end.index) {
                    obj$callopts$flank[2] <- 
                        round((max(j) - original.end.index)*
                            obj$callopts$flank[2]/(n - original.end.index))
                    nonZeroFlank <- TRUE
                }
                else
                    obj$callopts$flank[2] <- 0
                if (nonZeroFlank)
                    obj$callopts$binParams$flankBinSize <- round(0.5*flankSpace*
                            sum(obj$callopts$flank)/sum(oFlank))
                obj$callopts$binParams$forcedBinSize[1] <- 
                    obj$callopts$binParams$flankBinSize
                if (all(obj$callopts$flank==0))
                    obj$callopts$region <- "custom"
            }
        }
        for (s in 1:length(obj$data)) {
            if (!is.null(obj$data[[s]]$profile))
                obj$data[[s]]$profile <- obj$data[[s]]$profile[,j,drop=FALSE]
        }
    }
    if (!is.null(k)) {
        if (!is.numeric(k) && !is.character(k))
            stop("Sample indexing must be numeric or character!")
        obj$data <- obj$data[k]
    }
    if (dropPlots)
        obj$plots <- list(profile=NULL,heatmap=NULL,correlation=NULL)
    else {
        message("Recalculating profile plots after slicing...")
        if (obj$callopts$plotParams$profile) {
            message("  profile")
            obj <- recoupProfile(obj,rc=rc)
        }
        if (obj$callopts$plotParams$heatmap) {
            message("  heatmap")
            obj <- recoupHeatmap(obj,rc=rc)
        }
        if (obj$callopts$plotParams$correlation) {
            message("  correlation")
            obj <- recoupCorrelation(obj,rc=rc)
        }
    }
    return(obj)
}

# TODO: Check if the inputs have no names and work with indices
# TODO: Check the callopts one by one. For example, there is the risk of 
#       different organisms if we don't check for "organism".
# TODO: Recalculate plots after merge
mergeRuns <- function(...,withDesign=c("auto","drop"),dropPlots=TRUE) {
    tmp <- list(...)
    withDesign <- tolower(withDesign[1])
    
    # Initialize new object
    merged <- list()
    
    # Check design
    if (withDesign == "auto") {
        allHaveDesign <- 
            all(vapply(tmp,function(x) !is.null(x$design),logical(1)))
        whichDesign <- vapply(tmp,function(x) is.null(x$design),logical(1))
        oneHasDesign <- ifelse(length(which(!whichDesign))==1,TRUE,FALSE)
        noneHasDesign <- all(whichDesign)
        if (noneHasDesign)
            merged$design <- NULL
        if (allHaveDesign) {
            tryCatch({
                # Will fail if some design has different number of columns
                test1 <- 
                    do.call("rbind",lapply(tmp,function(x) return(x$design)))
                # Will fail if some design has different number of rows
                test2 <- 
                    do.call("cbind",lapply(tmp,function(x) return(x$design)))
                # Will fail if some design has different row names
                n1 <- sort(Reduce("intersect",
                    lapply(tmp,function(x) return(rownames(x$design)))))
                n2 <- sort(rownames(tmp[[1]]$design))
                if (!identical(n1,n2))
                    stop("Incompatible row names")
                # Will fail if some design has different column names
                n1 <- sort(Reduce("intersect",
                    lapply(tmp,function(x) return(colnames(x$design)))))
                n2 <- sort(colnames(tmp[[1]]$design))
                if (!identical(n1,n2))
                    stop("Incompatible column names")
                
                # If nothing fails, then assign the design to the new object
                merged$design <- tmp[[1]]$design
            },error=function(e) {
                warning("Caught incompatible designs! Dropping design...",
                    immediate.=TRUE)
                message("----------")
                print(e)
                message("----------")
                merged$design <- NULL
            },
            finally="")
        }
        if (oneHasDesign) # Apply to all
            merged$design <- tmp[[which(whichDesign)]]$design
        # Some have design...
        if (!noneHasDesign && !allHaveDesign && !oneHasDesign) {
            # Gather them
            designs <- lapply(tmp,function(x) {
                if (!is.null(x$design))
                    return(x$design)
                else
                    return(NULL)
            })
            
            # ...and repeat the previous pattern
            tryCatch({
                # Will fail if some design has different number of columns
                test1 <- do.call("rbind",designs)
                # Will fail if some design has different number of rows
                test2 <- do.call("cbind",designs)
                # Will fail if some design has different row names
                n1 <- sort(Reduce("intersect",
                    lapply(designs,function(x) return(rownames(x)))))
                n2 <- sort(rownames(designs[[1]]))
                if (!identical(n1,n2))
                    stop("Incompatible row names")
                # Will fail if some design has different column names
                n1 <- sort(Reduce("intersect",
                    lapply(designs,function(x) return(colnames(x)))))
                n2 <- sort(colnames(designs[[1]]))
                if (!identical(n1,n2))
                    stop("Incompatible column names")
                
                # If nothing fails, then assign the design to the new object
                merged$design <- designs[[1]]
            },error=function(e) {
                warning("Caught incompatible designs! Dropping design...",
                    immediate.=TRUE)
                message("----------")
                print(e)
                message("----------")
                merged$design <- NULL
            },
            finally="")
        }
    }
    else if (withDesign=="drop")
        merged$design <- NULL
    
    # Quick for now
    merged$callopts <- tmp[[1]]$callopts
    
    # Try to merge actual data
    newLen <- sum(vapply(tmp,function(x) return(length(x$data)),integer(1)))
    merged$data <- vector("list",newLen)
    theNames <- character(0)
    for (i in 1:length(tmp))
        theNames <- c(theNames,names(tmp[[i]]$data))
    names(merged$data) <- theNames

    for (m in tmp) {
        for (n in names(m$data)) {
            merged$data[[n]] <- m$data[[n]]
            if (!is.null(merged$design)) {
                if (!is.null(merged$data[[n]]$coverage))
                    merged$data[[n]]$coverage <- 
                        merged$data[[n]]$coverage[rownames(merged$design)]
                if (!is.null(merged$data[[n]]$profile))
                    merged$data[[n]]$profile <- 
                        merged$data[[n]]$profile[rownames(merged$design),]
            }
        }
    }
    
    # Drop all plots for now, later recalculate
    if (dropPlots)
        merged$plots <- list(profile=NULL,heatmap=NULL,correlation=NULL)
    else
        merged$plots <- NULL
    
    return(merged)
}

decideChanges <- function(input,currCall,prevCall) {
    if (is.null(prevCall))
        return(input)
    # Check region and flank
    if (currCall$region != prevCall$region 
        || !all(currCall$flank == prevCall$flank))
        input <- removeData(input,c("coverage","profile"))
    # Check binParams
    if (currCall$binParams$flankBinSize != prevCall$binParams$flankBinSize
        || currCall$binParams$regionBinSize != prevCall$binParams$regionBinSize
        || currCall$binParams$sumStat != prevCall$binParams$sumStat
        || currCall$binParams$interpolation != prevCall$binParams$interpolation
        || currCall$binParams$forceHeatmapBinning != 
            prevCall$binParams$forceHeatmapBinning
        || currCall$binParams$forcedBinSize != prevCall$binParams$forcedBinSize
        || currCall$binParams$chunking != prevCall$binParams$chunking)
        input <- removeData(input,"profile")
    if (currCall$preprocessParams$normalize != 
        prevCall$preprocessParams$normalize
        || currCall$preprocessParams$sampleTo != 
        prevCall$preprocessParams$sampleTo
        || currCall$preprocessParams$spliceAction != 
        prevCall$preprocessParams$spliceAction
        || currCall$preprocessParams$spliceRemoveQ != 
        prevCall$preprocessParams$spliceRemoveQ
        || currCall$preprocessParams$spliceRemoveQ != 
        prevCall$preprocessParams$spliceRemoveQ)
        #|| currCall$preprocessParams$seed != prevCall$preprocessParams$seed)
        input <- removeData(input,c("ranges","coverage","profile"))
        
        return(input)
}

removeData <- function(input,type=c("ranges","coverage","profile")) {
    type <- tolower(type)
    checkTextArgs("type",type,c("ranges","coverage","profile"),multiarg=TRUE)
    if (!is.null(input$data)) # Gave recoup output object
        for (i in 1:length(input$data))
            input$data[[i]][type] <- NULL
    else
        for (i in 1:length(input))
            input[[i]][type] <- NULL
    return(input)
}

calcLinearFactors <- function(input,preprocessParams) {
    hasRanges <- vapply(input,function(x) is.null(x$ranges),logical(1))
    if (any(hasRanges))
        stop("Please provide input reads before calculation normalization ",
            "factors")
    libSizes <- vapply(input,function(x) return(length(x$ranges)),integer(1))
    if (preprocessParams$normalize=="linear" 
        || preprocessParams$normalize=="downsample")
        linFac <- min(libSizes)/libSizes
    else if (preprocessParams$normalize=="sampleto")
        linFac <- preprocessParams$sampleTo/libSizes
    names(linFac) <- names(input)
    return(linFac)
}

cmclapply <- function(...,rc) {
    if (suppressWarnings(!requireNamespace("parallel")) 
        || .Platform$OS.type!="unix")
        m <- FALSE
    else {
        m <- TRUE
        ncores <- parallel::detectCores()
        if (ncores==1) 
            m <- FALSE
        else {
            if (!missing(rc) && !is.null(rc))
                ncores <- ceiling(rc*ncores)
            else 
                m <- FALSE
        }
    }
    if (m)
        return(mclapply(...,mc.cores=ncores,mc.set.seed=FALSE))
    else
        return(lapply(...))
}

ssCI <- function(fit) {
    res <- (fit$yin - fit$y)/(1-fit$lev)
    sigma <- sqrt(var(res))
    upper <- fit$y + 3.0*sigma*sqrt(fit$lev)
    lower <- fit$y - 3.0*sigma*sqrt(fit$lev)
    return(list(lower=lower,upper=upper))
}

getDefaultListArgs <- function(arg,design=NULL,genome=NULL) {
    switch(arg,
        orderBy = {
            return(list(
                what=c("none","suma","sumn","maxa","maxn","hc"),
                order=c("descending","ascending"),
                custom=NULL
            ))
        },
        binParams = {
            return(list(
                flankBinSize=0,
                regionBinSize=0,
                sumStat=c("mean","median"),
                interpolation=c("auto","spline","linear","neighborhood"),
                binType=c("variable","fixed"),
                forceHeatmapBinning=TRUE,
                forcedBinSize=c(50,200),
                chunking=FALSE#,
                #seed=42
            ))
        },
        preprocessParams = {
            return(list(
                fragLen=NA,
                cleanLevel=c(0,1,2,3),
                normalize=c("none","linear","downsample","sampleto"),
                sampleTo=1e+6,
                spliceAction=c("split","keep","remove"),
                spliceRemoveQ=0.75,
                #bedGenome=ifelse(!is.null(genome) && genome %in% c("hg18",
                #    "hg19","hg38","mm9","mm10","rn5","dm3","danrer7","pantro4",
                #    "susscr3"),genome,NA),
                bedGenome=NA#,
                #seed=42
            ))
        },
        selector = {
            return(list(
                id=NULL,
                biotype=NULL,
                exonType=NULL
            ))
        },
        strandedParams = {
            return(list(
                strand=NULL,
                ignoreStrand=TRUE
            ))
        },
        plotParams = {
            return(list(
                plot=TRUE,
                profile=TRUE,
                heatmap=TRUE,
                correlation=TRUE,
                signalScale=c("natural","log2"),
                heatmapScale=c("each","common"),
                heatmapFactor=1,
                corrScale=c("normalized","each"),
                sumStat=c("mean","median"),
                smooth=TRUE,
                corrSmoothPar=ifelse(is.null(design),0.1,0.5),
                singleFacet=c("none","wrap","grid"),
                multiFacet=c("wrap","grid"),
                conf=TRUE,
                device=c("x11","png","jpg","tiff","bmp","pdf","ps"),
                outputDir=".",
                outputBase=NULL
            ))
        },
        saveParams = {
            return(list(
                ranges=TRUE,
                coverage=TRUE,
                profile=TRUE,
                profilePlot=TRUE,
                heatmapPlot=TRUE,
                correlationPlot=TRUE
            ))
        },
        kmParams = {
            return(list(
                k=0,
                nstart=20,
                algorithm=c("Hartigan-Wong","Lloyd","Forgy","MacQueen"),
                iterMax=20,
                reference=NULL#,
                #seed=42
            ))
        }
    )
}

#setr <- function(obj,key=c("design","profile","heatmap","correlation",
#   "orderBy","kmParams","plotParams"),value) {
setr <- function(obj,key,value=NULL) {
    if (is.character(key)) { # Property name, property value pairs
        if (is.null(value))
            stop("When the key argument is a list of property names, the ",
                "value argument must not be NULL.")
        if (length(key)>1) { # Value must be a list of lists
            if (length(key)!=length(lengths(value)))
                stop("The number values to set must be equal to the number of ",
                    "properties to change")
        }
        else
            value <- list(value)
        valid <- key %in% c("design","profile","heatmap","correlation",
            "orderBy","kmParams","plotParams")
        not.valid <- which(!valid)
        if (length(not.valid)>0) {
            warning("The following parameters to set are are invalid and will ",
                "be ignored: ",paste(key[not.valid],collapse=", "),
                    immediate.=TRUE)
            key[not.valid] <- NULL
            value[not.valid] <- NULL
        }
        if (length(key)==0)
            return(obj)
        names(value) <- key
        key <- value
    }
    else if (is.list(key)) { # Only a named list with property names and values
        valid <- names(key) %in% c("design","profile","heatmap","correlation",
            "orderBy","kmParams","plotParams")
        not.valid <- which(!valid)
        if (length(not.valid)>0) {
            warning("The following parameters to set are are invalid and will ",
                "be ignored: ",paste(names(key)[not.valid],collapse=", "),
                    immediate.=TRUE)
            key[not.valid] <- NULL
        }
        if (length(key)==0)
            return(obj)
    }
    for (n in names(key)) {
        switch(n,
            design = {
                if (!is.null(key[[n]])) {
                    if (!is.data.frame(key[[n]]))
                        key[[n]] <- read.delim(key[[n]],row.names=1)
                    nfac <- ncol(key[[n]])
                    if (length(obj$data)>1 && nfac>2)
                        stop("When more than one files are provided for ",
                            "coverage generation, the maximum number of ",
                            "allowed value factors is 2")
                    if (length(obj$data)>1 && nfac>1 
                        && obj$callopts$kmParams$k>0)
                        stop("When more than one files are provided for ",
                            "coverage generation and k-means clustering is ",
                            "also requested, the maximum number of allowed ",
                            "design factors is 1")
                    if (length(obj$data)==1 && nfac>3)
                        stop("The maximum number of allowed design factors is",
                            " 3")
                    if (length(obj$data)==1 && nfac>2 
                        && obj$callopts$kmParams$k>0)
                        stop("The maximum number of allowed design factors ",
                            "when k-means clustering is requested is 2")
                }
                obj$design <- key[[n]]
            },
            profile = {
                if (!is(key[[n]],"gg") && !is(key[[n]],"ggplot")) {
                    warning("The supplied profile plot is not a ggplot ",
                        "object! Ignoring...",immediate.=TRUE)
                    return(obj)
                }
                obj$plots$profile <- key[[n]]
            },
            heatmap = {
                if (!is(key[[n]],"HeatmapList")) {
                    warning("The supplied heatmap plot is not a Heatmap ",
                        "object! Ignoring...",immediate.=TRUE)
                    return(obj)
                }
                obj$plots$heatmap <- key[[n]]
            },
            correlation = {
                if (!is(key[[n]],"gg") && !is(key[[n]],"ggplot")) {
                    warning("The supplied correlation plot is not a ggplot ",
                        " object! Ignoring...",immediate.=TRUE)
                    return(obj)
                }
                obj$plots$correlation <- key[[n]]
            },
            orderBy = {
                orderByDefault <- getDefaultListArgs("orderBy")
                orderBy <- setArg(orderByDefault,key[[n]])
                orderBy <- validateListArgs("orderBy",orderBy)
                obj$callopts$orderBy <- orderBy
                # Should change also complexHeatmapParams
                if (length(grep("hc",orderBy$what))>0
                    && !(obj$callopts$complexHeatmapParams$main$cluster_rows 
                    || obj$callopts$complexHeatmapParams$group$cluster_rows)) {
                    message("Changing also hierarchical clustering parameters ",
                        "to comply with new orderBy settings.")
                    obj$callopts$complexHeatmapParams$main$cluster_rows <- TRUE
                    obj$callopts$complexHeatmapParams$group$cluster_rows <- TRUE
                }
                if ((obj$callopts$complexHeatmapParams$main$cluster_rows 
                    || obj$callopts$complexHeatmapParams$group$cluster_rows)
                    && length(grep("hc",orderBy$what))==0) {
                    message("Changing also hierarchical clustering parameters ",
                        "to comply with new orderBy settings.")
                    obj$callopts$complexHeatmapParams$main$cluster_rows <- FALSE
                    obj$callopts$complexHeatmapParams$group$cluster_rows <- 
                        FALSE
                }
            },
            kmParams = {
                kmParamsDefault <- getDefaultListArgs("kmParams")
                kmParams <- setArg(kmParamsDefault,key[[n]])
                kmParams <- validateListArgs("kmParams",kmParams)
                obj$callopts$kmParams <- kmParams
            },
            plotParams = {
                plotParamsDefault <- getDefaultListArgs("plotParams")
                plotParams <- setArg(plotParamsDefault,key[[n]])
                plotParams <- validateListArgs("plotParams",plotParams)
                obj$callopts$plotParams <- plotParams
            }
        )
    }
    return(obj)
}

getr <- function(obj,key=c("design","profile","heatmap","correlation","orderBy",
    "kmParams","plotParams")) {
    checkTextArgs("key",key,c("design","profile","heatmap","correlation",
        "orderBy","kmParams","plotParams"))
    switch(key,
        design = {
            return(obj$design)
        },
        profile = {
            return(obj$plots$profile)
        },
        heatmap = {
            return(obj$plots$heatmap)
        },
        correlation = {
            return(obj$plots$correlation)
        },
        orderBy = {
            return(obj$callopts$orderBy)
        },
        kmParams = {
            return(obj$callopts$kmParams)
        },
        plotParams = {
            return(obj$callopts$plotParams)
        }
    )
}

getArg <- function(arg.list,arg.name) {
    return(arg.list[arg.name])
}

setArg <- function(arg.list,arg.name,arg.value=NULL) {
    if (is.list(arg.name))
        arg.list[names(arg.name)] <- arg.name
    else if (is.character(arg.name)) {
        tmp <- vector("list",length(arg.name))
        names(tmp) <- arg.name
        i <- 0
        for (n in arg.name) {
            i <- i + 1
            tmp[[n]] <- arg.value[i]
        }
        arg.list[arg.name] <- tmp
    }
    return(arg.list)
}

areColors <- function(x) {
    return(vapply(x,function(y) {
        tryCatch(is.matrix(col2rgb(y)),
            error=function(e) FALSE)
    },logical(1)))
}

################################################################################

# Legacy functions

#splitVectorOld <- function(x,n,interp,stat,seed=42) {
#    isRle <- ifelse(is(x,"Rle"),TRUE,FALSE)
#    if (length(x)<n) {
#        if (isRle)
#            x <- as.numeric(x)
#        switch(interp,
#            auto = {
#                d <- (n-length(x))/n
#                if (d < 0.2) { # Then quite safe for neighborhood method
#                    y <- rep(NA,n)
#                    set.seed(seed)
#                    y[1:2] <- x[1:2]
#                    y[(n-1):n] <- x[(length(x)-1):length(x)]
#                    orig.pos <- sort(sample(3:(n-2),length(x)-4))
#                    y[orig.pos] <- x[3:(length(x)-2)]
#                    na <- which(is.na(y))
#                    avinds <- lapply(na,function(z) {
#                        return(c(z-2,z-1,z+1,z+2))
#                    })
#                    xx <- unlist(lapply(avinds,function(ii,yy) {
#                        return(mean(yy[ii],na.rm=TRUE))
#                    },y))
#                    y[na] <- xx
#                    x <- y
#                }
#                else { # Spline is the safest
#                    x <- spline(x,n=n)$y
#                    x[x<0] <- 0
#                }
#            },
#            spline = {
#                x <- spline(x,n=n)$y
#                x[x<0] <- 0
#            },
#            linear = {
#                x <- approx(x,n=n)$y
#                x[x<0] <- 0
#            },
#            neighborhood = {
#                y <- rep(NA,n)
#                set.seed(seed)
#                y[1:2] <- x[1:2]
#                y[(n-1):n] <- x[(length(x)-1):length(x)]
#                orig.pos <- sort(sample(3:(n-2),length(x)-4))
#                y[orig.pos] <- x[3:(length(x)-2)]
#                na <- which(is.na(y))
#                avinds <- lapply(na,function(z) {
#                    return(c(z-2,z-1,z+1,z+2))
#                })
#                xx <- unlist(lapply(avinds,function(ii,yy) {
#                    return(mean(yy[ii],na.rm=TRUE))
#                },y))
#                y[na] <- xx
#                x <- y
#            }
#        )
#        if (isRle)
#            x <- Rle(x)
#    }
#    binSize <- floor(length(x)/n)
#    dif <- length(x) - binSize*n 
#    binFac <- rep(binSize,n)
#    # Random bin increase size to avoid problems
#    set.seed(seed)
#    add <- sample(1:n,dif)
#    binFac[add] <- binFac[add]+1
#    f <- factor(rep(1:n,binFac))
#    #S <- split(x,f)
#    #return(llply(S,stat))
#    S <- split(1:length(x),f)
#    s <- sapply(S,function(x) x[1])
#    return(aggregate(x,FUN=stat,start=s,width=binFac))
#}
Any scripts or data that you put into this service are public.
recoup documentation built on Nov. 8, 2020, 6:47 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
recoup
An R package for the creation of complex genomic profile plots

R/util.R
In recoup: An R package for the creation of complex genomic profile plots

Defines functions areColors setArg getArg getr setr getDefaultListArgs ssCI cmclapply calcLinearFactors removeData decideChanges mergeRuns sliceObj reorderClusters kmeansDesign readConfig covLengthsEqual interpolateSignal splitVector contVector

Documented in getr kmeansDesign mergeRuns removeData setr sliceObj

Try the recoup package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

recoup An R package for the creation of complex genomic profile plots

R/util.R In recoup: An R package for the creation of complex genomic profile plots

Defines functions areColors setArg getArg getr setr getDefaultListArgs ssCI cmclapply calcLinearFactors removeData decideChanges mergeRuns sliceObj reorderClusters kmeansDesign readConfig covLengthsEqual interpolateSignal splitVector contVector

Documented in getr kmeansDesign mergeRuns removeData setr sliceObj

Try the recoup package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

recoup
An R package for the creation of complex genomic profile plots

R/util.R
In recoup: An R package for the creation of complex genomic profile plots