R/pipelineChipSeq.R

#############################################
##                                         ##
## Pipeline for chipSeq/RNASeq experiments ##
##                                         ##
#############################################
#
# Author  : Romain Fenouil
# Date    : 2011-06-10
#
# THIS VERSION MUST BE USED WITH R >= 2.13.0  AND SHORTREAD LIBRARY >= 1.10.1
# THERE IS AN INCONSISTENCY PROBLEM WITH THE PRECEDENT VERSIONS OF THE LIBRARY (INVERTED STRANDS WHEN USING READALIGNED FUNCTION)


###################################
## Multiprocessors library choice (deprecated since 'parallel' has been included in R)
#
# After several tries, I chose 'multicore' library. Snow and snowfall are designed for being flexible and useable on networks also. 
# It implies that they don't share memory but need copy of objects to each new thread. Since we have to work on big objects, it's
# better to find a library which allows memory share or at least that limit the number of copies in memory. This is the case of
# 'multicore' which uses 'fork' and is designed to be used on a single computer with multi processors or cores. Since the OS is
# supposed to provide Copy-On-Write mechanism , it should avoid copying these big objects to children processes if we don't modify
# them in the function called. One should care about not modifying the big objects in the parallel functions but also limiting the
# size of returned abjects in order to limit the amount of data to copy throught the pipes between the parent process and the children
# (Here it could be very long). I could have used the library 'foreach' which allows more flexibility since it can use either 'snow'
# or 'multicore' as a backend. But 1 -> 'foreach' doesnt provide a ready to use 'lapply' parallel equivalent function that I needed.
# 2-> One should NEVER use this pipeline with 'snow' or network computing since copy of this big objects would be terribly unefficient !
# That's why multicore is the best suited (not ideal though), I just use mclapply directly in a very naive way, trying to minimize the
# number of objects copied. It is also easy to handle text messages from children using 'silent' parameter to 'mclapply'.

#library(multicore)
#library(ShortRead)


# A function that encapsulates the elements of a list (usually useful for big objects) in environments to give a list of environments (pass by environments through mclapply mechanism)
.encapsulateListElementsInEnv=function(listObject)
{
    #cat("\n Encapsulating list in environments")
    return(lapply(listObject,function(x)
                    {
                        capsule <- new.env() # Create the new environment
                        assign("value", x, envir=capsule) # Put the object in the environment
                    }))
}


# A function that checks for the preexistence of an eventual folder or a file with the desired name before actually creating the folder
.safeCreateFolder=function(folderToCreate)
{
    if(!file.exists(folderToCreate))
    {
        dir.create(folderToCreate, recursive=TRUE)
    }
    else
    {
        if(!file.info(folderToCreate)$isdir)
        {
            stop("Cannot create folder for storing results, a file (not a folder) with the same name already exists :", folderToCreate)
        }
    }
}


# Equivalent of deprecated pileup function from ShortRead using the 'coverage' function from Irange
# Inspired from : https://stat.ethz.ch/pipermail/bioconductor/2009-March/026760.html
.newPileup=function(start, fragLength, dir="+", readLength=fragLength)
{
    x_start <- start
    ii <- dir == "-"  # which ranges need to be shifted
    ii_readLength <- if (length(readLength) > 1) readLength[ii] else readLength
    ii_fragLength <- if (length(fragLength) > 1) fragLength[ii] else fragLength
    x_start[ii] <- x_start[ii] + ii_readLength - ii_fragLength #+ 1L
    
    x <- IRanges(start=x_start, width=fragLength)
    
    return(coverage(x))
}


# It is in charge of removing the NULL list elements or objects of length(0)
# Useful to get error messages in forks while getting results from mclapply
.checkErrorsInFork=function(mclapplyResult)
{

    # Find the try-error type class in the result
    hasFailed <- sapply(mclapplyResult, is, "try-error")
    
    if(any(hasFailed))
    {
        stop(paste("\nAt least one job failed with the folling error message(s) :\n  ",
                        paste(sapply(mclapplyResult[hasFailed], as.character), collapse="\n  "),
                        sep=""))
    }
    
    # The other error finding  mechanism (grep in text message) is kept for compatibility
    isNULL_mclapplyResult <- sapply(mclapplyResult, is.null)
    isEmpty_mclapplyResult <- (sapply(mclapplyResult, length)==0)
    
    mclapplyResult <- mclapplyResult[!(isNULL_mclapplyResult | isEmpty_mclapplyResult)]
    
    if(length(mclapplyResult)>0)
    {
        
        # Check for error messages
        isAtomicCharacterResult <- sapply(mclapplyResult, function(x) {return(is.character(x) && length(x==1))})
        if(any(isAtomicCharacterResult))
        {
            mclapplyResult_Character <- mclapplyResult[isAtomicCharacterResult]
            isErrorMessage <- sapply(mclapplyResult_Character, function(x) {return(grepl("error",x))}) 
            
            if(any(isErrorMessage))
            {
                
                cat("\nError(s) in forks :\n")
                sapply(mclapplyResult_Character[isErrorMessage], cat, "\n")
                stop("\nErrors were detected in at least one thread...")
            }
        }
        return(mclapplyResult)
    }
    else
    {
        stop("The resulting list from last loop is empty")
    }
}


# Small function used to display the execution time of the analysis steps
.printTimes=function(returnList)
{
    mapply(function(x, expName)
            {
                cat("\n\n---------------") 
                cat("\nEXP :",expName) 
                mapply(function(x, operationName)
                        {
                            cat("\n", operationName, ":", x["elapsed"], "sec(s) -->", x["elapsed"]/60, "min(s)")
                        }
                        ,x[["execTime"]], names(x[["execTime"]]))
            },
            returnList, names(returnList))
    cat("\n")
    return(NULL)
}


# This is a more generic function that will allow the user to define its own validity function
.checkComplexParam=function(param, paramName, expList, validValuesFct=function(...){return(TRUE)})
{
    if(!is.function(validValuesFct)) stop("validValuesFct must be a a function that tells whether the parameter values are valid or not")
    
    # these param must be atomic, vector, or  list (with same names as experiments names)
    if(is.list(param))
    {
        if(!all(names(expList) %in% names(param))) 
        {
            stop(paste(paramName," parameter is a list but the elements names don't match the experiments names",sep=""))
        }
        else 
        {
            if(!all(sapply(param, validValuesFct), na.rm=TRUE))
            {
                stop(paste(paramName," is a correct list but some elements contain invalid values",sep=""))
            }
            
            # param was already well formatted and the parameters fit the expectations specified by validValuesFct, return it
            return(param)
            
        }
    }
    else
    {
        if(!validValuesFct(param)) 
        {
            stop(paste(paramName,"parameter values are not valid"))
        }
        
        # If it's a vector, we create the list repeating these parameters list for all experiments
        return(lapply(expList, function(x, values){return(values)}, param))
        
    }
}


# addalpha() - adapted from "https://github.com/mylesmharrison/colorRampPaletteAlpha/blob/master/colorRampPaletteAlpha.R"
.addAlpha <- function(colors, alpha=1.0) 
{
    r <- col2rgb(colors, alpha=TRUE)
    # Apply alpha
    r <- r/255
    r[4,] <- alpha
    return(rgb(r[1,], r[2,], r[3,], r[4,]))
}


# Function that plot a density and highligth specific ranges in the distribution (separate by a vertical line and/or fill the range under the curve)
.plotDensityRanges=function(x, ranges=NULL, ranges.col="gray", ranges.alpha=0.3, labels.col="black", include.lower=FALSE, include.upper=TRUE, sep.col="black", sep.lty=2, sep.lwd=1, ...)
{
    
    # Compute and plot the actual density function
    densityPoints <- density(x, n=2048)
    plot(densityPoints, ...)
    
    if(!is.null(ranges))
    {
        
        # Convert 'ranges' to a list to allow the same processing for single and multi ranges
        if(!class(ranges)=="list") ranges=list(ranges)
        
        
        # Summarize all parameters in a data frame to take advantage of recycling mechanism
        parametersDF <- as.data.frame(cbind(ranges = ranges,
                        ranges.col    = .addAlpha(ranges.col, ranges.alpha),
                        labels.col    = labels.col,
                        include.lower = include.lower,
                        include.upper = include.upper))
        
        
        # Keep track of the vertical separation drawn in order to avoid plotting twice on the same coordinate.
        alreadyDrawnSep <- NULL
        
        # Plot all ranges
        for(index in 1:nrow(parametersDF))
        {
            # Get the parameters for the current range in the dataframe
            currentRange      <-  parametersDF[["ranges"]][[index]]
            ranges.col        <-  parametersDF[["ranges.col"]][[index]]
            labels.col        <-  parametersDF[["labels.col"]][[index]]
            include.lower     <-  parametersDF[["include.lower"]][[index]]
            include.upper     <-  parametersDF[["include.upper"]][[index]]
            
            
            # Get the curves coordinates with the closest values from the desired range
            leftInd <- which.min(abs(min(currentRange)-densityPoints$x))
            rightInd <- which.min(abs(max(currentRange)-densityPoints$x))
            
            if(leftInd!=rightInd)
            {
                # Get the coordinates of the range limits (left and right vertical lines)
                coordsRangeLim.x <- c(densityPoints$x[leftInd],densityPoints$x[rightInd])
                coordsRangeLim.y <- c(densityPoints$y[leftInd],densityPoints$y[rightInd])
                
                # Avoid to plot several time the separation lines (detect the ones already plotted)
                alreadyPlotted <- (coordsRangeLim.x %in% alreadyDrawnSep)
                alreadyDrawnSep <- c(alreadyDrawnSep,coordsRangeLim.x[!alreadyPlotted])
                
                # Plot vertical line of separation
                abline(v=coordsRangeLim.x[!alreadyPlotted], col=sep.col, lty=sep.lty, lwd=sep.lwd)
                
                # Define the range name
                labelText <- paste(if(include.lower) "[" else '(',min(currentRange), ",", max(currentRange), if(include.upper) "]" else ')', sep="")
                
                # Plot the label at the middle of the range and defined y coordinate and relative position
                text(x=densityPoints$x[mean(c(leftInd,rightInd))], y=0, labels=paste("    ", labelText, sep=""), adj=0, srt=90, col=labels.col, cex=0.5)
                
                
                # Get all the points corresponding to this part of the curve
                densityPointsInRange.x <- densityPoints$x[leftInd:rightInd]
                densityPointsInRange.y <- densityPoints$y[leftInd:rightInd]
                
                # Add begin and end points at bottom
                densityPointsInRange.x <- c(densityPointsInRange.x[1], densityPointsInRange.x, densityPointsInRange.x[length(densityPointsInRange.x)])
                densityPointsInRange.y <- c(0,densityPointsInRange.y,0)
                polygon(x=densityPointsInRange.x, y=densityPointsInRange.y, col=ranges.col, border=NA)
            }
        }
    }
    
    return(densityPoints)
}


# This function plots a double symmetric histogram
.plotBackToBackBarplot=function(top, bottom, topLab="", bottomLab="" ,main="", cutLineColumn=NULL, gridCol=NULL, outsideLayout=FALSE, plotLegend=TRUE,...)
{
    if(!outsideLayout)
    {
        layout(1:2, heights=c(1,1))
    }
    
    if(is.null(top) || (length(top)==0))
    {
        frame()
    }
    else
    {
        par(mar=c(1.5,4.5,3,3))
        barplot(top, main=main, axes=FALSE, legend.text=plotLegend, ylab=topLab,...)
        if(is.numeric(cutLineColumn)) abline(v=cutLineColumn*1.2+0.1, lwd=2, lty=2, col="red")
        axis(2)
        grid(col=gridCol, nx=0, ny=NULL)
    }
    
    
    
    if(is.null(bottom) || (length(bottom)==0))
    {
        frame()
    }
    else
    {
        par(mar=c(3,4.5,1.5,3))
        colnames(bottom) <- NULL
        barplot(-bottom, main="", axes=FALSE, legend.text=FALSE, ylab=bottomLab, ...)
        if(is.numeric(cutLineColumn)) abline(v=cutLineColumn*1.2+0.1, lwd=2, lty=2, col="red")
        axis(2)
        grid(col=gridCol, nx=0, ny=NULL)
    }
    
}



###############################################################################################################################################
##### These functions are the three main steps of the pipeline (removing artefacts, estimate the elongation size, generate the 'piled' vectors)
###############################################################################################################################################
#
# Each one is designed to be called on one chromosome at a time, the function using them is taking care of reassembling the chromosomes informations after each call
# Some of them can however be used for several chromosomes at a time, it will list the available chromosomes in the provided alignedDataObject, loop on it and return a list instead of an atomic object
# Be carefull NOT TO use part of chromosomes only... would be tricky to handle the results after... and might be meaningless



# Subfunction that plots the thresholds lines (horizontal and vertical) for the graphs relative to reads occupancy (selection made on threshold)
.plotLineThreshold=function(threshold, percentCumulatedReadsInCounts, line.col)
{
    # Get the x coordinates as numeric
    pileHeight <- as.numeric(names(percentCumulatedReadsInCounts))
    
    # Get the bigger value under threshold
    closestHeight <- sapply(lapply(threshold, function(threshold,values){return(values[values<=threshold])}, pileHeight), max)
    percentCorresponding <- percentCumulatedReadsInCounts[as.character(closestHeight)]
    
    
    #plot vertical line
    segments(x0=threshold,y0=0, x1=threshold, y1=percentCorresponding, lty=2, col=line.col)
    #plot horizontal line
    segments(x0=0,y0=percentCorresponding, x1=threshold, y1=percentCorresponding, lty=2, col=line.col)
    
    text(threshold, percentCorresponding,labels=paste(threshold, " -> ", format(percentCorresponding,digits=4, scientific=FALSE), "%", sep=""),adj=c(-0.1,1.2), col=line.col)        
}

# Subfunction that plots the thresholds lines (horizontal and vertical) for the graphs relative to reads occupancy (selection made on percentage of reads to keep)
#    plotLinePercent=function(percent, percentCumulatedReadsInCounts, line.col)
#    {
#        # Get the x coordinates as numeric
#        pileHeight=as.numeric(names(percentCumulatedReadsInCounts))
#        names(pileHeight)=percentCumulatedReadsInCounts
#        
#        # Get the bigger value under threshold
#        closestPercent=sapply(lapply(percent, function(percent,values){return(values[values>=percent])}, percentCumulatedReadsInCounts),min)
#        pileHeightCorresponding=pileHeight[as.character(closestPercent)]
#        
#        
#        #plot vertical line
#        segments(x0=pileHeightCorresponding,y0=0, x1=pileHeightCorresponding, y1=percent, lty=2, col=line.col)
#        #plot horizontal line
#        segments(x0=0,y0=percent, x1=pileHeightCorresponding, y1=percent, lty=2, col=line.col)
#        
#        text(pileHeightCorresponding, percent,labels=paste(format(percent,digits=4, scientific=FALSE), "%", " -> ", pileHeightCorresponding, " -> ", format(closestPercent,digits=4, scientific=FALSE), "%", sep=""),adj=c(-0.1,1.2), col=line.col)
#        
#    }


# This function will plot two graphs relative to reads distribution along the chromosome (This function doesn't take the strand in account, plase split it before if separate analysis required)
# It will also return as a list the computed parameters that might be necessary for the artefact removal
.plotReadOccupancyStats=function(positionTAB, totalNbReads, thresholds, suffixTitle="")
{
    
    # Counting the number of reads aligned to each position
    positionCOUNT <- table(positionTAB)
    tableCount <- table(positionCOUNT)
    
    
    ### Plotting stats about piles
    
    ## 1 piles height distribution
    
    suppressWarnings(plot(tableCount, log="x", main=paste("Piles height distribution - ",suffixTitle, " - Total nb of reads : ", totalNbReads,sep=""), xlab=paste("Nb of piled reads (pile height) - max=",max(positionCOUNT),sep=""), ylab="Occurences (number of piles)"))
    
    ## 2 Getting stats about nb of reads according to eventual threshold
    # Even if we have these stats, we keep on extracting artefacts (the actual index of reads to remove) for all "stat" threshold in the calling function because we want to know more about paired-ends later
    
    # Was used while plotting two strands on the same graph 
    ## Get the maximum pile size to plot the graph (x axis)
    #maxHeightPile=max(positionCOUNT)
    ## Actual plotting
    #plot(NULL, ylim=c(0.1,100), xlim=c(0.1, maxHeightPile), main="", xlab="",ylab="")
    
    ## Plotting the curves    
    #for(currentStrand in strandValues)
    #{
    # Compute the percentage of reads remaining for each threshold
    nbReadsInCounts <- tableCount*as.numeric(names(tableCount))
    cumulatedReadsInCounts <- cumsum(nbReadsInCounts)
    percentCumulatedReadsInCounts <- (cumulatedReadsInCounts/max(cumulatedReadsInCounts))*100
    
    #points(as.numeric(names(percentCumulatedReadsInCounts)), percentCumulatedReadsInCounts,type="s", col=as.numeric(factor(currentStrand, levels=strandValues)))        
    #}
    
    # Plot the curves
    plot(as.numeric(names(percentCumulatedReadsInCounts)), percentCumulatedReadsInCounts,type="s", main=paste("Proportion of remaining reads by threshold - ", suffixTitle, sep=""), ylab="Remaining reads (%)" , xlab="Threshold (pile height) for artefacts")
    
    # Plot the lines corresponding to selected thresholds
    .plotLineThreshold(thresholds, percentCumulatedReadsInCounts, rainbow(length(thresholds)))
    
    # Return the extracted and computed vectors that will be useful for the artefact removal steps 
    return(NULL)
}


############################################
# "getArtefactsIndexes" will check for the artefacts and return the indexes to remove FROM THE OBJECT USED FOR THE CALL 
# (has to be translated to original indexes if you send a subselection)
# If 'remove=TRUE' the function will return the object used for the call minus the reads concerned
# If 'remove=FALSE' the function will return the index to remove in the object alignedDataObject used for the call
# The second option is better for multithreading, specially for 'multicore' because it avoid copying all the data through the pipe.
# Text messages are concatenated and showed in a unique cat in case of multithreading, it helps not to mix all messages from different chromosomes.
# THE INPUT SHOULD NEVER BE A LIST OF AlignedData OBJECT SPLITTED BY CHROMOSOMES, IT MUST BE AN AlignedData OBJECT ALREADY SPLITTED : ONE CHROMOSOME AT A TIME)
getArtefactsIndexes <- function(alignedDataObject, expName, thresholdToUse=1, thresholdForStats=c(1:5,10,20,50,100), resultFolder=".")
{
    
    # Handle the pass-by-environment case
    if(is.environment(alignedDataObject))
    {
        alignedDataObject <- alignedDataObject$value
    }
    
    
    # Check the chromosomes information in the object
    currentChr <- unname(unique(seqnames(alignedDataObject)))
    
    if(length(currentChr)>1)
    {
        stop("getArtefactIndexes expects an AlignedData object with information on a unique chromosome (seqnames)... Please split your object before calling this function.")
    }
    
    cat(paste(currentChr, "... ", sep=""))
    
    # Basic tests on function arguments to exclude obvious misuses
    if(!(is.numeric(thresholdToUse) || is.numeric(thresholdForStats))) stop("The getArtefactsIndexes function requires at least a threshold to use or a vector of thresholds for stats on artefacts")
    if(length(thresholdToUse)>1) stop("The thresholdToUse argument of function getArtefactsIndexes must be an atomic numeric value, NA, or NULL")
    
    
    
    # Concatenate the thresholds to treat them all at the same time in loops
    thresholds <- as.numeric(unique(na.omit(c(thresholdToUse,thresholdForStats))))
    
    # Will receive all the intermediary or final information to be kept along the analysis or to be returned to calling environment
    listResults <- list()
    
    # Used for the loops
    #strandValues <- c("+","-")
    strandValues <- unname(unique(strand(alignedDataObject)))
    
    
    # Preparing the picture file and layout in which will be plotted the 5 figures (two for statistics on piles for each strand and the last one summarizing removed artefacts)
    skipPlotting <- FALSE
    if(length(strandValues)==2)
    {
        pdf(file=file.path(resultFolder, paste(expName, "_pileStats_",currentChr,".pdf",sep="")), width=10, height=10)
        layout(matrix(c(1,1,3,3,2,4,2,4,5,5,6,6), ncol=2, byrow=TRUE)) # Create a layout for figures organization
    } else if(length(strandValues)==1)
    {
        pdf(file=file.path(resultFolder, paste(expName, "_pileStats_",currentChr,".pdf",sep="")), width=10, height=10)
        layout(matrix(c(1:3), ncol=1)) # Create a layout for figures organization
    } else
    {
        skipPlotting <- TRUE
    }
    
    
    #### 1- This block will select the reads subpopulations (strands) and count their occurences by position (distribution of pile heights)
    for(currentStrand in strandValues)
    {
        indexSelectionFromTotal <- which( (seqnames(alignedDataObject)==currentChr) & (strand(alignedDataObject)==currentStrand) )
        
        # Plot the reads occupancy distribution and store the countings for each strand in the list
        # Getting the coordinates of the reads
        positionTAB <- position(alignedDataObject[indexSelectionFromTotal])
        listResults[[currentStrand]] <- list("positionTAB"=positionTAB, "positionCOUNT"=table(positionTAB))
        if(!skipPlotting)
        {
            .plotReadOccupancyStats(positionTAB, length(alignedDataObject[indexSelectionFromTotal]), thresholds=thresholds, suffixTitle=paste("strand '", currentStrand,"'",sep=""))
        }
        listResults[[currentStrand]][["indexSelectionFromTotal"]] <- indexSelectionFromTotal
    }
    
    # Eventual block of automatic 'selection of' threshold or 'fine tuning' based on the precomputed distributions (see previous block)
    # This is the main reason why the two loops are separated and why there is this mechanism of 'listResult' to keep information between loops
    
    #### 2- This block finds the actual artefacts indexes
    for(currentStrand in strandValues)
    {
        # Will help to keep trace of the threshold for the future lapplys
        names(thresholds) <- thresholds
        
        # Getting the coordinates which have more than 'threshold' reads aligned (and for which we should keep only one copy)
        #positionsRepeatedMoreThanThr=names(listResults[[currentStrand]][["positionCOUNT"]][listResults[[currentStrand]][["positionCOUNT"]]>threshold])
        #This one makes the job on several threshold, and return the result as a list, each element being the result of a threshold
        listResults[[currentStrand]][["positionsRepeatedMoreThanThr"]] <- lapply(lapply(lapply(thresholds,"<",listResults[[currentStrand]][["positionCOUNT"]]), function(x){return(listResults[[currentStrand]][["positionCOUNT"]][x])}),names)
        
        #messages <- paste(messages, "\n    listing unique IDs to remove... nb of positions concerned :", length(listResults[[currentStrand]][["positionsRepeatedMoreThanThr"]]))
        
        
        # Get index in PositionTAB of all reads that are repeated more than 'threshold' times (the ones we gonna REMOVE)
        #indexRepeatedInPositionTAB=(listResults[[currentStrand]][["positionTAB"]] %in% positionsRepeatedMoreThanThr[[as.character(threshold)]])
        #This one makes the job on several threshold, and return the result as a list, each element being the result of a threshold
        listResults[[currentStrand]][["indexRepeatedInPositionTAB"]] <- lapply(listResults[[currentStrand]][["positionsRepeatedMoreThanThr"]],function(positionsRepeatedMoreThanThr){return(listResults[[currentStrand]][["positionTAB"]] %in% positionsRepeatedMoreThanThr)})
        
        
        
        # In case of paired end, we cannot take a decision on which kind of read to keep among the ones for which both are in artefactual regions or not 
        # Indirectly, it avoids to take the risk of breaking the reads/mates symmetry
        if(!pairedEnds(alignedDataObject))
        {
            # Get index in PositionTAB of the the FIRST read matching each repeated position (the only one we gonna KEEP for each repeated position) 
            # indexFirstRepeated <- match(listResults[[currentStrand]][["positionsRepeatedMoreThanThr"]][[as.character(thresholdToUse)]], positionTAB)
            # Do it on all thresholds
            indexFirstRepeated <- lapply(listResults[[currentStrand]][["positionsRepeatedMoreThanThr"]], match, listResults[[currentStrand]][["positionTAB"]])
            
            # We want to keep at least one copy for the reads repeated more than thr, we have to remove all repeats BUT the first occurence for each concerned coordinate
            #indexRepeatedInPositionTAB[indexFirstRepeated] <- FALSE
            # Do it on all thresholds
            listResults[[currentStrand]][["indexRepeatedInPositionTAB"]] <- mapply(function(indexRepeatedInPositionTAB, indexFirstRepeated)
                    {
                        indexRepeatedInPositionTAB[indexFirstRepeated] <- FALSE 
                        return(indexRepeatedInPositionTAB)
                    },
                    listResults[[currentStrand]][["indexRepeatedInPositionTAB"]],
                    indexFirstRepeated, SIMPLIFY=FALSE)
            
        }
        
        #messages <- paste(messages, "\n    list to remove",ifelse(pairedEnds,":", "(keep the first occurence) :"), sum(indexRepeatedInPositionTAB))
        
        # Translating the indexes to the ones in the object having all reads (+ and -)
        listResults[[currentStrand]][["indexesToRemoveFromTotal"]] <- lapply(listResults[[currentStrand]][["indexRepeatedInPositionTAB"]], function(indexRepeatedInPositionTAB){return(listResults[[currentStrand]][["indexSelectionFromTotal"]][indexRepeatedInPositionTAB])})
    }
    
    
    #### 3- Once artefacts were computed on both strands, check mate pairs of reads that were considered as "to be removed"
    if(pairedEnds(alignedDataObject))
    {
        for(currentStrand in strandValues)
        {
            otherStrand <- strandValues[!strandValues %in% currentStrand]
            
            
            ### First in pair (FIP)
            # get the "to remove" reads marked as first in pair
#            firsInPairToRemoveFromTotal <- original_indexesToRemoveFromTotal[which(as.logical(bitAnd(64,flag[original_indexesToRemoveFromTotal])))] # first in pair
#            associatedMate <- firsInPairToRemoveFromTotal+1 # This works because we work on an object SORTED by pairs !!!
            # Do it on all thresholds
            FIPindex_in_indexesToRemoveFromTotal <- lapply(listResults[[currentStrand]][["indexesToRemoveFromTotal"]], function(indexesToRemoveFromTotal) {return(which(as.logical(bitAnd(64,flag(alignedDataObject)[indexesToRemoveFromTotal]))))})
            firstInPairToRemoveFromTotal <- mapply("[", listResults[[currentStrand]][["indexesToRemoveFromTotal"]], FIPindex_in_indexesToRemoveFromTotal, SIMPLIFY=FALSE)
            associatedMate <- lapply(firstInPairToRemoveFromTotal, "+", 1) # This works because we work on an object SORTED by pairs !!!
            
            
            # Get the indexes of reads that have their mate already considered as artefacts (for statistics)
            # We search it ON THE OPPOSITE STRAND because by definition pairs have reads on opposite strands
            alreadyConsideredArtefactFIPmate <- mapply("%in%", associatedMate, listResults[[otherStrand]][["indexesToRemoveFromTotal"]], SIMPLIFY=FALSE)
            
            
            ### Second in pair (SIP)
            # get the "to remove" reads marked as second in pair
#            secondInPairToRemoveFromTotal <- original_indexesToRemoveFromTotal[which(as.logical(bitAnd(128,flag[original_indexesToRemoveFromTotal])))] #second in pair
#            associatedMate <- secondInPairToRemoveFromTotal-1 # This works because we work on an object SORTED by pairs !!!
            # Do it on all thresholds
            SIPindex_in_indexesToRemoveFromTotal <- lapply(listResults[[currentStrand]][["indexesToRemoveFromTotal"]], function(indexesToRemoveFromTotal) {return(which(as.logical(bitAnd(128,flag(alignedDataObject)[indexesToRemoveFromTotal]))))})
            secondInPairToRemoveFromTotal <- mapply("[", listResults[[currentStrand]][["indexesToRemoveFromTotal"]], SIPindex_in_indexesToRemoveFromTotal, SIMPLIFY=FALSE)
            associatedMate <- lapply(secondInPairToRemoveFromTotal, "-", 1) # This works because we work on an object SORTED by pairs !!!
            
            # Get the indexes of reads that have their mate already considered as artefacts (for statistics)
            # We search it ON THE OPPOSITE STRAND because by definition pairs have reads on opposite strands
            alreadyConsideredArtefactSIPmate <- mapply("%in%", associatedMate, listResults[[otherStrand]][["indexesToRemoveFromTotal"]], SIMPLIFY=FALSE)
            
            
            ### Process them
            
            # Merge
            #listResults[[currentStrand]][["indexesToRemoveFromTotal_IsPairedArtefact"]] <- mapply(c,alreadyConsideredArtefactFIPmate,alreadyConsideredArtefactSIPmate)
            # Let's try to get them in the same order as 'indexesToRemoveFromTotal', first create a vector of FALSE and then put as TRUE the ones which are pairs (retranslate indexes back, from FIP/SIP to indexesToRemoveFromTotal using the index_in_indexesToRemoveFromTotal)
            listResults[[currentStrand]][["indexesToRemoveFromTotal_isPairedArtefact"]] <- lapply(listResults[[currentStrand]][["indexesToRemoveFromTotal"]], function(x){return(logical(length(x)))}) # Create a boolean vector all FALSE
            
            # FIP
            listResults[[currentStrand]][["indexesToRemoveFromTotal_isPairedArtefact"]] <- mapply(function(indexesToRemoveFromTotal_isPairedArtefact, alreadyConsideredArtefactFIPmate, FIPindex_in_indexesToRemoveFromTotal)
                    {
                        indexesToRemoveFromTotal_isPairedArtefact[FIPindex_in_indexesToRemoveFromTotal[alreadyConsideredArtefactFIPmate]] <- TRUE
                        return(indexesToRemoveFromTotal_isPairedArtefact)
                    },
                    listResults[[currentStrand]][["indexesToRemoveFromTotal_isPairedArtefact"]], alreadyConsideredArtefactFIPmate,FIPindex_in_indexesToRemoveFromTotal)
            
            # SIP
            listResults[[currentStrand]][["indexesToRemoveFromTotal_isPairedArtefact"]] <- mapply(function(indexesToRemoveFromTotal_isPairedArtefact, alreadyConsideredArtefactSIPmate, SIPindex_in_indexesToRemoveFromTotal)
                    {
                        indexesToRemoveFromTotal_isPairedArtefact[SIPindex_in_indexesToRemoveFromTotal[alreadyConsideredArtefactSIPmate]] <- TRUE
                        return(indexesToRemoveFromTotal_isPairedArtefact)
                    },
                    listResults[[currentStrand]][["indexesToRemoveFromTotal_isPairedArtefact"]], alreadyConsideredArtefactSIPmate, SIPindex_in_indexesToRemoveFromTotal)
            
            # This SHOULD NEVER HAPPEN, or this would reveal an algorithmic failure or a file sorting problem
            #if(!all(ArtefactWithOrphanMate %in% indexesToRemoveFromTotal)) stop("Inconsistency while searching for artefacts with orphan mate (some reads considered as artefacts with orphan mates don't appear in artefacts list before removal), check that your file is properly sorted, first in pair and second in pair should be interlaced")
            
            
            # These SHOULD NEVER BE TRUE, or this would reveal an algorithmic failure or a file sorting problem
            #if(any(orphanMateFromArtefact %in% indexesToRemoveFromTotal)) stop("Inconsistency while searching for orphan mate artefacts (the mate supposed orphan already appears in the artefacts list), check that your file is properly sorted, first in pair and second in pair should be interlaced")
            #if(any(duplicated(c(ArtefactWithOrphanMate,orphanMateFromArtefact,indexesToRemoveFromTotal)))) stop("Inconsistency while searching for artefacts with orphan mate (some artefacts seem to appear in several exclusive lists), check that your file is properly sorted, first in pair and second in pair should be interlaced")
            
        } # /currentStrand (+/-)
    } # /If pairedEnds check mates
    
    
    
    
    ### plotting stats about removed artefacts (by threshold) (5th plot, because the two first are made on each strand)
    if(length(strandValues)==2)
    {
        # Preparing to plot a back to back barplot (one per strand)
        top <- NULL
        bottom <- NULL
        
        # Plot artefacts statistics by thresholds
        if(pairedEnds(alignedDataObject))
        {
            # Top
            artefactToRemoveByThr <- sapply(listResults[["+"]][["indexesToRemoveFromTotal"]],length)
            pairedArtefactsByThr <- sapply(listResults[["+"]][["indexesToRemoveFromTotal_isPairedArtefact"]],sum)
            orphanArtefactToRemoveByThr <- artefactToRemoveByThr-pairedArtefactsByThr
            
            top <- rbind(pairedArtefactsByThr, orphanArtefactToRemoveByThr)
            rownames(top) <- c("Paired artefacts","Artefacts with orphan")
            
            # Bottom
            artefactToRemoveByThr <- sapply(listResults[["-"]][["indexesToRemoveFromTotal"]],length)
            pairedArtefactsByThr <- sapply(listResults[["-"]][["indexesToRemoveFromTotal_isPairedArtefact"]],sum)
            orphanArtefactToRemoveByThr <- artefactToRemoveByThr-pairedArtefactsByThr
            
            bottom <- rbind(pairedArtefactsByThr, orphanArtefactToRemoveByThr)
            rownames(bottom) <- c("Paired artefacts","Artefacts with orphan")
        }
        else
        {
            top <- sapply(listResults[["+"]][["indexesToRemoveFromTotal"]],length)
            bottom <- sapply(listResults[["-"]][["indexesToRemoveFromTotal"]],length)
        }
        
        .plotBackToBackBarplot(top, bottom, topLab="Positive strand (Reads)", bottomLab="Negative strand (Reads)", main="Artefacts distribution", cutLineColumn=1, outsideLayout=TRUE, plotLegend=pairedEnds(alignedDataObject), beside=FALSE)
    }
        
    if(!skipPlotting)
    {
        ### Finish plotting stats
        dev.off()
    }    
    
    ### Select only interesting results to report to global function (Try not to overflow memory especially for 'multithreading' because all the data will be serialized and passed through a pipe, serialization fails for too large objects) 
    
    # Select only results relative to information extracted from strands
    listResults <- listResults[strandValues]
    for(currentStrand in strandValues)
    {
        # select only indexesToRemoveFromTotal and indexesToRemoveFromTotal_isPairedArtefact
        listResults[[currentStrand]] <- listResults[[currentStrand]][c("indexesToRemoveFromTotal","indexesToRemoveFromTotal_isPairedArtefact")]
        # return only results about the thresholdToUse
        listResults[[currentStrand]] <- lapply(listResults[[currentStrand]],"[", as.character(thresholdToUse))
    }
    
    return(listResults)
    
}



############################################
# "estimateElongationSize" will return the most probable elongation size for the provided chromosomes in 'alignedDataObject'
estimateElongationSize <- function(alignedDataObject, expName, stepMin=50, stepMax=450, stepBy=10, averageReadSize=NA, resultFolder=".")
{
    
    # Handle the pass-by-environment case
    if(is.environment(alignedDataObject))
    {
        alignedDataObject <- alignedDataObject$value
    }
    
    
    # Check if there is one or several chromosomes provided in alignedDataObject
    chromosomeNames <- unname(unique(seqnames(alignedDataObject)))
    
    # List of file names created (if there is several chromosomes, it will be a list)
    resultingElongation <- NULL
    if(length(chromosomeNames)>1)
    {
        resultingElongation <- list()
    }
    
    
    for(currentChr in chromosomeNames)
    {
        
        cat(paste(currentChr, "... ", sep=""))
        
        #selection of the reads on strand +
        alignedDataPlus <- alignedDataObject[seqnames(alignedDataObject)==currentChr  &  strand(alignedDataObject)=="+"]
        
        #selection of the reads on strand -
        alignedDataMinus <- alignedDataObject[seqnames(alignedDataObject)==currentChr  &  strand(alignedDataObject)=="-"]
        
        
        # Piling the reads in 'per coordinate' values
        piledP <- .newPileup(position(alignedDataPlus), qwidth(alignedDataPlus))
        
        piledM <- .newPileup(position(alignedDataMinus), qwidth(alignedDataMinus))
        
        piledP <- as.numeric(piledP)
        piledM <- as.numeric(piledM)
        
        stepShift <- seq(stepMin,stepMax,stepBy)
        
        # Computing the overlap score for each shifting step
        res <- elongationEstimation(piledP,piledM,stepShift)
        
        maxScoredShift <- stepShift[which.max(res)]
        
        pdf(file=file.path(resultFolder, paste(expName, "_extensionEstimation_",currentChr,"_",maxScoredShift+(averageReadSize),".pdf",sep="")))
        
        if(is.na(averageReadSize)) # in case a global value has not been provided, get it from the current chromosome
        {
            averageReadSize <- trunc(mean(qwidth(alignedDataObject)))
        }
        
        plot(x=stepShift,y=res,main=paste("Chromosome",currentChr,"size :",maxScoredShift+(averageReadSize),sep=" "))
        abline(v=maxScoredShift)
        
        dev.off()
        
        # Stores the score of the current chromosome in this tab indexed by name of the chromosome
        elongationSizeEstimation <- maxScoredShift+(averageReadSize)
        
        rm(alignedDataPlus, alignedDataMinus, piledP, piledM, res)
        invisible(gc())
        
        
        if(length(chromosomeNames)>1)
        {
            resultingElongation[[currentChr]] <- elongationSizeEstimation
        }
        else
        {
            resultingElongation <- elongationSizeEstimation
        }
    }
    
    return(resultingElongation)
    
}



############################################
# "generatePiled" will generate a vector of scores for Coordinates spanning the chromosome(s) provided in 'alignedDataObject'
# It can handle several situations, basically all possible combination of : Paired-end/Single-end, UniReads/MultiReads (weigthed), MidPoint/Normal piling, Elongation/No Elongation, ReadSize Specified/Not Specified 
generatePiled <- function(alignedDataObject, elongationSize, averageReadSize, midPoint=FALSE)
{
    
    # Handle the pass-by-environment case
    if(is.environment(alignedDataObject))
    {
        alignedDataObject <- alignedDataObject$value
    }
    
    
    if(!((is.numeric(elongationSize) && elongationSize>=0 && length(elongationSize)==1) || is.na(elongationSize))) stop("elongationSize parameter must be an atomic positive number or NA value")
    
    if(!is.na(elongationSize))
    {
        if(pairedEnds(alignedDataObject) && (elongationSize!=0))
        {
            warning("A non-zero shifting (elongation) value has been specified to pileup module but the data is claimed to be paired ends, ignoring parameter (NA expected). Consider declaring your data as single-ended before asking for a custom elongation size.")
            elongationSize <- NA
        }
    }
    else if(!pairedEnds(alignedDataObject))
    {
        stop("A non valid elongation has been specified for a dataset declared as single-ended experiment (NA instead of positive or 0 value expected)")    
    }
    
    # Check if there is one or several chromosomes provided in alignedDataObject
    chromosomeNames <- unname(unique(seqnames(alignedDataObject)))
    
    # List of file names created (if there is several chromosomes, it will be a list)
    resultingPiled <- NULL
    if(length(chromosomeNames)>1)
    {
        resultingPiled <- list()
    }
    
    
    for(currentChr in chromosomeNames)
    {
        alnCurrentChrom <- alignedDataObject[seqnames(alignedDataObject)==currentChr]
        
        cat(paste(currentChr, " (",length(alnCurrentChrom)," reads)","... ", sep=""))
        
        #cat("\n Piling chromosome :", currentChr, "- reads number :",length(alnCurrentChrom), "-", ifelse(is.na(elongationSize),"automatic",paste(elongationSize,"bp")), ifelse(midPoint,"shifting","extension"),"...")
        
        # Removing one read of the pair to avoid double representation of the signal while elongating/shifting
        #if(pairedEnds(alnCurrentChrom) && is.na(elongationSize))
        #{
        #    alnCurrentChrom <- alnCurrentChrom[strand(alnCurrentChrom)=="+"]
        #}
        
        
        ### Set readLengthParam value according to what information is given (Transparently handles multiread by taking a decision on readLength param)
        readLengthParam <- NULL
        
        if(length(qwidth(alnCurrentChrom))>0) # If provided in the object
        {
            readLengthParam <- qwidth(alnCurrentChrom) # Use the real one
        }
        else
        {
            if(!is.numeric(averageReadSize) && averageReadSize>0 && (length(averageReadSize)==1)) 
            {
                stop("No information about read length in object and no valid average read size specified (positive number)")
            }
            readLengthParam <- averageReadSize
        }
        
        
        ### Set fragLengthParam value and do specific treatment according to what information is given
        fragLengthParam <- NULL
        
        
        
        if(midPoint)
        {
            fragLengthParam <- 1
            
            if(pairedEnds(alnCurrentChrom) && is.na(elongationSize)) # Shifting reads by isize/2
            {
                # We only have one read per pair left, the one on the positive strand because we removed the other one to avoid double representation of the signal
                position(alnCurrentChrom) <- as.integer(position(alnCurrentChrom)+trunc(isize(alnCurrentChrom)/2))
                
                #position(alnCurrentChrom[strand(alnCurrentChrom)=="+"])=position(alnCurrentChrom[strand(alnCurrentChrom)=="+"])+trunc(isize(alnCurrentChrom[strand(alnCurrentChrom)=="+"])/2)
                #position(alnCurrentChrom[strand(alnCurrentChrom)=="-"])=position(alnCurrentChrom[strand(alnCurrentChrom)=="-"])-trunc(isize(alnCurrentChrom[strand(alnCurrentChrom)=="-"])/2)
            }
            #else # either (SE & whatever elongation) or (PE & elongation 0) because PE and elongation!=0 ignored at the beginning of the function
            else if((!pairedEnds(alnCurrentChrom)) && (elongationSize!=0)) # Limit it to SE and no need to go here for elongation/shifting 0 (elongationSize can't be NA here because of the tests @ the beginning of the function)
            {
                position(alnCurrentChrom) <- as.integer(position(alnCurrentChrom)+ifelse(strand(alnCurrentChrom)=="+",trunc(elongationSize/2), -trunc(elongationSize/2)))
                #position(alnCurrentChrom[strand(alnCurrentChrom)=="+"])=as.integer(position(alnCurrentChrom[strand(alnCurrentChrom)=="+"])+trunc(elongationSize/2))
                #position(alnCurrentChrom[strand(alnCurrentChrom)=="-"])=as.integer(position(alnCurrentChrom[strand(alnCurrentChrom)=="-"])-trunc(elongationSize/2))
            }
        }
        else
        {
            fragLengthParam <- readLengthParam
            
            if(pairedEnds(alnCurrentChrom) && is.na(elongationSize)) 
            {
                fragLengthParam <- isize(alnCurrentChrom)
            }
            else if((!pairedEnds(alnCurrentChrom)) && (elongationSize!=0))
            {
                fragLengthParam <- elongationSize
            }
        }
        
        
        
        if(is.null(fragLengthParam) || is.null(readLengthParam)) 
        {
            stop("The piling module failed to choose a behaviour from data information, please check that you give all necessary informations (see documentation)")
        }
        
        ### Setting weight parameter as 1 if not specified
        weightParam <- if(length(weight(alnCurrentChrom))>0) weight(alnCurrentChrom) else rep(1.0, length(alnCurrentChrom))
        
        ### PILING
        
        # In case of elongation size specified, it overrides the readLength parameter
        # readLEngth is just used to shift the start coordinate for negative strands
        # so it's better to put the read size when elongationsize=0 but it's still 
        # possible to put less in case needed (nucelosomes start for instance)
        
        
#        piledRLE = .newPileup(    start=position(alnCurrentChrom),
#                                fragLength=fragLengthParam,
#                                dir=strand(alnCurrentChrom),
#                                readLength=qwidth(alnCurrentChrom))
        
        
        piledRLE <- Rle(pileupDouble(start=position(alnCurrentChrom),
                        fragLength=fragLengthParam,
                        dir=strand(alnCurrentChrom),
                        readLength=readLengthParam, 
                        weight=weightParam))
        
        # Set the chromosome information in the Rle object metadata in order to be able to retrieve it in further processing (writing files)    
        metadata(piledRLE) <- list("chr"=currentChr)
        
        # Setting the two vectors at the same size
#        if(length(piled)>length(piledMulti))
#        {
#            length(piledMulti)=length(piled)
#            piledMulti[is.na(piledMulti)]=0
#        }
#        else
#        {
#            length(piled)=length(piledMulti)
#            piled[is.na(piled)]=0
#        }
#
#        # Adding the score of uni- and multi- reads
#        piledRLE=Rle(piled+piledMulti)
        
        
        
        
        if(length(chromosomeNames)>1)
        {
            resultingPiled[[currentChr]] <- piledRLE
        }
        else
        {
            resultingPiled <- piledRLE
        }
    }
    
    return(resultingPiled)
    
}








.writeWIGvs_chr <- function(piledRleData, baseFileName, resultFolder=".", compatibilityOutputWIG=FALSE)
{
    
    currentChr <- metadata(piledRleData)[["chr"]]
    
    cat(paste(currentChr, "... ", sep=""))
    
    # Creating the file and writing the track descriptions
    outputFileName <- file.path(resultFolder, paste("TEMP_WIGvs_", baseFileName, ".", currentChr, sep=""))
    
    # Binary mode to avoid conversion of \n for specific platforms
    fileCon <- file(outputFileName, open="wb")

    # Repeat track line for every chromosome only in compatibility mode (non standard WIG format) 
    if(compatibilityOutputWIG)
    {
        trackLine <- paste("track type=wiggle_0 name=\"", baseFileName, "\"", sep="")
        writeLines(trackLine, con=fileCon, sep="\n")
    }
    
    descLine <- paste("variableStep chrom=", currentChr, sep="")
    writeLines(descLine, con=fileCon, sep="\n")
    
    # Using the RLE to compute the coordinates for variable steps
    
    # Modify the rle to coordinates by summing it and adjust to make it fit (+1)
    startCoord <- diffinv(runLength(piledRleData))+1
    startCoord <- format(startCoord[1:(length(startCoord)-1)], scientific=FALSE, trim=TRUE)
    score <- format(runValue(piledRleData), scientific=FALSE, digits=10, trim=TRUE, drop0trailing = TRUE)
    
    # Writing to file
    writeLines(paste(startCoord, score, sep=" "), con=fileCon, sep="\n")
    
    close(fileCon)
    
    
    resultingFiles <- outputFileName
    
    return(resultingFiles)
    
}


.writeBED_chr <- function(piledRleData, baseFileName, resultFolder=".")
{
    
    currentChr <- metadata(piledRleData)[["chr"]]
    
    cat(paste(currentChr, "... ", sep=""))
    
    # Creating the file and writing the track descriptions
    outputFileName <- file.path(resultFolder, paste("TEMP_BED_", baseFileName, ".", currentChr, sep=""))
    
    # Binary mode to avoid conversion of \n for specific platforms
    fileCon <- file(outputFileName, open="wb")

    
    # Using the RLE to compute the coordinates for variable steps
    
    # Modify the rle to coordinates by summing it (BEDgraph coordinates are O-based as opposed to wiggle ones)
    coordBase <- diffinv(runLength(piledRleData))
    endCoord <- format(coordBase[2:(length(coordBase))], scientific=FALSE, trim=TRUE)
    startCoord <- format(coordBase[1:(length(coordBase)-1)], scientific=FALSE, trim=TRUE)
    score <- format(runValue(piledRleData), scientific=FALSE, digits=10, trim=TRUE, drop0trailing = TRUE)
    
    # Writing to file
    writeLines(paste(currentChr, startCoord, endCoord, score, sep=" "), con=fileCon, sep="\n")
    
    close(fileCon)
    
    
    resultingFiles <- outputFileName
    
    return(resultingFiles)
    
}



.writeWIGfs_chr <- function(binnedDataRle, baseFileName, binSize, resultFolder=".", compatibilityOutputWIG=FALSE)
{
    
    currentChr <- metadata(binnedDataRle)[["chr"]]
    
    cat(paste(currentChr, "... ", sep=""))
    
    # Formatting numbers to remove trailing 0s and avoid scientific notation
    formattedScores <- format(as.numeric(binnedDataRle), scientific=FALSE, digits=10, trim=TRUE, drop0trailing = TRUE)
    
    # Creating the file and writing the track descriptions
    outputFileName <- file.path(resultFolder, paste("TEMP_WIGfs_", baseFileName, ".", currentChr, sep=""))
    
    # Binary mode to avoid conversion of \n for specific platforms
    fileCon <- file(outputFileName, open <- "wb")
    
    # Repeat track line for every chromosome only in compatibility mode (non standard WIG format) 
    if(compatibilityOutputWIG)
    {
        trackLine <- paste("track type=wiggle_0 name=\"", baseFileName, "\"", sep="")
        writeLines(trackLine, con=fileCon, sep="\n")
    }
    
    descLine <- paste("fixedStep chrom=", currentChr, " start=", trunc(binSize/2), " step=", binSize, sep="")
    writeLines(descLine, con=fileCon, sep="\n")
    
    # Writing the binned values
    writeLines(formattedScores, con=fileCon, sep="\n")
    
    close(fileCon)
    
    
    resultingFiles <- outputFileName
    
    return(resultingFiles)
    
}


.writeGFF_chr <- function(binnedDataRle, baseFileName, binSize, resultFolder=".")
{
    currentChr <- metadata(binnedDataRle)[["chr"]]
    
    cat(paste(currentChr, "... ", sep=""))
    
    # Formatting numbers to remove trailing 0s and avoid scientific notation
    formattedScores <- format(as.numeric(binnedDataRle), scientific=FALSE, digits=10, trim=TRUE, drop0trailing = TRUE)
    
    # Creating the file and writing the track descriptions
    outputFileName <- file.path(resultFolder, paste("TEMP_GFF_", baseFileName, ".", currentChr, sep=""))
    
    # Binary mode to avoid conversion of \n for specific platforms
    fileCon <- file(outputFileName, open="wb")
    
    coord <- seq(0, by=binSize, length.out=length(formattedScores))
    
    # Writing the GFF lines (alternative with blocks spanning all the chr)
    #writeLines(paste(currentChr, expName, ".", format(coord, scientific=FALSE, trim=TRUE), format(coord+binSize-1, scientific=FALSE, trim=TRUE), piledBinnedFormatted, ".", ".", format(1:length(piledBinnedFormatted), scientific=FALSE, trim=TRUE), sep="\t"), con=fileCon, sep="\n")
    
    # Writing the GFF lines
    writeLines(paste(currentChr, baseFileName, ".", format(coord+trunc(binSize/2), scientific=FALSE, trim=TRUE), format(coord+trunc(binSize/2), scientific=FALSE, trim=TRUE), formattedScores, ".", ".", paste(currentChr,"_",format(1:length(formattedScores), scientific=FALSE, trim=TRUE), sep=""), sep="\t"), con=fileCon, sep="\n")
    
    close(fileCon)
    
    
    resultingFiles <- outputFileName
    
    return(resultingFiles)
    
}




############################################
# ".readTextFile" will be used to extend the number of file types that readAligned can handle
# It will be used by the function 'readAlignedFiles' above to be able to read GFF and BED files
# It returns an 'AlignedRead' (ShortRead) object with as much information as could be found in the file/arguments
# Default arguments defined for basic BED files.
# If there is no SEQ colum (NA) it will generate a 'fake' sequence filled with Ns based on the size ('end-begin' if no fragmentLength provided)
# columnsIndexes=c(chr=1, begin=2, end=6, strand=5, seq=NA, NULL=4, NULL=3)
.readTextFile <- function(fileName, chrToSelect=NA, columnsIndexes=c(chr=1, begin=2, end=3, strand=4, seq=NA), separator=c(" ", "\t"), positiveStrand=c("F", "1",  "+"), negativeStrand=c("R", "-1", "-"), readLength=NA, chunckSize=(2^19))
{
    
    if(is.na(readLength) && (sum(is.na(columnsIndexes[c("begin", "end")]))>=1) && is.na(columnsIndexes["seq"])) warning("Input : There is no sequence column in the input file and not enough information to get the reads sizes (begin and end column or 'readLength' argument) --> your reads will be assumed to have length 0... very unlikely to be true...")
    
    seq <- list()
    chr <- list()
    position <- list()
    strand <- list()
    
    # Will specify to scan the type of the data columns
    # Must be in the order specified in columnsIndexes (names MUST match !!!)
    myWhat <- list(chr=character(), begin=integer(), end=integer(), strand=character(), seq=character(), NULL=NULL)[names(sort(columnsIndexes))]
    
    con <- file(fileName, open="rt")
    
    # Reading the file by chunks
    while(length((currentRead <- scan(file=con, nmax=chunckSize, what=myWhat)))>0)
    {
#cat(".")
        
        print("1")
        
        chrInSelection <- rep(TRUE,chunckSize)
        
        print("2")
        # Remove the prefix and suffix of the chromosomes
        #currentRead[["chr"]]=gsub(paste(chrPrefix, chrSuffix, sep="|"), "", currentRead[["chr"]]) # Done later now
        print("3")
        if(!is.na(chrToSelect))
        {
            # Select the reads on the desired chromosome
            chrInSelection <- (currentRead[["chr"]] %in% chrToSelect)
        }
        
        if(any(chrInSelection))
        {
            print("4")
            chr <- c(chr,currentRead[["chr"]])
            print("5")
            # Begin and End columns are separated so we can be sure to take the minimum (left) as position of the read (as assumed by readAligned)
            # Even if the strand orientation affect in which wolumn the begin and end are
            beginColumn  <-  as.integer(currentRead[["begin"]])
            endColumn    <-  as.integer(currentRead[["end"]])
            print("6")
            # This will work even if there is no begin or no end column (na.rm)
            leftCoord   <-  mapply(min, beginColumn, endColumn, na.rm=TRUE)
            rightCoord  <-  mapply(max, beginColumn, endColumn, na.rm=TRUE)
            print("7")
            position <- c(position, leftCoord)
            print("8")
            strand <- c(strand,currentRead[["strand"]])
            print("9")
            # If there was no column for the sequence (NA), it will fill it with Ns (nb based on readLength or coordinates)
            if(is.na(columnsIndexes["seq"]))
            {
                seqLengthTAB <- if(is.na(readLength)) (rightCoord-leftCoord) else rep(readLength, sum(chrInSelection))
                print("10")
                # Make a list of unique "N" character and repeat them using seqLengthTAB in mapply, then collapse all Ns for each element using sapply
                replacementSeqs <- DNAStringSet(mapply(function(x,seqLength){paste(rep(x,seqLength),collapse="")},"N", seqLengthTAB))
                print("11")
                seq <- c(seq, replacementSeqs)
                print("12")
            }
            else
            {
                seq <- c(seq,currentRead[["seq"]])
            }
        }
        
    } # /WHILE READLINES
    
    close(con)
    
    seq <- do.call(c, seq)
    chr <- do.call(c, chr)
    position <- do.call(c, position)
    strand <- do.call(c, strand)
    
    # Converting various strand names that can be encountered to the standard factor with appropriate levels
    strand[strand %in% positiveStrand] <- "+"
    strand[strand %in% negativeStrand] <- "-"
    strand <- factor(strand, levels=levels(strand()))
    
    return(ShortRead::AlignedRead(sread=seq, seqnames=as.factor(paste("chr", chr, sep="")), position=position, strand=strand))
}

#system.time((a=.readTextFile()))

############################################
# SAVED COPY
.readTextFile <- function(fileName, chrPrefix="chr", chrSuffix="", chrToSelect=NA, columnsIndexes=c("chr"=1, "begin"=2, "end"=3, "strand"=4, "seq"=NA), separator=c(" ", "\t"), positiveStrand=c("F", "1",  "+"), negativeStrand=c("R", "-1", "-"), readLength=NA, chunckSize=(2^16))
{
    
    if(is.na(readLength) && (sum(is.na(columnsIndexes[c("begin", "end")]))>=1) && is.na(columnsIndexes["seq"])) warning("Input : There is no sequence column in the input file and not enough information to get the reads sizes (begin and end column or 'readLength' argument) --> your reads will be assumed to have length 0... very unlikely to be true...")
    
    seq <- list()
    chr <- list()
    position <- list()
    strand <- list()
    
    con <- file(fileName, open="rt")
    
    # Reading the file by chunks
    while(length((currentLine <- readLines(con=con, n=chunckSize)))>0)
    {
#cat(".")
        chrInSelection <- rep(TRUE,chunckSize)
        
        # Split the line by separators and get them as data.frame or matrix
        splittedLine <- do.call(rbind,strsplit(currentLine, paste("[",paste(separator, collapse="|"), "]", sep="")))
        
        # Remove the prefix and suffix of the chromosomes
        splittedLine[,columnsIndexes["chr"]] <- gsub(paste(chrPrefix, chrSuffix, sep="|"), "", splittedLine[,columnsIndexes["chr"]])
        
        if(!is.na(chrToSelect))
        {
            # Select the reads on the desired chromosome
            chrInSelection <- (splittedLine[,columnsIndexes["chr"]] %in% chrToSelect)
        }
        
        if(any(chrInSelection))
        {
            chr <- c(chr,splittedLine[chrInSelection,columnsIndexes["chr"]])
            
            # Begin and End columns are separated so we can be sure to take the minimum (left) as position of the read (as assumed by readAligned)
            # Even if the strand orientation affect in which wolumn the begin and end are
            beginColumn  <-  as.integer(splittedLine[chrInSelection,columnsIndexes["begin"]])
            endColumn    <-  as.integer(splittedLine[chrInSelection,columnsIndexes["end"]])
            
            # This will work even if there is no begin or no end column (na.rm)
            leftCoord   <-  mapply(min, beginColumn, endColumn, na.rm=TRUE)
            rightCoord  <-  mapply(max, beginColumn, endColumn, na.rm=TRUE)
            
            position <- c(position, leftCoord)
            
            strand <- c(strand,splittedLine[chrInSelection,columnsIndexes["strand"]])
            
            # If there was no column for the sequence (NA), it will fill it with Ns (nb based on readLength or coordinates)
            if(is.na(columnsIndexes["seq"]))
            {
                seqLengthTAB <- if(is.na(readLength)) (rightCoord-leftCoord) else rep(readLength, sum(chrInSelection))
                # Make a list of unique "N" character and repeat them using seqLengthTAB in mapply, then collapse all Ns for each element using sapply
                replacementSeqs <- DNAStringSet(mapply(function(x,seqLength){paste(rep(x,seqLength),collapse="")},"N", seqLengthTAB))
                seq <- c(seq, replacementSeqs)
            }
            else
            {
                seq <- c(seq,splittedLine[chrInSelection,columnsIndexes["seq"]])
            }
        }
        
    } # /WHILE READLINES
    
    close(con)
    
    seq <- do.call(c, seq)
    chr <- do.call(c, chr)
    position <- do.call(c, position)
    strand <- do.call(c, strand)
    
    # Converting various strand names that can be encountered to the standard factor with appropriate levels
    strand[strand %in% positiveStrand] <- "+"
    strand[strand %in% negativeStrand] <- "-"
    
    strand <- factor(strand, levels=levels(strand()))
    
    return(ShortRead::AlignedRead(sread=seq, seqnames=as.factor(paste("chr", chr, sep="")), position=position, strand=strand))
}




############################################################################################################################################
##### This is the main function of the pipeline, calling all the other ones
############################################################################################################################################
#
processPipeline <- function(
        # I/O GENERAL PARAMETERS
        INPUTFilesList,
        resultSubFolder               = "Results_Pasha",
        reportFilesSubFolder          = ifelse(length(resultSubFolder)>1,resultSubFolder[2], "ReportFiles"),
        WIGfs                         = TRUE,
        WIGvs                         = FALSE,
        GFF                           = FALSE,
        BED                           = FALSE,
		BIGWIG						  = FALSE,
        compatibilityOutputWIG        = FALSE,
        # COMPLEX PARAMETERS (ATOMIC OR VECTORS OR LIST OF IT)
        incrArtefactThrEvery          = 7000000,
        binSize                       = 50,
        elongationSize                = NA,
        rangeSelection                = IRanges(0,-1), 
        annotationFilesGFF            = NA, # GFF files
        annotationGenomeFiles         = NA, # path to file or "mm9", "hg19"... 
        # SINGLE PARAMETERS
        elongationEstimationRange     = c(mini=150, maxi=400, by=10),
        rehabilitationStep            = c("orphans","orphansFromArtefacts"),
        removeChrNamesContaining      = "random|hap",
        ignoreInsertsOver             = 500,
        nbCPUs                        = 1,
        keepTemp                      = TRUE, # Keep the intermediary files that led to the final ones (rehab and multi)
        logTofile                     = "./log.txt",
        eraseLog                      = FALSE,
        # LIST PARAMETERS (one element per expName)
        multiLocFilesList             = list()) # A list with experiments names and associated filenames to treat
{
    
    # Checking main arguments
    if(!all(c(is.logical(WIGfs), is.logical(WIGvs), is.logical(GFF), is.logical(BED), is.logical(BIGWIG), is.logical(compatibilityOutputWIG)))) stop("Arguments 'WIGfs', 'WIGvs', 'GFF', 'BED', 'BIGWIG', and 'compatibilityOutputWIG' must be logical (and not NA)...")
    
    if(any(c(is.na(WIGfs),is.na(WIGvs),is.na(GFF),is.na(BED), is.na(BIGWIG), is.na(compatibilityOutputWIG)))) stop("Arguments 'WIGfs', 'WIGvs', 'GFF', 'BED', 'BIGWIG', and 'compatibilityOutputWIG' cannot be NA...")
    
    if(compatibilityOutputWIG)
    {
        warningMessage <- "Argument 'compatibilityOutputWIG' is used for compatibility with previous versions only and generates non-standard WIG files (repeated track line), please consider changing this argument to FALSE..."
        cat("\nWARNING :", warningMessage)
        warning(warningMessage)
    }
    
    if(!is.character(rehabilitationStep)) stop("Argument rehabilitationStep must be a character vector (empty or a vector of values in 'orphans' and 'orphansFromArtefacts')")
    if(length(rehabilitationStep)>0)
    {
        if(!all(rehabilitationStep %in% c("orphans","orphansFromArtefacts"))) stop("Argument rehabilitationStep must be a character vector (empty or a vector of values in 'orphans' and 'orphansFromArtefacts')")    
    }
    
    if(!is.na(ignoreInsertsOver))
    {
        if(!(is.numeric(ignoreInsertsOver) && (length(ignoreInsertsOver)==1) && (ignoreInsertsOver>0))) stop("Argument ignoreInsertsOver must be a strictly positive number or NA")
    }
    
    if(!(is.numeric(elongationEstimationRange) && (length(elongationEstimationRange)==3) && all(names(elongationEstimationRange) %in% c("mini","maxi","by")) && all(elongationEstimationRange>0))) stop("Argument elongationEstimationRange must be a named vector of 3 strictly positive numeric values, named 'mini', 'maxi', 'by'")
    
    if(!(is.character(removeChrNamesContaining) && (length(removeChrNamesContaining)==1))) stop("Argument removeChrNamesContaining must be an atomic (length==1) character string (empty or not)")
    
    if(!(is.logical(keepTemp) && (length(keepTemp)==1))) stop("Argument keepTemp must be an atomic logical value")
    
    if(!(is.logical(eraseLog) && (length(eraseLog)==1))) stop("Argument eraseLog must be an atomic logical value")
    
    if(is.numeric(nbCPUs) && (length(nbCPUs)==1))
    {
        options("mc.cores"=nbCPUs)
        # Taking advantage of setting options for parallel to encapsulate mclapply and mcmapply in a wrapper that automatically desactivate the prescheduling
        mclapply <- function(X, FUN,...) {parallel::mclapply(X, FUN,..., mc.preschedule=FALSE)}
        mcmapply <- function(FUN,...) {parallel::mcmapply(FUN,..., mc.preschedule=FALSE)}
    } else
    {
        stop("nbCPUs argument must be a strictly positive number")
    }
    
    # Checking files description and arguments
    
    inputMandatoryInfo <- c("expName", "fileName", "fileType")
    inputOptionalInfo <- c("chrPrefix", "chrSuffix", "pairedEnds", "midPoint")
    inputOptionalInfo_Default <- c("chrPrefix"="", "chrSuffix"="", "pairedEnds"=FALSE, "midPoint"=FALSE)
    
    # Reading files to treat from a list file
    if(is.character(INPUTFilesList))
    {
        # in case the user provided a file name in which experiments parameters are described
        if(file.exists(INPUTFilesList))
        {
            # Read the file and format its info as a list for processing
            expListFromFile <- read.table(INPUTFilesList, header=TRUE, quote="", sep="\t", stringsAsFactors=FALSE)
            
            # Check that minimal information is present
            if(!all(inputMandatoryInfo %in% colnames(expListFromFile)))
            {
                stop(paste("The input file listing the experiments lacks some mandatory information : ",paste(inputMandatoryInfo, collapse=", "),sep=""))        
            }
            
            # Complete missing columns with default values
            for(currentOption in inputOptionalInfo)
            {
                if(!currentOption %in% colnames(expListFromFile))
                {
                    expListFromFile <- cbind(expListFromFile, currentOption=inputOptionalInfo_Default[currentOption])
                }
            }
            
            # Create a list with filename (used to mapply) and named by experiment names
            INPUTFilesList <- as.list(expListFromFile[["fileName"]])
            names(INPUTFilesList) <- expListFromFile[["expName"]]
            
            # Create the final list with all checked arguments (keeping the experiments names)
            INPUTFilesList <- mapply(function(fileName, fileType, chrPrefix, chrSuffix, pairedEnds, midPoint)
                    {
                        return(list(folderName=dirname(fileName), fileName=basename(fileName), fileType=fileType, chrPrefix=chrPrefix , chrSuffix=chrSuffix, pairedEnds=pairedEnds, midPoint=midPoint))
                    }, 
                    INPUTFilesList, 
                    expListFromFile[["fileType"]], 
                    expListFromFile[["chrPrefix"]], 
                    expListFromFile[["chrSuffix"]], 
                    expListFromFile[["pairedEnds"]], 
                    expListFromFile[["midPoint"]], 
                    SIMPLIFY=FALSE)
        } else
        {
            stop("The file describing experiments does not exist !")
        }
    }
    
    
    # check that the list (either read from file or passed directly as argument) has all mandatory fields
    if(!all(sapply(INPUTFilesList, function(x){c("folderName", "fileName", "fileType", "chrPrefix", "chrSuffix", "pairedEnds", "midPoint") %in% names(x)})))
    {
        stop("Your input file list does not contain all required names. It should contain the following named elements:'folderName', 'fileName', 'fileType', 'chrPrefix', 'chrSuffix', 'pairedEnds', 'midPoint'")
    }
    
    
    
    # Checking that experiment names are all different
    if(any(duplicated(names(INPUTFilesList)))) stop("All experiment names must be different in order to avoid confusing returned result. In case one really wants it, the function should be called twice...")
    
    # Checking if files to process exist
    if(!all(sapply(INPUTFilesList, function(x){file.exists(file.path(x$folderName, x$fileName))}))) stop("At least one filename doesn't refer to an existing file to process...")
    # Same but adapted to regular expression pattern (accept gziped files for instance) this is better since the readAligned function does use regular expression pattern
    # -- Lately te regular expression from readaligned are ignored thanks to "^" and "$" added in the pattern, it avoid to load files with approaching names without knowing it...
    #if(!all(sapply(INPUTFilesList, function(x){length(dir(x$folderName, pattern=x$fileName))==1}))) stop("At least one filename pattern doens't refer to an existing file to process or refers to several files")
    
    # Checking that chrPrefix and chrSuffix are characters tabs of length 1
    if(!all(sapply(INPUTFilesList, function(x){return(is.character(x$chrPrefix) && length(x$chrPrefix)==1)}))) stop("At least one experiment has not chrPrefix defined as atomic character value...")
    if(!all(sapply(INPUTFilesList, function(x){return(is.character(x$chrSuffix) && length(x$chrSuffix)==1)}))) stop("At least one experiment has not chrSuffix defined as atomic character value...")
    
    # Checking that PairedEnds, midPoint and dontSort are boolean tabs of length 1
    if(!all(sapply(INPUTFilesList, function(x){return(is.logical(x$pairedEnds) && length(x$pairedEnds)==1)}))) stop("At least one experiment has invalid 'pairedEnds' value (atomic logical expected)...")
    if(!all(sapply(INPUTFilesList, function(x){return(is.logical(x$midPoint) && length(x$midPoint)==1)}))) stop("At least one experiment has invalid 'midPoint' value (atomic logical expected)...")
    #if(!all(sapply(INPUTFilesList, function(x){return(is.logical(x$dontSort) && length(x$dontSort)==1)}))) stop("At least one experiment has invalid 'dontSort' value (atomic logical expected)...")
    
    if(is.list(multiLocFilesList) && (length(is.list(multiLocFilesList))>0))
    {
        # Checking if files for multiLoc also exist
        if(!all(sapply(multiLocFilesList, file.exists))) stop("At least one of the multiLoc file specified does not exist or is not readable...")
        
        # Checking if files for multiLoc actually refer to an existing experiment !
        if(!all(names(multiLocFilesList) %in% names(INPUTFilesList))) stop("At least one of the multiLoc files list does refer to an experiment name that does not exist in the experiments files list...")
    }
    
    
    #### CHECKING AND REFORMATING IMPORTANT OR COMPLEX PARAMETERS
    # Complex parameters are the ones which can be defined in several ways
    # These ones can be either an atomic value, a vector of values, or a list of previous choices sharing the same names as INPUTFilesList (one per experiment)
    # The '.checkComplexParam' function transforms atomic values or vectors to a final list repeating these parameters for each expriment
    # It will also check for structure (experiment names) and values consistency (validity function) 
    
    incrArtefactThrEvery <- .checkComplexParam(incrArtefactThrEvery, "incrArtefactThrEvery", INPUTFilesList) # NA for no artefact removal, otherwise >=0 for estimation of the threshold based on the number of reads in the experiment, negative value for direct specification of the threshold...
    
    binSize <- .checkComplexParam(binSize, "binSize", INPUTFilesList, validValuesFct=function(x){return( (is.numeric(x)) && (length(x)>0) && all(x>0) )})
    
    elongationSize <- .checkComplexParam(elongationSize, "elongationSize", INPUTFilesList, validValuesFct=function(x){return( ((is.numeric(x) && all(x>=0)) || is.na(x)) && (length(x)>0)) }) # NA for automatic estimation or 0 for no elongation, no negative
    
    rangeSelection <- .checkComplexParam(rangeSelection, "rangeSelection", INPUTFilesList, validValuesFct=function(x){return( ((class(x)=="IRanges") && (length(x)>0) && all(start(x)>=0)) || (is.numeric(x) && (length(x)==1) && (x>1)) )})
    
    # Two validation functions that are a bit more complex are defined outside the call
    .validateAnnotationsFileNames <- function(x)
    {
        # check format
        if(length(x)<1) return(FALSE)
        
        if(length(x)==1 && is.na(x)) return(TRUE)
        # Checking if it's an existing file
        if(is.character(x) && all(file.exists(x))) return(TRUE)
        # Not valid
        return(FALSE)
    }
    
    annotationFilesGFF <- .checkComplexParam(annotationFilesGFF, "annotationFilesGFF", INPUTFilesList, validValuesFct=.validateAnnotationsFileNames)
    
    
    .validateGenomeFileNames <- function(x)
    {
        # check format
        if(length(x)!=1) return(FALSE)
        
        if(is.na(x)) return(TRUE)
        # Checking if it's an existing file
        if(is.character(x) && file.exists(x)) return(TRUE)
        # Otherwise checking if it's a reference to a precomputedReferenceFile in resources ("mm9" ...)
        precomputedReferenceFilesFolder="resources"
        if(sum(nchar(system.file(precomputedReferenceFilesFolder, paste(x, ".ref", sep="") ,package="Pasha")))>0) return(TRUE)
        # Not valid
        return(FALSE)
        
    }
    
    annotationGenomeFiles <- .checkComplexParam(annotationGenomeFiles, "annotationGenomeFiles", INPUTFilesList, validValuesFct=.validateGenomeFileNames)
    

    #pairedEnds=.checkComplexParam(pairedEnds, "pairedEnds", INPUTFilesList, validValuesFct=function(x){return( (is.logical(x)) && (length(x)==1) )})
    #dontSort=.checkComplexParam(dontSort, "dontSort", INPUTFilesList, validValuesFct=function(x){return( (is.logical(x)) && (length(x)==1) )})
    #midPoint=.checkComplexParam(midPoint, "midPoint", INPUTFilesList, validValuesFct=function(x){return( (is.logical(x)) && (length(x)==1) )})
    
    
    ####
    
#
#    if((!is.null(previousReturn)) & (!is.na(previousReturn)))
#    {
#        if(is.character(previousReturn))
#        {
#            trycatch(
#            {
#                load(previousReturn)
#            },
#            error=function(x)
#            {
#                stop("The 'previousReturn' parameter is not a valid R file")
#            })
#            previousReturn=returnList
#        }
#        else if(is.list(previousReturn))
#        {
#            # Has been removed because when a computation crash it might be useful to use it, even if there is not all experiments informations
#            #if(!all(names(INPUTFilesList) %in% names(previousReturn))) stop("The 'previousReturn' argument doens't contain information on all experiments")
#        }
#        else
#        {
#            stop("The 'previousReturn' parameter must be a list or a valid filename")
#        }
#    }
    
    ####
    
    # Flag indicating whether a generallog file has finally been created or not (to release it at the end)
    generalLogFile <- FALSE
    
    if(is.null(logTofile))
    {
        cat("\n\nNo general log file specified...")
    } else if((length(logTofile)==1) & is.character(logTofile) & (nchar(logTofile)>0))# Redirecting all the console messages to the specified log file
    {
        if(file.exists(logTofile) && (!eraseLog)) stop("The general log file (for all experiments) specified already exists, erase or rename it first !")
        sink(file=logTofile, split=TRUE)
        generalLogFile <- TRUE
        cat("\n")
        cat("\nLog to file :", logTofile)
    } else
    {
        stop("Invalid log file parameter (expected NULL or a valid filename)")
    }
    
    #cat("\n")
    #cat("\nCALL :", deparse(match.call))
    
    cat("\n\n")
    
    cat("\nNumber of experiment(s) :", length(INPUTFilesList))
    
    
    ## COMPLEX PARAMETERS
    
    cat("\nArtefacts threshold :") # , if(is.na(incrArtefactThrEvery) | incrArtefactThrEvery<=0) "NotRemoving" else incrArtefactThrEvery)
    mapply(function(expName, incrArtefactThrEverys)
            {
                cat("\n   ",expName,":", replace(incrArtefactThrEverys, is.na(incrArtefactThrEverys),"NotRemoving"))
            }, names(incrArtefactThrEvery), incrArtefactThrEvery)
    
    cat("\nBins size :")# , if(is.na(binSize) | binSize<0) "Not piling" else binSize)
    mapply(function(expName, binSizes)
            {
                cat("\n   ",expName,":", replace(binSizes, is.na(binSizes),"NoPiling"))
            }, names(binSize), binSize)
    
    cat("\nElongation size (shifting if midpoint) :")# , if(is.na(elongationSize) | elongationSize<0) "Automatic" else elongationSize)
    mapply(function(expName, elongationSizes)
            {
                cat("\n   ",expName,":", replace(elongationSizes, is.na(elongationSizes),"Automatic"))
            }, names(elongationSize), elongationSize)
    
    cat("\nSelection of inserts/reads specific range(s) :")# , if(is.na(elongationSize) | elongationSize<0) "Automatic" else elongationSize)
    resNULL <- mapply(function(expName, rangeSelection)
            {
                cat("\n   ",expName,":", if(is.numeric(rangeSelection)) paste(rangeSelection, "equal groups and a group with all") else paste(ifelse(width(rangeSelection)==0,"AllReads",paste(start(rangeSelection),"-", end(rangeSelection),sep="")), collapse=" "))
            }, names(rangeSelection), rangeSelection)
    
    ## EXPERIMENT PARAMETERS
    
    cat("\nSequencing and aligning strategy :")
    mapply(function(expName, expParams)
            {
                cat("\n   ",expName,"declared as :", ifelse(expParams$pairedEnds, "Paired ends","Single end"))
            }, names(INPUTFilesList), INPUTFilesList)
    
    cat("\nElongation and piling strategy :")
    mapply(function(expName, expParams)
            {
                cat("\n   ",expName,"will be piled with", ifelse(expParams$midPoint, "MIDPOINT", "classic"), "strategy")
            }, names(INPUTFilesList), INPUTFilesList)
    
    
#
#cat("\nMultiple location reads file :")
#mapply(function(expName, multiLocFile)
#        {
#            cat("\n   ",expName,":", replace(multiLocFile, is.na(multiLocFile),"No multiloc file"))
#        }, names(multiLocFilesList), multiLocFilesList)
    
    cat("\n\nOutput piled-up format(s)", paste(if(WIGfs) "WIG fixed steps", if(WIGvs) "WIG variable steps", if(GFF) "GFF fixed steps", if(BED) "Bedgraph variable step", sep= " - "), sep=" : ")
    cat("\nNumber of CPUs or Cores to use (if available) :", nbCPUs)
    cat(if(keepTemp) "\nThe temporary output files for each pileup category will be kept" else "\nThe temporary output files for each pileup category will be discarded")
    cat("\nWhen applicable, elongation will be estimated from ", elongationEstimationRange["mini"], "bp to ", elongationEstimationRange["maxi"], "bp, every ", elongationEstimationRange["by"], "bp", sep="")
    
    if(length(rehabilitationStep)>0)
    {
        cat("\nRehabilitation module will be loaded with (if applicable) :", paste(rehabilitationStep, collapse=" - "))
    } else
    {
        cat("\nNo rehabilitation for orphan reads")
    }
    
    if(nchar(removeChrNamesContaining)>0)
    {
        cat("\nChromosomes filter will remove chromosome names (seqnames) containing :", removeChrNamesContaining)
    } else
    {
        cat("\nNot filtering chromosome by names")
    }
    
    if(!is.na(ignoreInsertsOver))
    {
        cat("\nIf applicable paired-end inserts over ",ignoreInsertsOver, "bp will be ignored", sep="")
    } else
    {
        cat("\nNot filtering inserts outliers by size")
    }
    
    
    # Will keep the elements to return for each experiment
    returnList <- list()
    
    for(expName in names(INPUTFilesList))
    {
        
        returnList[[expName]] <- list()
        # List storing the time of execution for the main steps (useful for benchmarking different nb of processors)
        returnList[[expName]][["execTime"]] <- list()
        
        # This name is used to store the progress accross the program loops in order to report statistics for each steps
        programProgressName <- expName
        
        
        # Creating a general folder to store results
        resultFolder <- file.path(INPUTFilesList[[expName]]$folderName, paste(resultSubFolder[1], ifelse(INPUTFilesList[[expName]]$pairedEnds,"_PE","_SE" ), sep=""))
        
        .safeCreateFolder(resultFolder)
        
        
        # Create a timestamp for the local log file
        currentTime <- as.POSIXlt(Sys.time())
        stampPrefix <- format(currentTime, format="%Y_%m_%d_%Hh%M")
        
        cat("\n\nProgram version :",as.character(packageVersion("Pasha")))
        
        
        # Redirecting the log for this experiment specifically in the log folder of the experiment
        localLogToFile <- file.path(resultFolder, paste(expName,"_",stampPrefix,"_Pasha.log",sep=""))
        sink(file=localLogToFile, split=TRUE)
        
        
        cat("\n\nLocal log to file :", localLogToFile)
        
        
        cat("\n\n")
        ts <- paste("##------", date(), "------##")
        cat("\n",ts,"\n")
        title <- paste("------ EXP : ",expName," ------",sep="")
        cat("\n")
        cat(paste(rep("-",nchar(title)),collapse=""),"\n")
        cat(title,"\n")
        cat(paste(rep("-",nchar(title)),collapse=""),"\n")
        
        
        #### READING ALIGNED FILE
        
        cat("\nREADING ALIGNED FILE")
        
        alignedDataObject <- NULL
        
        # If the 'previousReturn' return parameter is specified, try to get the binary aligned object from it (directly from the object or on disk), otherwise reads the original file name
#        if("binaryObject" %in% names(previousReturn[[expName]]))
#        {
#            cat("\n From previous analysis (parameter)")
#            alignedDataList=previousReturn[[expName]][["binaryObject"]]
#        }
#        else if("binaryObjectFileName" %in% names(previousReturn[[expName]]))
#        {
#            cat("\n From previous analysis (file)")
#            returnList[[expName]][["execTime"]][["LoadingFile_From_Binary"]]=system.time(tryCatch(load(previousReturn[[expName]][["binaryObjectFileName"]]),error=function(e){cat("\n Error while reading binary file... Will read the original file instead...")returnList[[expName]][["execTime"]][["LoadingFile"]]=system.time((alignedDataList<<-readAlignedFiles(INPUTFilesList[[expName]]$folderName, INPUTFilesList[[expName]]$fileName, fileType=INPUTFilesList[[expName]]$fileType, pairedEnd=pairedEnd[[expName]], dontSort=dontSort[[expName]])))}))
#        }
#        else
#        {
#            cat("\n From file")
#            # Read the current experiment file
#            returnList[[expName]][["execTime"]][["LoadingFile"]]=system.time((alignedDataList<<-readAlignedFiles(INPUTFilesList[[expName]]$folderName, INPUTFilesList[[expName]]$fileName, fileType=INPUTFilesList[[expName]]$fileType, pairedEnd=pairedEnd[[expName]], dontSort=dontSort[[expName]])))
#        }
        
        
        returnList[[expName]][["execTime"]][[paste(programProgressName,"Reading File(s)",sep=" | ")]] <- system.time((alignedDataObject <- readAlignedData(INPUTFilesList[[expName]]$folderName, INPUTFilesList[[expName]]$fileName, fileType=INPUTFilesList[[expName]]$fileType, pairedEnds=INPUTFilesList[[expName]]$pairedEnds)))
        
        if(is.null(alignedDataObject)) stop("An error occured while reading data to process...")
        
        # Get the number of reads in the experiment
        nbReads <- length(alignedDataObject)
        if(nbReads==0) 
        {
            warningMessage="The aligned file couldn't be read properly or did not carry information about any valid aligned reads, please check your alignment statistics. Skipping..."
            cat("\nWARNING : ", warningMessage);
            warning(warningMessage);
            next
        }
        
        # Print content summary of AlignedData object (the warning is not required since the pipeline takes care of the oprhans later)
        resNULL <-  .summarizeAlignedData(alignedDataObject, noWarning=TRUE)
        
        
        # Saving the loaded data to binary R object for faster reloading in eventual recomputation
#        if("file" %in% tolower(saveBINARY))
#        {
#            binaryObjectFileName=paste(reportFilesFolder,expName, "_binaryObject.RDATA",sep="")
#            save(alignedData, file=binaryObjectFileName)
#            returnList[[expName]][["binaryObjectFileName"]]=binaryObjectFileName
#        }
#        if("return" %in% tolower(saveBINARY))
#        {
#            returnList[[expName]][["binaryObject"]]=alignedData
#        }
        
        
        ## In case a multiLocFilesList is supplied for this experiment, try to load it
        chrSplit_multiLocDataObject <- NULL
        if(expName %in% names(multiLocFilesList))
        {
            cat("\n\n Reading reads that aligned in multiple locations...")
            multiLocDataObject <- .readMultipleAlignedData(fileName=multiLocFilesList[[expName]])
            cat("\n Nb of reads aligned in multiple location :", length(multiLocDataObject))
            chrSplit_multiLocDataObject <- split(multiLocDataObject, seqnames(multiLocDataObject), drop=TRUE)
        }
        else
        {
            cat("\n No file specified for reads with multiple alignments.")
        }
        
        # Removing the chromosomes not needed
        if(nchar(removeChrNamesContaining)>0)
        {
            alignedDataObject <- dropChromosomePattern(alignedDataObject, removeChrNamesContaining, quiet=FALSE)
        }
        # Get the number of reads left after filtering
        nbReadsLeft <- length(alignedDataObject)
        if(nbReadsLeft==0) 
        {
            warningMessage="No reads left in current experiment after filtering chromosomes. Skipping..."
            cat("\nWARNING : ", warningMessage);
            warning(warningMessage);
            next
        }
        
        
        # Cleaning the chromosome name according to the prefix and suffix specified
        alignedDataObject <- normalizeChrNames(alignedDataObject, chrPrefix=INPUTFilesList[[expName]]$chrPrefix, chrSuffix=INPUTFilesList[[expName]]$chrSuffix)
        
        # Removing eventual reads with undefined strand value
        alignedDataObject <- dropUndefinedStrand(alignedDataObject, quiet=FALSE)
        nbReadsLeft <- length(alignedDataObject)
        if(nbReadsLeft==0) 
        {
            warningMessage="No reads left in current experiment after removing undefined strand values. Skipping..."
            cat("\nWARNING : ", warningMessage);
            warning(warningMessage);
            next
        }
        
        # Isolate orphans reads from paired-ends experiments
        # These reads are stored and will be eventually used in the rehab module
        chrSplit_orphanReads <- NULL
        
        # Do some specific consistency checking and organization for paired-ends experiments
        if(pairedEnds(alignedDataObject))
        {
  
            # Apply a filter that handles reads with unobvious flags/properties
            alignedDataObject <- cleanNonSimplePairs(alignedDataObject, quiet=FALSE)
            
            nbReadsLeft <- length(alignedDataObject)
            if(nbReadsLeft==0) 
            {
                warningMessage="No reads left in current experiment after cleaning pairs information. Skipping..."
                cat("\nWARNING : ", warningMessage);
                warning(warningMessage);
                next
            }
            
            
            # Isolate the orphan reads (ones with no mate)
            orphansReadsIndex <- getOrphansIndexes(alignedDataObject, quiet=FALSE)
            cat("\n     Number of orphan reads (no aligned mates) :", sum(orphansReadsIndex), "->", format((sum(orphansReadsIndex)/nbReads)*100, scientific=FALSE, digits=3), "% of aligned reads")
            if(sum(orphansReadsIndex)>0)
            {
                orphanReads <- alignedDataObject[orphansReadsIndex]
                pairedEnds(orphanReads) <- FALSE # not pairs anymore
                chrSplit_orphanReads <- split(orphanReads, seqnames(orphanReads), drop=TRUE)
                rm(orphanReads)
                gc()
                alignedDataObject <- alignedDataObject[!orphansReadsIndex]
            }
            
            nbReadsLeft <- length(alignedDataObject)
            if(nbReadsLeft==0) 
            {
                warningMessage="No reads left in current experiment after removing orphan reads. Skipping..."
                cat("\nWARNING : ", warningMessage);
                warning(warningMessage);
                next
            }
            
            
            # Filtering "outliers" inserts by size specified
            if(is.numeric(ignoreInsertsOver) && (length(ignoreInsertsOver)==1) && (ignoreInsertsOver>0))
            {
                cat("\n Filtering outliers inserts (size > ",ignoreInsertsOver,")... ", sep="")
                alignedDataObject <- filterInsertSize(alignedDataObject, 0, ignoreInsertsOver)
                cat("Done !")
            }
            
            nbReadsLeft <- length(alignedDataObject)
            if(nbReadsLeft==0) 
            {
                warningMessage="No reads left in current experiment after removing long inserts (see argument 'ignoreInsertsOver'). Skipping..."
                cat("\nWARNING : ", warningMessage);
                warning(warningMessage);
                next
            }
            
            
            # Check pairs consistency
            if(!checkPairsOK(alignedDataObject)) 
            {
                cat("\n Paired-ends reads don't seem properly sorted, trying to reorganize the data...")
                alignedDataObject <- sortByPairs(alignedDataObject, quiet=FALSE)
                if(!checkPairsOK(alignedDataObject)) stop("The dataset could not be read/sorted properly")
            }
        }
        
        cat("\n Nb of reads after filters (orphans, chromosome names, outliers) :", length(alignedDataObject), "->", format((length(alignedDataObject)/nbReads)*100, scientific=FALSE, digits=3), "% of aligned reads")
        
        #### RANGE SELECTION
        
        # Keep a reminder to know whether the ranges were estimated automatically (then we need to include the lower value of the first range) or specified manually (the user should take care of including all values in the ranges sonce lower bound is excluded)
        automaticRanges <- FALSE
        
        # In case rangeSelection is an integer, make 'rangeSelection' equal groups of size (inserts or reads if respectively pairedEnds or not)
        if(is.numeric(rangeSelection[[expName]]))
        {
            automaticRanges <- TRUE
            
            if(rangeSelection[[expName]]<2) # Should not happensince the paramter is checked to be >1 at the beginning
            {
                rangeSelection[[expName]] <- IRanges(0,-1)
            } else
            {
                cat("\n\n Preparing", rangeSelection[[expName]], "equally sized groups of", ifelse(pairedEnds(alignedDataObject),"inserts","reads"))
                groupValues <- if(pairedEnds(alignedDataObject)) isize(alignedDataObject)[isize(alignedDataObject)>0] else qwidth(alignedDataObject)
                quantRes <- quantile(groupValues, prob=seq(0,1, length.out=rangeSelection[[expName]]+1)) # compute the group limits
                # Extract the group intervals from cut function (the factor helps to remove groups eventually unused because more groups than values)
                groupsRanges <- levels(factor(cut(groupValues, breaks=unique(quantRes), include.lowest=TRUE)))
                if(length(groupsRanges)!=rangeSelection[[expName]]) cat("\n The required number of reads range based on reads/inserts size could not be met. This can happen if the user asks for too many ranges as compared to then number of different values.")
                if(length(groupsRanges)==1)
                {
                    cat("\n A single range contains all the values.")
                    rangeSelection[[expName]] <- IRanges(0,-1)
                } else
                {
                    starts <- as.numeric(sub("[\\(\\[](.+),.*", "\\1", groupsRanges))
                    ends <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", groupsRanges))
                    # Storing ranges
                    #rangeSelection[[expName]] <- c(IRanges(0,-1),IRanges(start=c(quantRes[1]-1,quantRes[2:(length(quantRes)-1)]), end=quantRes[2:length(quantRes)])) # Make the actual groups
                    rangeSelection[[expName]] <- c(IRanges(start=starts, end=ends), IRanges(0,-1)) # Store the groups values as IRanges used later in the code, add a "total" group
                }
            }
        }
        
        
        if(any(duplicated(rangeSelection[[expName]])))
        {
            cat("\nIgnoring one or more duplicated range of insert (or read) size. Maybe the automatic cutting has been asked for too many groups considering the values or the user defined duplicated groups in the options.")
            rangeSelection[[expName]] <- unique(rangeSelection[[expName]])
        }
        
        # Plot a summary of ranges selection on the total distribution (ignore if only total, ie. empty range)
        if((length(rangeSelection[[expName]])>1) || (!isEmpty(rangeSelection[[expName]])))
        {
            
            sizeToPlot <- if(pairedEnds(alignedDataObject)) isize(alignedDataObject)[isize(alignedDataObject)>=0] else qwidth(alignedDataObject)
            
            # Remove empty ranges (that would overlap too much with other ones)
            rangesToPlot <- rangeSelection[[expName]][!isEmpty(rangeSelection[[expName]])]
            
            colorChart <- rainbow(length(rangesToPlot))
            
            pdf(file=file.path(resultFolder, paste(expName, "_size_ranges.pdf",sep="")), width=7, height=7)
            .plotDensityRanges(sizeToPlot, ranges=as.list(as.data.frame(rbind(start(rangesToPlot), end(rangesToPlot)))), ranges.col=colorChart, labels.col="black", include.lower=c(TRUE,rep(FALSE,length(rangesToPlot)-1)), include.upper=TRUE, sep.col="darkgrey", sep.lty=2, sep.lwd=0.5, ylab="", xlab=paste("Size of ", if(pairedEnds(alignedDataObject)) "inserts" else "reads", sep=""), main="Ranges of size selection")
            dev.off()
        }
        
        # Everything is loaded and formatted, before starting processing let's allow for a selection of reads/pairs according to a size range
        # The range selection ONLY occur on reads of the experiment, MULTIREADS ARE NOT FILTERED (mainly because paired-ends multiread is not supported) !!
        for(rangeIndex in 1:length(rangeSelection[[expName]]))
        {
            
            # Iranges are not directly "loopable"...
            currentRange <- rangeSelection[[expName]][rangeIndex]
            
            # Creating a human readable range name
            rangeName <- ifelse(isEmpty(currentRange),"AllReads",paste("Range",start(currentRange),"-",end(currentRange), sep=""))
            
            cat("\n\n--------------- Range of",ifelse(INPUTFilesList[[expName]]$pairedEnds,"inserts","reads"),"size selected :", rangeName, "\n")
            
            # Saving program progress accross loops
            programProgressName_currentRange <- paste(programProgressName, rangeName, sep=" - ")
            
            # Preparing the folders for results and for figures in different ranges ("AllReads" in case of no selection)
            resultFolder_currentRange <- file.path(resultFolder,rangeName)
            reportFilesFolder <- file.path(resultFolder_currentRange, reportFilesSubFolder)
            
            # Create the figures folder, parent is created recursively
            .safeCreateFolder(reportFilesFolder)
            
            
            # Select reads in the current range
            
            rangeSelected_alignedDataObject <- NULL
            
            if(!isEmpty(currentRange))
            {
                if(pairedEnds(alignedDataObject))
                {
                    # Filter the reads (include the lower value when automatic generation of groups and treating the first one)
                    rangeSelected_alignedDataObject <- filterInsertSize(alignedDataObject, start(currentRange), end(currentRange), includeLower=(automaticRanges && (rangeIndex==1)), quiet=FALSE)
                    cat("\n Nb of reads after selection of insert range size : ", length(rangeSelected_alignedDataObject), " (",length(rangeSelected_alignedDataObject)/2, " pairs)", sep="")
                    
                    if(length(rangeSelected_alignedDataObject)>100)
                    {
                        # Plot the inserts size disctribution and the selected range
                        pdf(file=file.path(reportFilesFolder, paste(expName, "_initial_inserts_size_distribution.pdf",sep="")), width=7, height=7)
                        .plotDensityRanges(isize(alignedDataObject)[isize(alignedDataObject)>=0], ranges=c(start(currentRange), end(currentRange)), ranges.col="blue", labels.col="black", include.lower=(automaticRanges && (rangeIndex==1)), include.upper=TRUE, sep.col="darkgrey", sep.lty=2, sep.lwd=0.5, main=paste("Inserts size distribution (selection : ",length(rangeSelected_alignedDataObject)," reads)", sep=""), xlab="Inserts size (bp)", ylab="")
                        dev.off()
                    }
                    else
                    {
                        cat("\nNot enough reads to plot insert size distribution")
                    }
                }
                else # Single end, filter the READS (not the inserts) according to their size
                {
                    # Filter the reads (include the lower value when automatic generation of groups and treating the first one)
                    rangeSelected_alignedDataObject <- filterReadSize(alignedDataObject, start(currentRange), end(currentRange), includeLower=(automaticRanges && (rangeIndex==1)), quiet=FALSE)
                    cat("\n Nb of reads after selection of reads size :", length(rangeSelected_alignedDataObject))
                    
                    if(length(rangeSelected_alignedDataObject)>100)
                    {
                        # Plot the reads size disctribution
                        pdf(file=file.path(reportFilesFolder, paste(expName, "_initial_reads_size_distribution.pdf",sep="")), width=7, height=7)
                        .plotDensityRanges(qwidth(alignedDataObject), ranges=c(start(currentRange), end(currentRange)), ranges.col="blue", labels.col="black", include.lower=(automaticRanges && (rangeIndex==1)), include.upper=TRUE, sep.col="darkgrey", sep.lty=2, sep.lwd=0.5, main=paste("Reads size distribution (selection : ",length(rangeSelected_alignedDataObject)," reads)", sep=""), xlab="Reads size (bp)", ylab="")
                        dev.off()
                    }
                    else
                    {
                        cat("\nNot enough reads to plot read size distribution")
                    }
                    
                }
                
                if(length(rangeSelected_alignedDataObject)==0)
                {
                    warningMessage="No reads left after selection of current size range. Skipping..."
                    cat("\nWARNING : ", warningMessage);
                    warning(warningMessage);
                    next
                }
                
                # Plot the limits of the range selection
                #abline(v=c(start(currentRange), end(currentRange)), col="red", lty=2, lwd=2)
                
            }
            else # rangeSelected is empty, make a group with all reads
            {
                rangeSelected_alignedDataObject <- alignedDataObject
            }
            
            # Should never happen
            if(is.null(rangeSelected_alignedDataObject)) 
            {
                warningMessage="Selection of reads on size range failed. Skipping..."
                cat("\nWARNING : ", warningMessage);
                warning(warningMessage);
                next
            }
            
            
            ### If some GFF annotation files are available for this experiment, plot stats
            if(!(is.na(annotationFilesGFF[[expName]]) || is.na(annotationGenomeFiles[[expName]])))
            {
                # If the annotationGenomeFiles is not a file, maybe it's a reference to a bundled genome file, let's try to read this
                if(!file.exists(annotationGenomeFiles[[expName]]))
                {
                    precomputedReferenceFilesFolder <- "resources"
                    foundFile <- system.file(precomputedReferenceFilesFolder, paste(annotationGenomeFiles[[expName]], ".ref", sep="") ,package="Pasha")
                    
                    if(nchar(foundFile)>0) 
                    {
                        annotationGenomeFiles[[expName]] <- foundFile[1]
                    }
                    else
                    {
                        stop("The annotationGenomeFiles parameter is not a valid file or reference.")
                    }
                }
                cat("\n Plotting reads statistics on GFF files...")
                plotReadsRepartitionAnnotations(alignedData=rangeSelected_alignedDataObject,
                        gff_names_vec <-  annotationFilesGFF[[expName]], 
                        expName <- expName,
                        pdfFileName <- file.path(reportFilesFolder, paste("ReadsAnnotations_", expName, '_', rangeName, ".pdf" ,sep="")), 
                        genomeReferenceFile <- annotationGenomeFiles[[expName]])
            }
            else
            {
                cat("\n Missing GFF annotation file or genome annotation file, ignoring plot for reads occupancy statistics.")
            }
            
            
            ### Preparing data split by chromosome for further steps
            
            # Split the reads list by chromosome
            returnList[[expName]][["execTime"]][[paste(programProgressName_currentRange, "Splitting Chromosomes",sep=" | ")]]=system.time((chrSplit_alignedDataObject=split(rangeSelected_alignedDataObject, seqnames(rangeSelected_alignedDataObject), drop=TRUE)))
            
            
            # Tries to get some memory back
            rm(rangeSelected_alignedDataObject)
            if(length(rangeSelection[[expName]])<2)
            {
                rm(alignedDataObject)
            }
            gc()
            
            
            # Get the number of reads in the experiment AFTER RANGE SELECTION (used for atefact threshold definition)
            nbReads <- sum(sapply(chrSplit_alignedDataObject, length))
            
            # Get the average size of reads on ALL genome
            # Will be used for multiread pileup as the multiread file format can ommit this information. In this case we will use the average read size computed here from other reads.
            # (better to have the same for all chromosomes otherwise we might have discrepancies in the elongation size retrieved and not choose the best one, because the choice is based on the most represented one)
            averageReadSize <- trunc(mean(unlist(lapply(chrSplit_alignedDataObject, qwidth))))
            cat("\n Average reads size :",averageReadSize)
            
            
            #### REMOVING ARTEFACTS (MORE THAN threshold TAGS WITH SAME COORDINATES)
            
            # Will be used to store the indexes to remove and the elongation size computed for each artefact threshold
            #returnList[[expName]][["indexesToRemove"]] <- list()
            #returnList[[expName]][["elongationEstimation"]] <- list()
            
            for(currentIncrArtefactThrEvery in incrArtefactThrEvery[[expName]])
            {
                #browser(text=paste("Entering the loop over incrArtefactThrEvery values. current :",currentIncrArtefactThrEvery))
                
                
                chrSplit_orphansFromArtefactsReads <- NULL
                cat("\n\n------------ Artefact threshold parameter :", ifelse(is.na(currentIncrArtefactThrEvery),"Not removing artefacts",currentIncrArtefactThrEvery), "\n")
                
                # The final value used for filtering after eventual computation
                thresholdArtefacts <- NULL
                
                if(!is.na(currentIncrArtefactThrEvery))
                {
                    
                    cat("\nREMOVING ARTEFACTS ArtefactParam_", currentIncrArtefactThrEvery, sep="")
                    
                    # Define the threshold that will be used to remove the artefacts according to the number of reads
                    if(currentIncrArtefactThrEvery<0) 
                    {
                        thresholdArtefacts <- (-currentIncrArtefactThrEvery)
                    } else
                    {
                        thresholdArtefacts <- trunc(nbReads/currentIncrArtefactThrEvery)
                    }
                    
                    if(thresholdArtefacts<1) thresholdArtefacts <- 1
                    
                    # Creating a human readable artefact threshold name
                    artefactThresholdName <- paste("Artefacts threshold :", thresholdArtefacts, sep=" ")
                    # Saving program progress accross loops
                    programProgressName_artefacts <- paste(programProgressName_currentRange, artefactThresholdName, sep=" - ")
                    
                    
                    
                    ### Getting the indexes of the reads that have to be removed from the splitted object
                    
                    ## Either from a previously computed
                    # saved as the parameter used to define the threshold, for backward compatibility with versions <0.5
                    #                if(currentIncrArtefactThrEvery %in% names(previousReturn[[expName]][["indexesToRemove"]]))
                    #                {
                    #                    indexesToRemove=previousReturn[[expName]][["indexesToRemove"]][[as.character(currentIncrArtefactThrEvery)]]
                    #                }
                    #                # saved as the the threshold value, since version 0.6 because the threshold can be put manually also (as negative value)
                    #                else if(thresholdArtefacts %in% names(previousReturn[[expName]][["indexesToRemove"]]))
                    #                {
                    #                    indexesToRemove=previousReturn[[expName]][["indexesToRemove"]][[as.character(thresholdArtefacts)]]
                    #                }                
                    #                else
                    #                {
                    
                    cat("\n Threshold for artefacts removal :", thresholdArtefacts, "\n")
                    
                    # Remove the artefacts in each chromosome
                    # We don't remove it directly in case of multithreading (remove=TRUE), it would imply to transfer the objects to children processes, it's too big !
                    # Instead we only retrieve the indexes and (remove them) manipulate the big object here in the main process. It avoid copying to children (thanks
                    # to 'copy-on-write' and getting back modified result). Even if it appears more complex...
                    cat("\n Searching for artefactual piles on chromosome : ")
                    
                    returnList[[expName]][["execTime"]][[paste(programProgressName_artefacts, "Get Artefacts",sep=" | ")]] <- system.time((resultsArtefacts <- mclapply(.encapsulateListElementsInEnv(chrSplit_alignedDataObject), getArtefactsIndexes, expName, thresholdToUse=thresholdArtefacts, thresholdForStats=c(1:5,10,20,50,100), reportFilesFolder)))
                    gc()
                    
                    # Something went wrong in the fork ?
                    resultsArtefacts <- .checkErrorsInFork(resultsArtefacts)
                    
                    cat("Done !\n")
                    
                    # Merging indexes to remove from positive and negative strands of each chromosome
                    #indexesToRemove <- lapply(resultsArtefacts, function(x){return(c(x[["-"]][["indexesToRemoveFromTotal"]][[as.character(thresholdArtefacts)]],
                    #                            x[["+"]][["indexesToRemoveFromTotal"]][[as.character(thresholdArtefacts)]]))})
                    # This version does not depend on the number of strands available in the result, it merges whatever it finds in indexesToRemoveFromTotal for each chromosome
                    indexesToRemove <- lapply(resultsArtefacts, function(x) {unique(unlist(lapply(x, "[[", "indexesToRemoveFromTotal")))})
                    
                    # Removing them
                    chrSplit_noArtefact_alignedDataObject <- mapply(function(x, indexesToRemove){if(length(indexesToRemove)>0) {return(x[-indexesToRemove])} else {return(x)}}, chrSplit_alignedDataObject, indexesToRemove)
                    
                    nbReadsLeft <- length(chrSplit_noArtefact_alignedDataObject)
                    if(nbReadsLeft==0) 
                    {
                        warningMessage="No reads left in current range after removing artefact reads. Skipping..."
                        cat("\nWARNING : ", warningMessage);
                        warning(warningMessage);
                        next
                    }
                    
                    
                    # Handle orphans from artefacts removal
                    if(INPUTFilesList[[expName]]$pairedEnds)
                    {
                        # Get orphans indexes created by removing artefacts
                        chrSplit_orphansIndexes <- lapply(chrSplit_noArtefact_alignedDataObject,getOrphansIndexes)
                        
                        # Check that there has been some orphans found before trying to get them, we don't want to alter the NULL value in case there is no orphan (it will be used to decide whether to compute or not the piling)
                        if(any(sapply(chrSplit_orphansIndexes, any)))
                        {
                            # Isolate the orphans for future rehabilitation
                            chrSplit_orphansFromArtefactsReads <- mapply("[", chrSplit_noArtefact_alignedDataObject, chrSplit_orphansIndexes) # The "[" operator of the class automatically return an empty object in case there is no indexes 
                            chrSplit_orphansFromArtefactsReads <- lapply(chrSplit_orphansFromArtefactsReads, "pairedEnds<-", FALSE) # Not pairs anymore
                            
                            # Removing them (orphans) from general object, IMPORTANT : chrSplit_orphansIndexes IS A LOGICA VECTOR, not indexes
                            chrSplit_noArtefact_alignedDataObject <- mapply(function(x, indexesToRemove){if(length(indexesToRemove)>0) {return(x[-indexesToRemove])} else {return(x)}}, chrSplit_noArtefact_alignedDataObject, lapply(chrSplit_orphansIndexes,which))    
                        }
                    }
                    
                    nbReadsLeft <- length(chrSplit_noArtefact_alignedDataObject)
                    if(nbReadsLeft==0) 
                    {
                        warningMessage="No reads left in current range after removing orphans generated by artefact removal. Skipping..."
                        cat("\nWARNING : ", warningMessage);
                        warning(warningMessage);
                        next
                    }

                    
                    ### TODO : plot artefacts stats by chromosome
                    
                    
                    # Saving the indexes to remove for eventual other computation on the same dataset (will help to avoid this long step)
                    #returnList[[expName]][["indexesToRemove"]][[as.character(currentIncrArtefactThrEvery)]]=indexesToRemove # version <0.6
                    #returnList[[expName]][["indexesToRemove"]][[as.character(thresholdArtefacts)]]=indexesToRemove # version >=0.6
                    
                    # Removing the indexes which fall in artefacts (classic mapply, not multithreaded, see precedent comment)
                    # Mapply will put in correspondance the elements of the first list to the ones of the second, we put indexes as negative to remove them with "["
                    #returnList[[expName]][["execTime"]][[paste("ArtefactsRemove", "_ArtefactParam_",currentIncrArtefactThrEvery, sep="")]]=system.time((chrSplittedDataMinusArtefacts=mapply(function(data, chrNames){if(length(indexesToRemove[[chrNames]])>0) return(data[-indexesToRemove[[chrNames]]]) else return(data)}, chrSplittedData, names(chrSplittedData))))
                    #returnList[[expName]][["execTime"]][[paste("ArtefactsRemove", "_ArtefactParam_",currentIncrArtefactThrEvery, sep="")]]=system.time((chrSplittedDataMinusArtefacts=mapply("[",chrSplittedData,lapply(indexesToRemove,"-"))))
                    
                    nbRemoved <- length(unlist(indexesToRemove))
                    nbRemaining <- sum(sapply(chrSplit_noArtefact_alignedDataObject,length))
                    
                    cat("\n Nb of reads removed :", nbRemoved, "->", format((nbRemoved/nbReads)*100, scientific=FALSE, digits=3), "% of aligned reads")
                    cat("\n Nb of reads remaining :", nbRemaining, "->", format((nbRemaining/nbReads)*100, scientific=FALSE, digits=3), "% of aligned reads")
                    
                } # /REMOVING ARTEFACTS
                else
                {
                    
                    # Saving program progress accross loops
                    programProgressName_artefacts <- paste(programProgressName_currentRange, "No Artefact Removal", sep=" - ")
                    
                    # Save unchanged object
                    chrSplit_noArtefact_alignedDataObject <- chrSplit_alignedDataObject
                }
                
                if(length(incrArtefactThrEvery[[expName]])<2)
                {
                    rm(chrSplit_alignedDataObject)
                }
                gc()
                
                #### ELONGATION SIZE ESTIMATION
                
                for(currentElongationSize in elongationSize[[expName]])
                {
                    cat("\n\n--------- Elongation/Shifting :", ifelse(is.na(currentElongationSize),ifelse(INPUTFilesList[[expName]]$pairedEnds,"based on pairs","automatic estimation"),currentElongationSize),"\n")
                    
                    elongationName <- NULL
                    
                    # Flag to mark whether the elongation comes from a manual parameter or by estimation (in case estimation is the same as a manual specification), used to generate output files name
                    manualElongation <- TRUE
                                        
                    if(is.na(currentElongationSize))
                    {
                        manualElongation <- FALSE
                        if(!INPUTFilesList[[expName]]$pairedEnds)
                        {
                            cat("\nELONGATION SIZE ESTIMATION")
                            
                            # In case the size has been estimated in a previous computation, retrieve it from the 'previousReturn' object
                            #                    if(currentIncrArtefactThrEvery %in% names(previousReturn[[expName]][["elongationEstimation"]]))
                            #                    {
                            #                        currentElongationSize <- previousReturn[[expName]][["elongationEstimation"]][[as.character(currentIncrArtefactThrEvery)]]
                            #                    }
                            #                    else
                            #                    {
                            
                            cat("\n Estimating elongation/shifting size on chromosome : ")
                            
                            # Estimate the elongation size for each chromosome
                            returnList[[expName]][["execTime"]][[paste(programProgressName_artefacts, "Elongation/Shifting Estimation",sep=" | ")]]=system.time((elongationEstimationList=mclapply(.encapsulateListElementsInEnv(chrSplit_noArtefact_alignedDataObject), estimateElongationSize, expName, elongationEstimationRange["mini"], elongationEstimationRange["maxi"], elongationEstimationRange["by"], averageReadSize, reportFilesFolder)))
                            gc()
                            
                            # Something went wrong in the fork ?
                            elongationEstimationList <- .checkErrorsInFork(elongationEstimationList)
                            
                            cat("Done !\n")
                            
                            
                            # Get the result as a named vector (avoid the redundant name coming from the named vector returned by the function and from the named list from apply)
                            # Get the names from the list object so all chromosomes with no reads won't be considered anymore (NULL results are automatically removed by unlist)
                            elongationEstimations <- unlist(lapply(elongationEstimationList, unname))
                            
                            # Get the number of each score
                            elongationCounts <- table(elongationEstimations)
                            # Identify the elongations that are less than twice the read size (they are considered as suspicious values and will be weighted down for final decision)
                            smallElongationIndex <- (as.numeric(names(elongationCounts)) < (averageReadSize*2))
                            # Weight down the counts for small elongation values (they have to be represented twice as much as other scores for being selected)
                            elongationCounts[smallElongationIndex] <- elongationCounts[smallElongationIndex]/2;
                            
                            # Get the most represented size among chromosomes
                            currentElongationSize <- as.numeric(names(which.max(elongationCounts)))
                            
                            if(currentElongationSize < (averageReadSize*2))
                            {
                                warningMessage <- "Small elongation values (less than twice the reads size) were the most represented among all chromosomes (even after these values were weighted down for selection). This can reflect anomalous fragment size estimation, please consider checking elongation reports for individual chromosomes and eventually specify a manual value..."
                                # Write warning in imediate log file
                                cat("\n WARNING :", warningMessage)
                                # Report warning programatically
                                warning(warningMessage)
                            }
                            
                            cat("\n Estimation summary (bp) :\n    ")
                            
                            summaryText <- c(paste(names(elongationEstimations), " --> ", elongationEstimations, sep=""), paste("----> Most represented :", currentElongationSize,"bp"))
                            
                            cat(summaryText, sep="\n    ")
                            
                            # Write the summary text file for all chromosomes
                            writeLines(con=file.path(reportFilesFolder,"Shifting_Estimation_Summary.txt"),text=summaryText)
                            #}
                            
                            # Saving the result for eventual other computation on the same dataset (will help to avoid this long step)
                            #returnList[[expName]][["elongationEstimation"]][[as.character(currentIncrArtefactThrEvery)]]=currentElongationSize
                            
                            # Creating a human readable elongation/shifting size
                            elongationName <- paste("Estimated elongation/shifting", currentElongationSize, sep=" ")
                            
                            
                        } 
                        else # Paired Ends
                        {
                            # In this case the elongation size is informative (mainly here to plot a graph distribution)
                            # A decision (a number) will be taken however based on the distribution that could be useful for eventual future 'orphan reads' rehabilitation
                            currentElongationSize <- trunc(median(unlist(lapply(lapply(chrSplit_noArtefact_alignedDataObject,isize), abs))))
                            
                            cat("\n Median elongation size computed using pairs (useful for eventual elongation of orphans or multireads) :", currentElongationSize)
                            
                            
                            # Plotting
                            # Must select for dataset (chromosomes) that have enough valid data to plot a density (at least 2 values to plot the density in positive insert sizes) 
                            chrSplit_noArtefact_alignedDataObject_validValues <- mapply("[", chrSplit_noArtefact_alignedDataObject, lapply(chrSplit_noArtefact_alignedDataObject, function(x) {return(isize(x)>0)}))
                            chrSplit_noArtefact_alignedDataObject_min2values <- chrSplit_noArtefact_alignedDataObject_validValues[sapply(chrSplit_noArtefact_alignedDataObject_validValues, length)>2]
                            if(length(chrSplit_noArtefact_alignedDataObject_min2values)>0)
                            {
                                densityInsertSize <- lapply(lapply(chrSplit_noArtefact_alignedDataObject_min2values, function(x){return(isize(x)[isize(x)>0])}), density)
                                
                                ### Isolating the median of max density points
                                # Get the max density points
                                densityMaxPoints <- sapply(densityInsertSize, function(densityData){return(densityData$x[which.max(densityData$y)])})
                                
                                # Preparing merged plotting
                                max.y <- max(sapply(densityInsertSize,"[[", "y"))
                                max.x <- max(sapply(densityInsertSize,"[[", "x"))
                                
                                pdf(file=file.path(reportFilesFolder, paste(expName, "_insert_size_distribution_by_chromosome",currentElongationSize,".pdf",sep="")), width=7, height=7)    
                                # Set the window
                                plot(NULL, xlim=c(0,max.x), ylim=c(0,max.y), main=paste(expName, " insertSize distributions (",currentElongationSize,")", sep=""), xlab="Insert size", ylab="")
                                
                                # Plot the curves
                                resNULL <- lapply(densityInsertSize, function(densityData){points(densityData, col= "#55555555", lty=2, type='l')})
                                abline(v=currentElongationSize, col="red", lwd=2)
                                
                                dev.off()
                            }
                            else
                            {
                                cat("\nNot enough reads to plot insert size distribution")
                            }
                            
                            # Creating a human readable elongation/shifting size
                            elongationName <- paste("Estimated elongation/shifting (based on pairs for orphans and multireads)", currentElongationSize, sep=" ")                    
                            
                        } # /ELONGATION SIZE ESTIMATION
                        
                    }
                    else # elongation/shifting size manually specified
                    {
                        if(INPUTFilesList[[expName]]$pairedEnds && (currentElongationSize != 0))
                        {
                            cat("\n WARNING : a manual elongation size (other than 0) has been specified for a paired-ends dataset. Note that this value will only be used for eventual orphans and multireads (standard piling will use length information carried by pairs)...");
                        }
                        # Creating a human readable elongation/shifting size
                        elongationName <- paste("Manual elongation/shifting", currentElongationSize, sep=" ")
                    }
                    
                    # Saving program progress accross loops
                    programProgressName_elongation <- paste(programProgressName_artefacts, elongationName, sep=" - ")
                    
                    #### RESULT GENERATION
                    
                    # This variable will store intermediary piled-up results for each category ("mergedReads", "unireads", "multiReads", "orphans", "orphansFromArtefacts") 
                    piledRleData <- list()
                    # mergedReads
                    # unireads
                    # multiReads
                    # orphans
                    # orphansFromArtefacts
                    
                    
                    if(any(c(WIGvs,WIGfs,GFF, BED, BIGWIG)))
                    {
                        cat("\n\n COMPUTING PILED FILES\n")
                        
                        
                        
                        cat("\n Piling UNIREADS reads on chromosome : ")
                        
                        # Computing the pileup vector coming from the experiment and putting it in the uniread slot of the list
                        returnList[[expName]][["execTime"]][[paste(programProgressName_elongation, "Piling Unireads",sep=" | ")]]=system.time((piledRleData[["unireads"]]=mclapply(.encapsulateListElementsInEnv(chrSplit_noArtefact_alignedDataObject), generatePiled, ifelse(currentElongationSize==0, 0,if(INPUTFilesList[[expName]]$pairedEnds) NA else currentElongationSize), averageReadSize, INPUTFilesList[[expName]]$midPoint)))
                        if(length(elongationSize[[expName]])<2)
                        {
                            rm(chrSplit_noArtefact_alignedDataObject)
                        }
                        gc()
                        
                        # Something went wrong in the fork ?
                        piledRleData[["unireads"]] <- .checkErrorsInFork(piledRleData[["unireads"]])
                        
                        
                        cat("Done !\n")
                        
                        
                        
                        ### Preparing which piling will be done
                        
                        # Vector checking (bool) that the data is available for each eventual piling step that will occur
                        dataAvailablePiling <- c(orphans=((!is.null(chrSplit_orphanReads)) && (length(chrSplit_orphanReads)>0)), 
                                orphansFromArtefacts=((!is.null(chrSplit_orphansFromArtefactsReads)) && (length(chrSplit_orphansFromArtefactsReads)>0)), 
                                multiReads=((!is.null(chrSplit_multiLocDataObject)) && (length(chrSplit_multiLocDataObject)>0)))
                        
                        userAskingPiling <- c(orphans=("orphans" %in% rehabilitationStep), 
                                orphansFromArtefacts=("orphansFromArtefacts" %in% rehabilitationStep), 
                                multiReads=TRUE) # as soon as the user gives a file (checked by other test), generate it
                        
                        # Vector that summarize if the piling will be done or not for eache eventual step
                        doPiling <- dataAvailablePiling & userAskingPiling
                        
                        
                        ### Doing the alternative/optional pilings
                        
                        # Rehab orphans reads coming from broken pairs
                        if(doPiling["orphans"])
                        {
                            cat("\n Rehabilitation and piling of orphan reads on chromosome : ")
                            returnList[[expName]][["execTime"]][[paste(programProgressName_elongation, "Piling Orphans",sep=" | ")]]=system.time((piledRleData[["orphans"]]=mclapply(.encapsulateListElementsInEnv(chrSplit_orphanReads), generatePiled, currentElongationSize, averageReadSize, INPUTFilesList[[expName]]$midPoint)))
                            if(length(elongationSize[[expName]])<2)
                            {
                                rm(chrSplit_orphanReads)
                            }
                            gc()
                            # Something went wrong in the fork ?
                            piledRleData[["orphans"]] <- .checkErrorsInFork(piledRleData[["orphans"]])
                            cat("Done !\n")
                        }
                        
                        # Rehab orphans reads coming from broken pairs during artefact removing step
                        if(doPiling["orphansFromArtefacts"])
                        {
                            cat("\n Rehabilitation and piling of orphan reads from artefact removing step on chromosome : ")
                            returnList[[expName]][["execTime"]][[paste(programProgressName_elongation, "Piling Orphans From Artefacts",sep=" | ")]] <- system.time((piledRleData[["orphansFromArtefacts"]]=mclapply(.encapsulateListElementsInEnv(chrSplit_orphansFromArtefactsReads), generatePiled, currentElongationSize, averageReadSize, INPUTFilesList[[expName]]$midPoint)))
                            if(length(elongationSize[[expName]])<2)
                            {
                                rm(chrSplit_orphansFromArtefactsReads)
                            }
                            gc()
                            # Something went wrong in the fork ?
                            piledRleData[["orphansFromArtefacts"]] <- .checkErrorsInFork(piledRleData[["orphansFromArtefacts"]])
                            cat("Done !\n")
                        }
                        
                        # Process reads related to current experiment that aligned in multiple locations
                        if(doPiling["multiReads"])
                        {
                            cat("\n Piling multireads (reads aligned in multiple locations) on chromosome : ")
                            returnList[[expName]][["execTime"]][[paste(programProgressName_elongation, "Piling Multireads",sep=" | ")]] <- system.time((piledRleData[["multiReads"]] <- mclapply(.encapsulateListElementsInEnv(chrSplit_multiLocDataObject), generatePiled, currentElongationSize, averageReadSize, INPUTFilesList[[expName]]$midPoint)))
                            if(length(elongationSize[[expName]])<2)
                            {
                                rm(chrSplit_multiLocDataObject)
                            }
                            gc()
                            # Something went wrong in the fork ?
                            piledRleData[["multiReads"]] <- .checkErrorsInFork(piledRleData[["multiReads"]])
                            cat("Done !\n")
                        }
                        
                        # In case there has been several ones, compute the merge of all for the final file
                        if(any(doPiling))
                        {
                            cat("\n Preparing and merging final piled vector... ")
                            
                            
                            #if(length(piledRleData)>1)
                            #{
                            
                            # Get a list of all chromosomes in all piled lists (can't use mapply in case of heterogeneous chromosomes lists)
                            chrList <- unique(unlist(lapply(piledRleData, names)))
                            
                            # Trick to keep the names after lapply on the chromosomes names
                            names(chrList) <- chrList
                            #browser()
                            returnList[[expName]][["execTime"]][[paste(programProgressName_elongation, "Merging Pileups",sep=" | ")]] <- system.time((piledRleData[["mergedReads"]] <- mclapply(chrList, function(currentChr)
                                                        {
                                                            piledSet <- c(piledRleData["unireads"],piledRleData[names(doPiling)[doPiling]]) # select only the ones that were supposedly computed
                                                            piledSetChrSelection <- lapply(piledSet, "[[", currentChr) # Select the chromosome of interest
                                                            
                                                            # Removing the ones that had no information for the current chromosome (in case of very unbalanced datasets)
                                                            piledSetChrSelection <- piledSetChrSelection[!sapply(piledSetChrSelection,is.null)]
                                                            
                                                            # If there is at least one dataset covering this chromosome (should always be the case because we only treat chromosomes based on what was found in the piled object)
                                                            if(length(piledSetChrSelection)>0)
                                                            {
                                                                
                                                                # Resize to maxSize by appending 0s to all Rle vectors to avoid recycling)
                                                                maxSize <- max(sapply(piledSetChrSelection, length))
                                                                piledSetChrSelection <- lapply(piledSetChrSelection, function(x){return(append(x, rep(0,maxSize-length(x))))})
                                                                
                                                                # Create the resulting sum vector
                                                                mergedRle <- Reduce('+', piledSetChrSelection)
                                                                
                                                                # Adding the chromosome name to metadata after all computations, altering a Rle removes its metadata...
                                                                metadata(mergedRle) <- list("chr"=currentChr) # Keep the chromosome name information for further processing
                                                                
                                                                return(mergedRle)
                                                            }
                                                            else
                                                            {
                                                                # This will be automatically removed from returned results in .checkErrorsInFork
                                                                return(NULL)
                                                            }
                                                        }))) 
                            
                            # Something went wrong in the fork ?
                            piledRleData[["mergedReads"]] <- .checkErrorsInFork(piledRleData[["mergedReads"]])
                            
                            
                            if(!keepTemp)
                            {
                                piledRleData <- piledRleData["mergedReads"] # returns a sublist containing only merged dataset
                            }
                            #}
                            #else
                            #{
                            #    names(piledRleData)="mergedReads"
                            #}
                            
                            cat("Done !")
                            
                        }
                        
                        
                        
                        #### WRITING FILES 
                        
                        # Create or not a subfolder in resultFolder depending on the number of pileup categories
                        pileupSubcategoriesFolder <- file.path(resultFolder_currentRange, "PileupSubcategories")
                        if(length(piledRleData)>1)
                        {
                            .safeCreateFolder(pileupSubcategoriesFolder)
                        }
                        
                        
                        # Will loop across all piled categories to make the final files
                        for(currentPileupCategory in names(piledRleData))
                        {
                            cat("\n\n------ Writing on disk :", currentPileupCategory,"\n")
                            
                            # Saving program progress accross loops
                            programProgressName_pileup <- paste(programProgressName_elongation, currentPileupCategory, sep=" - ")
                            
                            
                            
                            # Put in the subfolder if we deal with subcategories
                            finalResultFolder <- ifelse((currentPileupCategory=="mergedReads") || (length(piledRleData)==1), resultFolder_currentRange, pileupSubcategoriesFolder)
                            
                            
                            # Create a base filename that will be used to create the final filenames and the track names in WIGs
                            #baseFileName <- paste(expName, ifelse(length(piledRleData)>1, paste("_", currentPileupCategory,sep=""), ""), ifelse(INPUTFilesList[[expName]]$midPoint, "_sh", "_el"), currentElongationSize,ifelse(!is.na(currentIncrArtefactThrEvery),paste("_AThr",thresholdArtefacts, sep=""),""),ifelse(INPUTFilesList[[expName]]$midPoint, "_MIDPOINT", ""), sep="")
                            baseFileName <- paste(expName, "_", currentPileupCategory, ifelse(INPUTFilesList[[expName]]$midPoint, "_sh", "_el"), ifelse(INPUTFilesList[[expName]]$pairedEnds, "PairsAnd", ""),ifelse(manualElongation, "Manual", "Est"),currentElongationSize,ifelse(!is.na(currentIncrArtefactThrEvery),paste("_AThr",thresholdArtefacts, sep=""),""),ifelse(INPUTFilesList[[expName]]$midPoint, "_MIDPOINT", ""), sep="")
                            
                            # Generation of the WIG variable step which IS NOT dependant of the binSize
                            if(WIGvs || BIGWIG)
                            {
                                cat("\n Writing WIG variable steps for chromosome : ")

                                returnList[[expName]][["execTime"]][[paste(programProgressName_pileup, "Writing WIGvs chromosomes",sep=" | ")]]=system.time((resultingFiles_WIGvs=mclapply(piledRleData[[currentPileupCategory]], .writeWIGvs_chr, baseFileName, finalResultFolder, compatibilityOutputWIG)))
                                gc()
                                
                                # Something went wrong in the fork ?
                                resultingFiles_WIGvs <- .checkErrorsInFork(resultingFiles_WIGvs)
                                
                                cat("Done !\n")
                                
                                
                                cat("\n Merging WIG variable step temporary result files")
                                
                                # Get the names of wig files for chromosomes
                                chromosomesFiles <- mixedsort(unname(unlist(resultingFiles_WIGvs)))
                                
                                # Create the merged result File
                                resultFileName <- file.path(finalResultFolder, paste("WIGvs_", baseFileName, ".wig", sep=""))
                                
                                # Clean an eventual previous file with the same name
                                if(file.exists(resultFileName))
                                {
                                    file.remove(resultFileName)
                                }
                                
                                # Write the track line in first line of the result file (chromosomes files will be appended after it, when compatibility mode on this is written by writeWIG function for each chromosome) 
                                if(!compatibilityOutputWIG)
                                {
                                    fileCon <- file(resultFileName, open="wb")
                                    writeLines(paste("track type=wiggle_0 name=\"", baseFileName, "\"", sep=""), con=fileCon, sep="\n")
                                    close(fileCon)
                                }
                                
                                # Concatenate the variable step wig file (thanks to rle encoding)
                                returnList[[expName]][["execTime"]][[paste(programProgressName_pileup, "Appending WIGvs Individual Chromosomes",sep=" | ")]] <- system.time(file.append(resultFileName, chromosomesFiles))
                                
                                #                        if(!keepTemp)
                                #                        {
                                # Erase the temporary (chromosomes splitted) WIG files
                                file.remove(chromosomesFiles)
                                #                        }
	
								if(BIGWIG)
								{ 
                                    
                                    
                                    # Compute the minimal chromosome length from piled scores in order to generate the seqinfo object required for bigwig conversion
                                    chromLengths=sapply(lapply(piledRleData[[currentPileupCategory]], runLength), sum);
                                    seqinfo_object=Seqinfo(seqnames=names(chromLengths), seqlengths=chromLengths, isCircular=rep(FALSE, length(chromLengths)), genome="genome")
                                    
									cat("\n Writing BIGWIG file : ")	
									wigToBigWig(resultFileName, seqinfo_object, gsub("\\.wig$", ".bw", resultFileName));
									
									if(!WIGvs)
									{
										file.remove(resultFileName);
									}
								}
                            }
                            
                            # Generation of the BEDGRAPH which IS NOT dependant of the binSize
                            if(BED)
                            {
                                cat("\n Writing Bedgraph for chromosome : ")
                                
                                returnList[[expName]][["execTime"]][[paste(programProgressName_pileup, "Writing Bedgraph chromosomes",sep=" | ")]] <- system.time((resultingFiles_BED <- mclapply(piledRleData[[currentPileupCategory]], .writeBED_chr, baseFileName, finalResultFolder)))
                                gc()
                                
                                # Something went wrong in the fork ?
                                resultingFiles_BED <- .checkErrorsInFork(resultingFiles_BED)
                                
                                cat("Done !\n")
                                
                                
                                cat("\n Merging Bedgraph temporary result files")
                                
                                # Get the names of wig files for chromosomes
                                chromosomesFiles <- mixedsort(unname(unlist(resultingFiles_BED)))
                                
                                # Create the merged result File
                                resultFileName <- file.path(finalResultFolder, paste("BED_", baseFileName, ".bed", sep=""))
                                
                                # Clean an eventual previous file with the same name
                                if(file.exists(resultFileName))
                                {
                                    file.remove(resultFileName)
                                }
                                
                                # Write the track line in first line of the result file
                                fileCon <- file(resultFileName, open="wb")
                                writeLines(paste("track type=bedGraph name=\"", baseFileName, "\"", sep=""), con=fileCon, sep="\n")
                                close(fileCon)
                                
                                # Concatenate the GFF file (for peak detection with CoCas)
                                returnList[[expName]][["execTime"]][[paste(programProgressName_pileup, "Appending Bedgraph Individual Chromosomes",sep=" | ")]] <- system.time(file.append(resultFileName, chromosomesFiles))
                                
                                #                                if(!keepTemp)
                                #                                {
                                # Erase the temporary (chromosomes splitted) GFF files
                                file.remove(chromosomesFiles)
                                #                                }
                            }
                            
                            
                            # Loop on the eventually different binsizes specified by user
                            for(currentBinSize in binSize[[expName]])
                            {
                                cat("\n\n--- Bin Size :", currentBinSize,"\n")
                                
                                # Creating a human readable Binsize
                                binSizeName <- paste("Binsize", currentBinSize, sep=" ")
                                # Saving program progress accross loops
                                programProgressName_binSize <- paste(programProgressName_pileup, binSizeName, sep=" - ")
                                
                                
                                # Generation of the WIG FS and the GFF which ARE dependant of the binSize
                                if(any(c(WIGfs,GFF)) & (!is.na(currentBinSize)))
                                {
                                    
                                    baseFileNameBinned <- paste(baseFileName,"_bin",currentBinSize,sep="")
                                    
                                    ## BINNING
                                    
                                    
                                    binningVec <- function(piledRleData, binSize)
                                    {
                                        currentChr <- metadata(piledRleData)[["chr"]]
                                        
                                        cat(paste(currentChr, "... ", sep=""))
                                        res <- binVector(as.numeric(piledRleData), binSize)
                                        res <- Rle(res)
                                        metadata(res) <- list("chr"=currentChr)
                                        return(res)
                                    }
									
									
									
                                    # We keep Rle data representation for memory saving despite the fact that we will have to convert it for writing
                                    # Moreover we'll be able to use metadata to store the chromosome name
                                    if(currentBinSize>1)
                                    {
                                        cat("\n Binning chromosome (",currentBinSize,"bp) : ",sep="")
                                        
                                        returnList[[expName]][["execTime"]][[paste(programProgressName_binSize, "Binning",sep=" | ")]] <- system.time((binnedDataRle <- mclapply(piledRleData[[currentPileupCategory]], binningVec,  currentBinSize)))
                                        gc()
                                        
                                        # Something went wrong in the fork ?
                                        piledRleData[[currentPileupCategory]] <- .checkErrorsInFork(piledRleData[[currentPileupCategory]])
                                        
                                        cat("Done !\n")
                                    }
                                    else
                                    {
                                        binnedDataRle <- piledRleData[[currentPileupCategory]]
                                    }
                                    
                                    
                                    
                                    if(WIGfs)
                                    {
                                        
                                        cat("\n Writing WIG fixed steps (bins) for chromosome : ")
                                        
                                        returnList[[expName]][["execTime"]][[paste(programProgressName_binSize, "Writing WIGfs chromosomes",sep=" | ")]] <- system.time((resultingFiles_WIGfs <- mclapply(binnedDataRle, .writeWIGfs_chr, baseFileNameBinned, currentBinSize, finalResultFolder, compatibilityOutputWIG)))
                                        gc()
                                        
                                        # Something went wrong in the fork ?
                                        resultingFiles_WIGfs <- .checkErrorsInFork(resultingFiles_WIGfs)

                                        cat("Done !\n")
                                        
                                        
                                        cat("\n Merging WIG fixed step temporary result files")
                                        
                                        # Get the names of wig files for individual chromosomes (and sort them by chromosome name)
                                        chromosomesFiles <- mixedsort(unname(unlist(resultingFiles_WIGfs)))
                                        
                                        # Create the merged result File
                                        resultFileName <- file.path(finalResultFolder, paste("WIGfs_", baseFileNameBinned, ".wig", sep=""))
                                        
                                        # Clean an eventual previous file with the same name
                                        if(file.exists(resultFileName))
                                        {
                                            file.remove(resultFileName)
                                        }
                                        
                                        # Write the track line in first line of the result file (chromosomes files will be appended after it, when compatibility mode on this is written by writeWIG function for each chromosome) 
                                        if(!compatibilityOutputWIG)
                                        {
                                            fileCon <- file(resultFileName, open="wb")
                                            writeLines(paste("track type=wiggle_0 name=\"", baseFileNameBinned, "\"", sep=""), con=fileCon, sep="\n")
                                            close(fileCon)
                                        }
                                        
                                        
                                        # Concatenate the fixed step (currentBinSize) wig files
                                        returnList[[expName]][["execTime"]][[paste(programProgressName_binSize, "Appending WIGfs Individual Chromosomes",sep=" | ")]] <- system.time(file.append(resultFileName, chromosomesFiles))
                                        
                                        #                                if(!keepTemp)
                                        #                                {
                                        # Erase the temporary (chromosomes splitted) WIG files
                                        file.remove(chromosomesFiles)
                                        #                                }
                                    }
                                    
                                    
                                    if(GFF)
                                    {
                                        cat("\n Writing GFF for chromosome : ")
                                        
                                        returnList[[expName]][["execTime"]][[paste(programProgressName_binSize, "Writing GFF chromosomes",sep=" | ")]] <- system.time((resultingFiles_GFF <- mclapply(binnedDataRle, .writeGFF_chr, baseFileNameBinned, currentBinSize, finalResultFolder)))
                                        gc()
                                        
                                        # Something went wrong in the fork ?
                                        resultingFiles_GFF <- .checkErrorsInFork(resultingFiles_GFF)
                                        
                                        cat("Done !\n")
                                        
                                        
                                        cat("\n Merging GFF temporary result files")
                                        
                                        # Get the names of wig files for chromosomes
                                        chromosomesFiles <- mixedsort(unname(unlist(resultingFiles_GFF)))
                                        
                                        # Create the merged result File
                                        resultFileName <- file.path(finalResultFolder, paste("GFF_", baseFileNameBinned, ".gff", sep=""))
                                        
                                        # Clean an eventual previous file with the same name
                                        if(file.exists(resultFileName))
                                        {
                                            file.remove(resultFileName)
                                        }
                                        
                                        # Concatenate the GFF file (for peak detection with CoCas)
                                        returnList[[expName]][["execTime"]][[paste(programProgressName_binSize, "Appending GFF Individual Chromosomes",sep=" | ")]] <- system.time(file.append(resultFileName, chromosomesFiles))
                                        
                                        #                                if(!keepTemp)
                                        #                                {
                                        # Erase the temporary (chromosomes splitted) GFF files
                                        file.remove(chromosomesFiles)
                                        #                                }
                                    }

                                    
                                } # /WIG FS AND GFF GENERATION
                                
                            } # /binSize
                        } # currentPileupCategory
                        
                    } # any(c(WIGvs,WIGfs,GFF))
                    
                } # /ElongationSize
                
            } # /ArtefactsThreshold
            
        } # /currentRange
        
        ts <- paste("##------", date(), "------##")
        cat("\n",ts,"\n")
        
        .printTimes(returnList[expName])
        
        # Releasing the log file for this experiment (sink automatically manages it like a fifo)
        sink()
        
    } # /expName
    
    
    
    .printTimes(returnList)
    
    ts <- paste("##------", date(), "------##")
    cat("\n\n",ts,"\n")
    
    cat("\n\nDone, successfully processed",length(INPUTFilesList),"experiment(s).")
    
    
    # Releasing the log files
    if(generalLogFile)
    {
        sink()
    }
    
    return(returnList)
    
} # /processBAM

Try the Pasha package in your browser

Any scripts or data that you put into this service are public.

Pasha documentation built on Jan. 15, 2017, 6:21 p.m.