R/specUnmix.R

Defines functions specUnmix specUnmixCoFunction

Documented in specUnmix

#' Spectral unmixing of cytometry files
#'
#'
#' This function performs the central task of spectral unmixing, to convert the
#' photon input to the detectors to biological proxy-signals.
#' @importFrom BiocGenerics colnames
#' @importFrom grDevices col2rgb colorRampPalette densCols dev.off png
#' @importFrom graphics mtext par plot title
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom doSNOW registerDoSNOW
#' @importFrom foreach foreach %dopar%
#' @param flowObj The fcs object to be filtered. Both flowFrames and flowSets
#' are accepted.
#' @param specMat This is a matrix generated by the secMatCalc function,
#' possibly with edited row names.
#' @return The unmixed data. It will be returned in the format it was imported as.
#'
#' @examples
#' # Load some uncompensated data
#'
#'
#' # Load the spectral unmixing matrix generated with controls from the same
#' experiment. These can be generated using the specMatCalc function.
#' data(specMat)
#'
#' # And now, just run the function!
#' dataComp <- specUnmix(fluoExprs, specMat)
#' @export specUnmix
specUnmix <- function(flowObj, specMat) {
    if(class(flowObj) == "flowSet"){
        resultObj <- fsApply(flowObj, specUnmixCoFunction,
                             specMat = specMat)
    } else if(class(flowObj) == "flowFrame"){
        resultObj <- specUnmixCoFunction(flowObj, specMat = specMat)
    }
    return(resultObj)
}

specUnmixCoFunction <- function(focusFrame, specMat){
    rawData <- exprs(focusFrame)[,colnames(specMat)]
    if (nrow(rawData) < 70000) {
        # Make the least squares fit based on the raw, uncompensated data.
        ls_corr <- lsfit(x = t(specMat), y = t(rawData), intercept = FALSE)
        # Export the unmixed portion of the least squares result.
        unmixResult <- t(ls_corr$coefficients)
    } else {

        nCores <- detectCores() - 1
        cl <- parallel::makeCluster(nCores, type = "SOCK")
        registerDoSNOW(cl)
        firstRow <- 1
        resultList <- list()
        x <- 1
        while (firstRow < nrow(rawData)) {
            timeBefore <- Sys.time()
            if (((firstRow + nCores * 50000) - 1) < nrow(rawData)) {
                rowRange <- firstRow:(firstRow + (nCores * 50000) - 1)
            } else {
                rowRange <- firstRow:nrow(rawData)
            }
            # Here, the data is divided into suitable chunks
            rowRangeSubsets <- split(rowRange, sort(rowRange %% nCores))

            # Now, the datasets are constructed from this
            lsDataSets <- lapply(1:length(rowRangeSubsets), function(x) return(rawData[rowRangeSubsets[[x]], ]))

            resultFocus <- foreach(i = 1:nCores) %dopar% t((lsfit(x = t(specMat), y = t(lsDataSets[[i]]), intercept = FALSE))$coefficients)

            resultList[[x]] <- do.call("rbind", resultFocus)


            timeAfter <- as.numeric(Sys.time() - timeBefore)
            print(paste0("Rows ", rowRange[1], " to ", rowRange[length(rowRange)], " unmixed in ", timeAfter, " seconds. Now, ", nrow(rawData) - rowRange[length(rowRange)], " rows are left."))
            firstRow <- (rowRange[length(rowRange)] + 1)
            x <- x + 1
        }


        parallel::stopCluster(cl)
        unmixResult <- do.call("rbind", resultList)
    }

    fullResult <- cbind(exprs(focusFrame)[,-which(colnames(focusFrame) %in%
                                                      colnames(specMat))],
                        unmixResult)
    fullResultFF <- focusFrame[,seq_len(ncol(fullResult))]
    BiocGenerics::colnames(fullResultFF) <- colnames(fullResult)
    exprs(fullResultFF) <- fullResult

    return(fullResultFF)
}
jtheorell/theFlowSpec documentation built on Aug. 22, 2019, 3:33 a.m.