R/flowCut.R


## flowCut: Precise and Accurate Automated Removal of Outlier Events and
## Flagging of Files Based on Time Versus Fluorescence Analysis
## Authors: Justin Meskas and Sherrie Wang
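## A minimal usage sketch (illustrative only; "sample.fcs" is a hypothetical
## file name and `f` is assumed to be a flowFrame read with flowCore):
##   library(flowCore)
##   f <- read.FCS("sample.fcs")
##   res <- flowCut(f, Segment = 500, Plot = "Flagged Only", Verbose = TRUE)
##   res$data["Has the file passed", ]   # "T" if clean, "F" if flagged
##   f.clean <- res$frame                # flowFrame with flagged events removed
##   res$ind                             # indices of removed events in the original frame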
flowCut <- function(f, Segment = 500, Channels = NULL, Directory = NULL, FileID = NULL, 
    Plot = "Flagged Only", MaxContin = 0.1, MeanOfMeans = 0.13, MaxOfMeans = 0.15, 
    MaxValleyHgt = 0.1, MaxPercCut = 0.3, LowDensityRemoval = 0.1, GateLineForce = NULL, 
    UseOnlyWorstChannels = FALSE, AmountMeanRangeKeep = 1, AmountMeanSDKeep = 2, 
    PrintToConsole = FALSE, AllowFlaggedRerun = TRUE, UseCairo = FALSE, UnifTimeCheck = 0.22, 
    RemoveMultiSD = 7, AlwaysClean = FALSE, IgnoreMonotonic = FALSE, MonotonicFix = NULL, 
    Measures = c(1:8), Verbose = FALSE) {
    
    start0 <- Sys.time()
    resTable <- matrix("", 17, 1)
    rownames(resTable) <- c("Is it monotonically increasing in time", "Largest continuous jump", 
        "Continuous - Pass", "Mean of % of range of means divided by range of data", 
        "Mean of % - Pass", "Max of % of range of means divided by range of data", 
        "Max of % - Pass", "Has a low density section been removed", "% of low density removed", 
        "How many segments have been removed", "% of events removed from segments removed", 
        "Worst channel", "% of events removed", "FileID", "Type of Gating", "Was the file run twice", 
        "Has the file passed")
    resTable["Was the file run twice", ] <- "No"
    # Creating a directory if none is specified
    if (is.null(Directory)) {
        Directory <- paste0(getwd(), "/flowCut")
    }
    # Creating a FileID if none is specified
    if (is.null(FileID)) {
        FileID <- Sys.time()
        FileID <- substr(FileID, start = 1, stop = 19)
        FileID <- gsub("-", "_", FileID)
        FileID <- gsub(":", "_", FileID)
        FileID <- gsub(" ", "__", FileID)
        # samp <- sample(1:9999, 1) FileID <- paste0(rep(0, 4-nchar(samp)), samp)
        if (Verbose == TRUE) {
            cat(paste0("The FileID is: ", FileID, "\n"))
        }
    }
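    # For illustration: a Sys.time() of "2022-03-31 11:04:05" becomes the
    # FileID "2022_03_31__11_04_05" after the substitutions above.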
    resTable["FileID", ] <- FileID
    
    if (!is(f, "flowFrame")) {
        message("f must be a flowFrame.")
        resTable["Has the file passed", ] <- "f must be a flowFrame"
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(Segment) || (Segment <= 0)) {
        message("Segment must be a number larger than 0.")
        resTable["Has the file passed", ] <- paste0("Segment", "must be a number larger than 0.")
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    
    if (nrow(f) <= 3 * Segment) {
        # deGate requires count.lim = 3
        message("Either your Segment size is too large or your number", " of cells is too small.")
        resTable["Has the file passed", ] <- paste0("Either your", "Segment size is too large or your number of cells is too small.")
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.character(Directory)) {
        message("Directory must be a character.")
        resTable["Has the file passed", ] <- "Directory must be a character."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.character(FileID) && !is.numeric(FileID)) {
        message("FileID must be a character or a number.")
        resTable["Has the file passed", ] <- paste0("FileID", "must be a character or a number.")
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!(Plot == "None" || Plot == "All" || Plot == "Flagged Only")) {
        message("Plot must be a character", "with one of the following", " options: 'None', 'All', or 'Flagged Only'.")
        resTable["Has the file passed", ] <- paste0("Plot must be a character", " with one of the following options:", 
            " 'None', 'All', or 'Flagged Only'.")
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(MaxContin) || (MaxContin < 0)) {
        message("MaxContin must be a number larger than 0.")
        resTable["Has the file passed", ] <- paste0("MaxContin", " must be a number larger than 0.")
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(MeanOfMeans) || (MeanOfMeans < 0)) {
        message("MeanOfMeans must be a number larger than 0.")
        resTable["Has the file passed", ] <- paste0("MeanOfMeans", " must be a number larger than 0.")
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(MaxOfMeans) || (MaxOfMeans < 0)) {
        message("MaxOfMeans must be a number larger than 0.")
        resTable["Has the file passed", ] <- "MaxOfMeans must be a number larger than 0."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(MaxValleyHgt) || (MaxValleyHgt < 0) || (MaxValleyHgt > 1)) {
        message("MaxValleyHgt must be a number between 0 and 1.")
        resTable["Has the file passed", ] <- "MaxValleyHgt must be a number between 0 and 1."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(MaxPercCut) || (MaxPercCut < 0) || (MaxPercCut > 1)) {
        message("MaxPercCut must be a number between 0 and 1.")
        resTable["Has the file passed", ] <- "MaxPercCut must be a number between 0 and 1."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(LowDensityRemoval) || (LowDensityRemoval < 0) || (LowDensityRemoval > 
        1)) {
        message("LowDensityRemoval must be a number between 0 and 1.")
        resTable["Has the file passed", ] <- "LowDensityRemoval must be a number between 0 and 1."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.null(GateLineForce) && !is.numeric(GateLineForce)) {
        message("GateLineForce must be numeric.")
        resTable["Has the file passed", ] <- "GateLineForce must be numeric."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(UseOnlyWorstChannels)) {
        message("UseOnlyWorstChannels must be a logical (boolean).")
        resTable["Has the file passed", ] <- "UseOnlyWorstChannels must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(AmountMeanRangeKeep) || AmountMeanRangeKeep%%1 != 0) {
        message("AmountMeanRangeKeep must be an integer.")
        resTable["Has the file passed", ] <- "AmountMeanRangeKeep must be an integer."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(AmountMeanSDKeep) || AmountMeanSDKeep%%1 != 0) {
        message("AmountMeanSDKeep must be an integer.")
        resTable["Has the file passed", ] <- "AmountMeanSDKeep must be an integer."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(PrintToConsole)) {
        message("PrintToConsole must be a logical (boolean).")
        resTable["Has the file passed", ] <- "PrintToConsole must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(AllowFlaggedRerun) && !is.character(AllowFlaggedRerun)) {
        # a character path is allowed internally on the second run
        message("AllowFlaggedRerun must be a logical (boolean).")
        resTable["Has the file passed", ] <- "AllowFlaggedRerun must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(UseCairo)) {
        message("UseCairo must be a logical (boolean).")
        resTable["Has the file passed", ] <- "UseCairo must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(UnifTimeCheck) || (UnifTimeCheck < 0) || (UnifTimeCheck > 0.5)) {
        message("UnifTimeCheck must be numeric between 0 and 0.5.")
        resTable["Has the file passed", ] <- "UnifTimeCheck must be numeric between 0 and 0.5."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.null(RemoveMultiSD) && !is.numeric(RemoveMultiSD)) {
        message("RemoveMultiSD must be numeric.")
        resTable["Has the file passed", ] <- "RemoveMultiSD must be numeric."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(AlwaysClean)) {
        message("AlwaysClean must be a logical (boolean).")
        resTable["Has the file passed", ] <- "AlwaysClean must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(IgnoreMonotonic)) {
        message("IgnoreMonotonic must be a logical (boolean).")
        resTable["Has the file passed", ] <- "IgnoreMonotonic must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.null(MonotonicFix) && (!is.numeric(MonotonicFix) || (MonotonicFix < 0))) {
        message("MonotonicFix must be NULL or a number greater than or equal to 0.")
        resTable["Has the file passed", ] <- "MonotonicFix must be NULL or a number greater than or equal to 0."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.numeric(Measures)) {
        message("Measures must be a numeric.")
        resTable["Has the file passed", ] <- "Measures must be numeric."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (max(Measures) > 8 || min(Measures) < 1 ) {
        message("Measures must be between 1 and 8.")
        resTable["Has the file passed", ] <- "Measures must be between 1 and 8."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (any(Measures%%1 != 0)) {
        message("Measures must be integers.")
        resTable["Has the file passed", ] <- "Measures must be integers."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    if (!is.logical(Verbose)) {
        message("Verbose must be a logical (boolean).")
        resTable["Has the file passed", ] <- "Verbose must be a logical (boolean)."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    
    #### Extracting the location of channels where flowCut will be applied ####
    t.name <- parameters(f)$name
    t.desc <- parameters(f)$desc
    FSC.loc <- sort(unique(c(grep("fsc", tolower(t.name)), grep("fs lin", tolower(t.name)), 
        grep("*FS", t.name))))
    names(FSC.loc) <- NULL
    SSC.loc <- sort(unique(c(grep("ssc", tolower(t.name)), grep("ss lin", tolower(t.name)), 
        grep("*SS", t.name))))
    names(SSC.loc) <- NULL
    
    Time.loc <- sort(unique(c(grep("time", tolower(t.name)), grep("time", tolower(t.desc)), 
        grep("hdr-t", tolower(t.name)))))
    
    all.Time.loc <- Time.loc
    # if (length(Time.loc) >= 2) { Time.loc <- Time.loc[1] }  # default to take the first
    # names(Time.loc) <- NULL
    
    if (length(all.Time.loc) >= 2) {
        # Multiple time channels
        message("This file has ", length(all.Time.loc), " time channels. flowCut has selected to use ", 
            t.name[Time.loc], " - ", t.desc[Time.loc], ".")
        Time.loc <- Time.loc[1]  # default to take the first.
        nonlap.loc <- all.Time.loc[which(all.Time.loc != Time.loc)]
        parameters(f)$name[nonlap.loc] <- paste0(parameters(f)$name[nonlap.loc], "-Removed")
        colnames(exprs(f))[nonlap.loc] <- paste0(colnames(exprs(f))[nonlap.loc], "-Removed")
    }
    
    Extra.loc <- c(grep("pulse|width|length|count|sort classifier|event|phenograph|barcode", 
        tolower(t.name)))
    names(Extra.loc) <- NULL
    Extra.loc <- unique(Extra.loc)
    if (length(Extra.loc) >= 1 && Verbose == TRUE) {
        cat(paste0("Channels ", paste0(Extra.loc, collapse = ", "), " are removed as they are not channels that need to be analyzed.\n"))
    }
    
    NoVariation <- NULL
    for (NoVar in seq_len(length(colnames(f)))) {
        if (sd(exprs(f)[, NoVar], na.rm = TRUE) == 0) {
            NoVariation <- c(NoVariation, NoVar)
        }
    }
    names(NoVariation) <- NULL
    if (length(NoVariation) >= 1 && Verbose == TRUE) {
        message("Channels ", paste0(NoVariation, collapse = ", "), " have no variation and have been removed from the analysis.")
    }
    
    
    #### Monotonic in Time fix ####
    if (!is.null(MonotonicFix)) {
        time.data <- exprs(f)[, Time.loc]
        if (all(time.data == cummax(time.data)) == FALSE) {
            message("Fixing file ", FileID, " for the monotonic time issue.")
            
            time.diff <- diff(time.data)
            diff.ind.strong <- which(time.diff < -MonotonicFix)
            diff.ind.all <- which(time.diff < 0)
            
            if (length(diff.ind.strong) > 0) {
                time.diffs <- abs(time.diff[diff.ind.strong])
                for (mono.ind in diff.ind.strong) {
                  idx <- seq(mono.ind + 1, length(time.data))
                  time.data[idx] <- time.data[idx] + time.diffs[which(mono.ind == diff.ind.strong)]
                }
            } else {
                message("All of file ", FileID, "'s monotonic issues are larger than MonotonicFix and were not adjusted.")
            }
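            # Worked example (illustrative values only): with time.data of
            # c(10, 20, 5, 15), time.diff is c(10, -15, 10). With MonotonicFix = 10,
            # the drop of 15 at index 2 exceeds the threshold, so all events from
            # index 3 onward are shifted up by 15, giving c(10, 20, 20, 30).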
            
            if (Plot == "All" || (Plot == "Flagged Only")) {
                suppressWarnings(dir.create(paste0(Directory, "/Mono/"), recursive = TRUE))
                pngMono <- paste0(Directory, "/Mono/", gsub(".fcs", "", FileID), 
                  "_Fix.png")
                
                if (PrintToConsole == FALSE) 
                  {
                    if (UseCairo == TRUE) {
                      CairoPNG(pngMono, width = 800, height = 600)
                    } else {
                      png(pngMono, width = 800, height = 600)
                    }
                  }  # else print to console with default settings
                
                par(mfrow = c(1, 1), oma = c(0, 0, 0, 0), mar = c(5, 5, 3, 1))
                plot(seq_len(length(time.data)), time.data, pch = 19, cex = 0.2, 
                  lty = 2, col = "blue", main = paste0("Monotonic Time Correction: ", 
                    gsub(".fcs", "", FileID)), xlab = "Cell Index", ylab = "Time")
                points(seq_len(length(exprs(f)[, Time.loc])), exprs(f)[, Time.loc], 
                  pch = 19, cex = 0.2)
                if (length(diff.ind.all) > 0) {
                  abline(v = diff.ind.all, col = "red")
                }
                if (length(diff.ind.strong) > 0) {
                  abline(v = diff.ind.strong, col = "green4")
                }
                legend("bottomright", legend = c("Original", "Corrected", "Jump Fixed", 
                  "Jump Not Fixed"), col = c("black", "blue", "green4", "red"), lty = 1, 
                  cex = 1, lwd = c(2, 2, 1, 1))
                if (PrintToConsole == FALSE) {
                  dev.off()
                } else {
                  par(mfrow = c(1, 1), mar = c(5, 5, 4, 2), mgp = c(3, 1, 0))  # back to default
                }
                
            }
            exprs(f)[, Time.loc] <- time.data
        }
    }
    
    # Only for cases where channels are themselves monotonically increasing with
    # event index. This has nothing to do with the file being monotonically
    # increasing in the time dimension.
    MonotonicWithTime <- NULL
    for (MonoChan in seq_len(length(colnames(f)))) {
        if (all(exprs(f)[, MonoChan] == cummax(exprs(f)[, MonoChan])) == TRUE) {
            MonotonicWithTime <- c(MonotonicWithTime, MonoChan)
        }
    }
    names(MonotonicWithTime) <- NULL
    MonotonicWithTime <- sort(unique(MonotonicWithTime))
    
    test <- match(all.Time.loc, MonotonicWithTime, nomatch = 0)
    if (any(test >= 1)) {
        MonotonicWithTime <- MonotonicWithTime[-test]
    }
    
    if (length(MonotonicWithTime) >= 1 && Verbose == TRUE) {
        message("Channels ", paste0(MonotonicWithTime, collapse = ", "), " are monotonically increasing in time and have been removed from the analysis.")
    }
    
    
    
    
    if (length(which(NoVariation == Time.loc)) >= 1) {
        message("Your time channel has no variation.")
        parameters(f)$name[Time.loc] <- paste0(parameters(f)$name[Time.loc], "-Removed")
        colnames(exprs(f))[Time.loc] <- paste0(colnames(exprs(f))[Time.loc], "-Removed")
        Time.loc <- NULL
        
        # Alternative condition: if (length(which(NoVariation != all.Time.loc)) >= 1)
        # Does something exist in all.Time.loc that isn't in NoVariation?
        if (length(which(is.na(match(all.Time.loc, NoVariation)))) >= 1) {
            
            message("The first time channel will be replaced by the second time channel.")
            Time.loc <- all.Time.loc[!(all.Time.loc %in% NoVariation)][1]
            parameters(f)$name[Time.loc] <- "Time"
            nonlap.loc <- all.Time.loc[which(all.Time.loc != Time.loc)]
            parameters(f)$name[nonlap.loc] <- paste0(parameters(f)$name[nonlap.loc], "-Removed")
            colnames(exprs(f))[nonlap.loc] <- paste0(colnames(exprs(f))[nonlap.loc], "-Removed")
        }
    }
    
    #### Creating a time channel if none is specified #########################
    if (length(Time.loc) == 0) {
        message("Your data does not have a time channel. flowCut will", " create one, but now flowCut will not be as fully", 
            " functional as it could be. Consider recording the time", " for future projects.")
        time_col <- matrix(seq_len(nrow(f)), ncol=1)
        colnames(time_col) <- "Time"
        f <- fr_append_cols(f, time_col)
        pd <- pData(parameters(f))
        pd[pd$name == "Time", c("range", "minRange", "maxRange")] <- c(262144, -144, 262143)
        pData(parameters(f)) <- pd
        rownames(parameters(f))[length(colnames(f))] <- paste0("$P", length(colnames(f)))
        description(f)[paste0("P", length(colnames(f)), "DISPLAY")] <- "LIN"  # 'LOG'
        description(f)[paste0("flowCore_$P", length(colnames(f)), "Rmax")] <- 262143
        description(f)[paste0("flowCore_$P", length(colnames(f)), "Rmin")] <- 0
        description(f)[paste0("P", length(colnames(f)), "B")] <- "0"
        description(f)[paste0("P", length(colnames(f)), "R")] <- "262143"
        description(f)[paste0("P", length(colnames(f)), "N")] <- "Time"
        description(f)[paste0("P", length(colnames(f)), "G")] <- "1"
        description(f)[paste0("P", length(colnames(f)), "E")] <- "0,0"
        
        
        Time.loc <- length(colnames(f))
    }
    
    if (length(c(FSC.loc, SSC.loc)) == 0) {
        message("No FCS or SSC channels found.")
        # return(list(frame=f, ind=NULL, data=resTable, worstChan=NULL))
    }
    
    range_of_time <- max(exprs(f)[, Time.loc]) - min(exprs(f)[, Time.loc])
    
    Time_test_passes <- TRUE
    
    if (range_of_time != 0) {
        
        # is the mean and midpoint similar?
        uniformity_in_time_test <- abs(mean(exprs(f)[, Time.loc]) - (range_of_time/2 + 
            min(exprs(f)[, Time.loc])))/range_of_time
        
        # print(uniformity_in_time_test)
        
        # dividing_points <- seq(min(f@exprs[,Time.loc]), max(f@exprs[,Time.loc]),
        # length.out = 11) uniformity_in_time_test_2 <- sapply (1:10, function(x) {
        # length(intersect( which(f@exprs[,Time.loc] <= dividing_points[x+1]),
        # which(f@exprs[,Time.loc] >= dividing_points[x])) ) })
        
        # This number needs to be set better. Used to be 0.2, moved to 0.22 to work with
        # a particular project.
        if (uniformity_in_time_test >= UnifTimeCheck) {
            message("The time channel does not appear to be distributed like an expected time channel would be.")
            Time_test_passes <- FALSE
        }
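        # Worked example (illustrative values only): if the time channel spans
        # 0 to 100 with mean 73, the statistic is |73 - (0 + 100/2)|/100 = 0.23,
        # which fails the default UnifTimeCheck of 0.22.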
        
        # if ( (min(uniformity_in_time_test_2) <= 0.05*max(uniformity_in_time_test_2)) ){
        # message('The 2 time channel does not appear to be distributed like an expected
        # time channel would be.') Time_test_passes <- FALSE }
        
        # is there a bunch of repeats?
        uniformity_in_time_test_3 <- max(table(exprs(f)[, Time.loc]))
        
        # print(uniformity_in_time_test_3)
        
        if (uniformity_in_time_test_3 >= 0.05 * nrow(f)) {
            message("There appears to be an overwhelming amount of repeated time values.")
            Time_test_passes <- FALSE
        }
    } else {
        # if all the same time value, messes up other time tests
        message("Range of the time channel is 0.")
        Time_test_passes <- FALSE
    }
    
    if (Time_test_passes == FALSE) {
        message("Time test(s) failed.")
        resTable["Has the file passed", ] <- "Time test(s) failed."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    
    # channels to clean
    CleanChan.loc <- (seq_len(ncol(f)))[-c(FSC.loc, SSC.loc, Time.loc, all.Time.loc, 
        Extra.loc, NoVariation, MonotonicWithTime)]
    
    if (length(CleanChan.loc) == 0) {
        message("No marker channels to run flowCut on.")
        resTable["Has the file passed", ] <- "No marker channels to run flowCut on."
        return(list(frame = f, ind = NULL, data = resTable, worstChan = NULL))
    }
    
    # Use all non scatter/time channels unless otherwise specified
    if (!is.null(Channels)) {
        if (all(is.character(Channels))) {
            Channels <- sort(unique(sapply(seq_len(length(Channels)), function(x) {
                grep(tolower(Channels[x]), tolower(parameters(f)$desc))
            })))
        }
        
        # Forces the user not to use FSC, SSC and Time
        CleanChan.loc <- intersect(CleanChan.loc, Channels)
    }
    
    ind.removed <- NA
    f.org <- f
    
    #### Test if the file is monotonic ########################################
    if (all(exprs(f)[, Time.loc] == cummax(exprs(f)[, Time.loc])) == FALSE && !IgnoreMonotonic) {
        message("The flow frame is not monotonically increasing in time.")
        resTable["Is it monotonically increasing in time", ] <- "F"
    } else {
        resTable["Is it monotonically increasing in time", ] <- "T"
    }
    
    #### Remove low density sections ##########################################
    res.temp <- removeLowDensSections(f, Time.loc = Time.loc, Segment, LowDensityRemoval, 
        Verbose = Verbose)
    f <- res.temp$frame
    removeIndLowDens <- res.temp$rem.ind
    remove(res.temp)
    resTable["Has a low density section been removed", ] <- 
        if (length(removeIndLowDens) >= 1) "T" else "F"
    resTable["% of low density removed", ] <- as.character(round(length(removeIndLowDens)/nrow(f.org), 
        digits = 4) * 100)
    
    #### 1st Time: Calculate means and which segments will be removed #########
    res.temp <- calcMeansAndSegmentsRemoved(f = f, Segment = Segment, CleanChan.loc = CleanChan.loc, 
        FirstOrSecond = "First", MaxValleyHgt = MaxValleyHgt, MaxPercCut = MaxPercCut, 
        MaxContin = MaxContin, MeanOfMeans = MeanOfMeans, MaxOfMeans = MaxOfMeans, 
        GateLineForce = GateLineForce, UseOnlyWorstChannels = UseOnlyWorstChannels, 
        AmountMeanRangeKeep = AmountMeanRangeKeep, AmountMeanSDKeep = AmountMeanSDKeep, 
        RemoveMultiSD = RemoveMultiSD, AlwaysClean = AlwaysClean, Verbose = Verbose, 
        Measures = Measures, Time.loc = Time.loc)
    
    deletedSegments1 <- res.temp$deletedSegments
    quantiles1 <- res.temp$quantiles
    storeMeans1 <- res.temp$storeMeans
    meanRangePerc1 <- res.temp$meanRangePerc
    timeCentres1 <- res.temp$timeCentres
    typeOfGating <- res.temp$typeOfGating
    densityGateLine <- res.temp$densityGateLine
    cellDelete2 <- res.temp$cellDelete2
    choosenChans <- res.temp$choosenChans
    remove(res.temp)
    
    #### Now delete segments that are statistically different #################
    removed.ind <- NULL
    totalNumSeg <- floor(nrow(exprs(f))/Segment)
    
    if (length(deletedSegments1) == 0) {
        # If nothing is deleted
        if (Verbose == TRUE) {
            cat("None deleted from flowCut segment removal.\n")
        }
        resTable["How many segments have been removed", ] <- as.character(0)
    } else {
        deletedSegments1 <- sort(unique(deletedSegments1), decreasing = FALSE)
        
        # Turn segments removed into ranges for printing to console
        del.seg.list <- split(deletedSegments1, cumsum(c(1, diff(deletedSegments1) != 
            1)))
        print.segs.rem <- sapply(seq_len(length(del.seg.list)), function(x) {
            if (length(del.seg.list[[x]]) >= 2) {
                paste0(del.seg.list[[x]][1], "-", del.seg.list[[x]][length(del.seg.list[[x]])])
            } else {
                paste0(del.seg.list[[x]][1])
            }
        })
        
        if (Verbose == TRUE) {
            cat(paste0("Removing segments ", paste0(print.segs.rem, collapse = ", "), 
                " out of ", totalNumSeg, " segments.\n"))
        }
        resTable["How many segments have been removed", ] <- as.character(length(deletedSegments1))
        for (n in seq_len(length(deletedSegments1))) {
            if (deletedSegments1[n] == totalNumSeg) 
                removed.ind <- c(removed.ind, (Segment * (deletedSegments1[n] - 1) + 
                  1):nrow(exprs(f)))
            if (deletedSegments1[n] != totalNumSeg) 
                removed.ind <- c(removed.ind, Segment * (deletedSegments1[n] - 1) + 
                  (seq_len(Segment)))
        }
        exprs(f) <- exprs(f)[-removed.ind, ]
    }
    resTable["% of events removed from segments removed", ] <- as.character(round(length(removed.ind)/nrow(f.org), 
        digits = 4) * 100)
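    # For illustration: with Segment = 500, deleting segment n = 3 removes events
    # 1001 to 1500 (Segment*(n-1)+1 through Segment*n); deleting the last segment
    # removes everything from Segment*(n-1)+1 through nrow(exprs(f)).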
    
    #### 2nd Time: Calculate means and which segments will be removed #########
    #### This time we are only interested in the calculations of quantiles, means,
    #### and mean range percentage; we do not want to do any deletion.
    res.temp <- calcMeansAndSegmentsRemoved(f = f, Segment = Segment, CleanChan.loc = CleanChan.loc, 
        FirstOrSecond = "Second", MaxValleyHgt = MaxValleyHgt, MaxPercCut = MaxPercCut, 
        MaxContin = MaxContin, MeanOfMeans = MeanOfMeans, MaxOfMeans = MaxOfMeans, 
        GateLineForce = GateLineForce, UseOnlyWorstChannels = UseOnlyWorstChannels, 
        AmountMeanRangeKeep = AmountMeanRangeKeep, AmountMeanSDKeep = AmountMeanSDKeep, 
        RemoveMultiSD = RemoveMultiSD, AlwaysClean = AlwaysClean, Verbose = Verbose, 
        Measures = Measures, Time.loc = Time.loc)
    quantiles2 <- res.temp$quantiles
    storeMeans2 <- res.temp$storeMeans
    meanRangePerc2 <- res.temp$meanRangePerc
    timeCentres2 <- res.temp$timeCentres
    remove(res.temp)
    
    #### Continuous Check #####################################################
    #### Check if there are sudden changes in the mean for neighbouring segments.
    #### If the difference between the mean of a given segment and the mean of an
    #### adjacent segment, divided by the difference of the 98th and 2nd
    #### percentiles for the whole file, is larger than a critical value, then
    #### the file is flagged.
    
    maxDistJumped <- rep(0, max(CleanChan.loc))
    for (j in CleanChan.loc) {
        temp.vect <- abs(diff(storeMeans2[[j]]))/(quantiles1[[j]]["98%"] - quantiles1[[j]]["2%"])
        maxDistJumped[j] <- max(temp.vect)
    }
    resTable["Largest continuous jump", ] <- as.character(round(max(maxDistJumped, 
        na.rm = TRUE), digits = 3))
    if (resTable["Largest continuous jump", ] >= as.numeric(MaxContin)) {
        resTable["Continuous - Pass", ] <- "F"
        if (Verbose == TRUE) {
            message("The file has been flagged. The largest continuous jump ", "was larger than ", 
                MaxContin * 100, "% of the range of the ", "2-98 percentile of the full data.")
        }
    } else {
        resTable["Continuous - Pass", ] <- "T"
    }
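    # Worked example (illustrative values only): if two adjacent segment means
    # are 2000 and 2600 while the 2-98 percentile range of that channel is 5000,
    # the jump is |2600 - 2000|/5000 = 0.12, which exceeds the default MaxContin
    # of 0.1 and flags the file.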
    
    #### Mean of Range of Means Divided by Range of Data ######################
    #### If there is a large gradual change of the means of the fluorescence in
    #### all channels, then the file is flagged.
    resTable["Mean of % of range of means divided by range of data", ] <- as.character(round(mean(meanRangePerc2, 
        na.rm = TRUE), digits = 3))
    if (resTable["Mean of % of range of means divided by range of data", ] >= as.numeric(MeanOfMeans)) {
        if (Verbose == TRUE) {
            message("The file has been flagged. The means differ more than ", MeanOfMeans * 
                100, "% of the range of the 2-98 percentile of ", "the full data.")
        }
        resTable["Mean of % - Pass", ] <- "F"
    } else {
        resTable["Mean of % - Pass", ] <- "T"
    }
    
    #### Max of Range of Means Divided by Range of Data #######################
    #### If there is a very large gradual change of the means of the fluorescence
    #### in at least one channel, then the file is flagged.
    resTable["Max of % of range of means divided by range of data", ] <- as.character(round(max(meanRangePerc2, 
        na.rm = TRUE), digits = 3))
    # use meanRangePerc1 because we want the worst marker before any corrections.
    worstChan <- min(which(meanRangePerc1 == max(meanRangePerc1, na.rm = TRUE)))
    names.worstchan <- parameters(f)$name[worstChan]
    names(names.worstchan) <- NULL
    resTable["Worst channel", ] <- names.worstchan
    if (as.numeric(resTable["Max of % of range of means divided by range of data", ]) >= MaxOfMeans) {
        if (Verbose == TRUE) {
            message("The file has been flagged. The max ranged means differ ", "more than ", 
                MaxOfMeans * 100, "% of the range of the 2-98 ", "percentile of the full data.")
        }
        resTable["Max of % - Pass", ] <- "F"
    } else {
        resTable["Max of % - Pass", ] <- "T"
    }
    
    #### Organize the indices that have been removed ##########################
    if (is.null(removed.ind) && is.null(removeIndLowDens)) 
        to.be.removed <- NULL
    if (is.null(removed.ind) && !is.null(removeIndLowDens)) 
        to.be.removed <- removeIndLowDens
    if (!is.null(removed.ind) && is.null(removeIndLowDens)) 
        to.be.removed <- removed.ind
    if (!is.null(removed.ind) && !is.null(removeIndLowDens)) {
        # lowDens was removed first
        temp <- setdiff(seq_len(nrow(f.org)), removeIndLowDens)
        to.be.kept <- temp[setdiff(seq_len(length(temp)), removed.ind)]
        to.be.removed <- setdiff(seq_len(nrow(f.org)), to.be.kept)
    }
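    # Worked example (illustrative values only): with 10 original events, if low
    # density removal dropped indices 3 and 4, and segment removal then dropped
    # index 5 of the remaining 8 events, then:
    #   temp          = c(1, 2, 5, 6, 7, 8, 9, 10)
    #   to.be.kept    = c(1, 2, 5, 6, 8, 9, 10)
    #   to.be.removed = c(3, 4, 7)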
    
    resTable["% of events removed", ] <- as.character(round(length(to.be.removed)/nrow(f.org), 
        digits = 4) * 100)
    if ("TTTT" == paste0(resTable["Is it monotonically increasing in time", ], resTable["Continuous - Pass", 
        ], resTable["Mean of % - Pass", ], resTable["Max of % - Pass", ])) {
        resTable["Has the file passed", ] <- "T"
    } else {
        resTable["Has the file passed", ] <- "F"
    }
    
    # Keep track of the worst channels by using asterisks
    asterisks <- rep("", max(CleanChan.loc))
    if (UseOnlyWorstChannels == TRUE) {
        asterisks <- sapply(seq_len(max(CleanChan.loc)), function(x) {
            if (length(which(x == choosenChans)) >= 1) {
                asterisks <- " *"
            } else {
                asterisks <- ""
            }
            return(asterisks)
        })
    }
    
    if (Verbose == TRUE) {
        cat("Type of Gating: ", typeOfGating, ".\n", sep = "")
    }
    resTable["Type of Gating", ] <- typeOfGating
    
    #### Plotting #############################################################
    if (resTable["Is it monotonically increasing in time", ] == "T") {
        PassedMono <- "T"
    } else {
        PassedMono <- "F"
    }
    if (resTable["Continuous - Pass", ] == "T") {
        PassedCont <- "T"
    } else {
        PassedCont <- "F"
    }
    if (resTable["Mean of % - Pass", ] == "T") {
        PassedMean <- "T"
    } else {
        PassedMean <- "F"
    }
    if (resTable["Max of % - Pass", ] == "T") {
        PassedMax <- "T"
    } else {
        PassedMax <- "F"
    }
    if (resTable["Has the file passed", ] == "T") {
        FlaggedOrNot <- "Passed"
    } else {
        FlaggedOrNot <- "Flagged"
    }
    
    # the pngName will be passed through the variable 'AllowFlaggedRerun' for the
    # second run to make the second figure have a suffix.
    pngName <- paste0(Directory, "/", gsub(".fcs", "", FileID), "_", FlaggedOrNot, 
        "_", PassedMono, PassedCont, PassedMean, PassedMax, ".png")
    
    if (Plot == "All" || (Plot == "Flagged Only" && FlaggedOrNot == "Flagged")) {
        # z1 and z2 are the dimensions of the figure
        z1 <- ceiling(sqrt(length(CleanChan.loc) + 2))
        if ((z1^2 - z1) >= (length(CleanChan.loc) + 2)) {
            z2 <- z1 - 1
        } else {
            z2 <- z1
        }
        
        suppressWarnings(dir.create(paste0(Directory), recursive = TRUE))
        if (AllowFlaggedRerun != TRUE && AllowFlaggedRerun != FALSE && file.exists(AllowFlaggedRerun)) {
            pngName <- gsub(".png", "_2nd_run.png", pngName)
        }
        
        if (PrintToConsole == FALSE) {
            if (UseCairo == TRUE) {
                CairoPNG(filename = pngName, width = (z1) * 600, height = z2 * 600)
            } else {
                png(filename = pngName, width = (z1) * 600, height = z2 * 600)
            }
            par(mfrow = c(z2, z1), mar = c(7, 7, 4, 2), mgp = c(4, 1.5, 0), oma = c(0, 
                0, 5, 0))
            cex.size <- 3
        } else {
            par(mfrow = c(z2, z1), mar = c(5, 5, 4, 2), mgp = c(3, 1, 0), oma = c(0, 
                0, 5, 0))
            cex.size <- 1.5
        }
        
        for (x in CleanChan.loc) {
            plotDens(f.org, c(Time.loc, x), cex.main = cex.size, cex.lab = cex.size, 
                cex.axis = cex.size, main = paste0(round(meanRangePerc1[x], digits = 3), 
                  " / ", round(meanRangePerc2[x], digits = 3), " (", round(max(maxDistJumped[x]), 
                    digits = 3), ")", asterisks[x]))
            
            if (length(to.be.removed) != 0) 
                points(exprs(f.org)[to.be.removed, c(Time.loc, x)], col = 1, pch = ".")
            if ((length(removeIndLowDens) != 0)) 
                points(exprs(f.org)[removeIndLowDens, c(Time.loc, x)], pch = ".", 
                  cex = 1, col = "grey")
            lines(x = (timeCentres1), y = storeMeans1[[x]], cex.main = cex.size, 
                cex.lab = cex.size, cex.axis = cex.size, lwd = 4, col = "deeppink2")
            lines(x = (timeCentres2), y = storeMeans2[[x]], cex.main = cex.size, 
                cex.lab = cex.size, cex.axis = cex.size, lwd = 4, col = "brown")
            abline(h = c(quantiles1[[x]]["98%"], quantiles1[[x]]["2%"]), lwd = 4, 
                col = "chocolate2")
            abline(h = c(quantiles2[[x]]["98%"], quantiles2[[x]]["2%"]), lwd = 4, 
                col = "chocolate4")
        }
        x <- worstChan
        
        plotDens(f.org, c(Time.loc, x), cex.main = cex.size, cex.lab = cex.size, 
            cex.axis = cex.size, main = paste0("Worst Channel without indices removed"))
        
        temp <- density(cellDelete2, adjust = 1)
        graphics::plot(temp, cex.main = cex.size, cex.lab = cex.size, cex.axis = cex.size, 
            main = "Density of summed measures")
        abline(v = densityGateLine, lwd = 2)
        title(main = paste0(FileID, " ", FlaggedOrNot, " ", PassedMono, PassedCont, 
            PassedMean, PassedMax), outer = TRUE, cex.main = cex.size + 1)
        if (PrintToConsole == FALSE) {
            dev.off()
        } else {
            par(mfrow = c(1, 1), mar = c(5, 5, 4, 2), mgp = c(3, 1, 0))  # back to default
        }
    }
    
    if (Verbose == TRUE) {
        if (resTable["Has the file passed", ] == "T") {
            cat("File Passed\n")
        } else {
            cat(paste0("The file has been flagged ", PassedMono, PassedCont, PassedMean, 
                PassedMax, "\n"))
        }
    }
    
    if (Verbose == TRUE) {
        cat("Cleaning completed in: ", TimePrint(start0), "\n", sep = "")
    }
    if (AllowFlaggedRerun == TRUE && resTable["Has the file passed", ] == "F") {
        if (Verbose == TRUE) {
            cat("Running flowCut a second time.\n")
        }
        res_flowCut <- flowCut(f = f, Segment = Segment, Channels = Channels, Directory = Directory, 
            FileID = FileID, Plot = Plot, MaxContin = MaxContin, MeanOfMeans = MeanOfMeans, 
            MaxOfMeans = MaxOfMeans, MaxValleyHgt = MaxValleyHgt, MaxPercCut = MaxPercCut, 
            LowDensityRemoval = LowDensityRemoval, GateLineForce = GateLineForce, 
            UseOnlyWorstChannels = UseOnlyWorstChannels, AmountMeanSDKeep = AmountMeanSDKeep, 
            AmountMeanRangeKeep = AmountMeanRangeKeep, PrintToConsole = PrintToConsole, 
            AllowFlaggedRerun = pngName, UseCairo = UseCairo, UnifTimeCheck = UnifTimeCheck, 
            RemoveMultiSD = RemoveMultiSD, AlwaysClean = AlwaysClean, IgnoreMonotonic = IgnoreMonotonic, 
            MonotonicFix = MonotonicFix, Measures = Measures, Verbose = Verbose)
        if (res_flowCut$data["Has the file passed", ] == "Time test(s) failed.") {
            message("Time test(s) failed on the second run. Returning results from the first run for flowCut.")
            return(list(frame = f, ind = to.be.removed, data = resTable, worstChan = worstChan))
        }
        
        indOfInd <- setdiff(seq_len(nrow(f.org)), to.be.removed)
        indOfInd <- sort(c(indOfInd[res_flowCut$ind], to.be.removed))
        
        resTableOfResTable <- res_flowCut$data
        # if ( as.numeric(res_flowCut$data['Largest continuous jump', ]) <
        # as.numeric(resTable['Largest continuous jump', ])){ resTableOfResTable['Largest
        # continuous jump', ] <- resTable['Largest continuous jump', ] } if (
        # as.numeric(res_flowCut$data['Mean of % of range of means divided by range of
        # data', ]) < as.numeric(resTable['Mean of % of range of means divided by range
        # of data', ])){ resTableOfResTable['Mean of % of range of means divided by range
        # of data', ] <- resTable['Mean of % of range of means divided by range of data',
        # ] } if ( as.numeric(res_flowCut$data['Max of % of range of means divided by
        # range of data', ]) < as.numeric(resTable['Max of % of range of means divided by
        # range of data', ])){ resTableOfResTable['Max of % of range of means divided by
        # range of data', ] <- resTable['Max of % of range of means divided by range of
        # data', ] }
        resTableOfResTable["% of low density removed", ] <- as.character(round((nrow(f) * 
            as.numeric(res_flowCut$data["% of low density removed", ]) + nrow(f.org) * 
            as.numeric(resTable["% of low density removed", ]))/nrow(f.org), digits = 4))
        resTableOfResTable["How many segments have been removed", ] <- as.character(as.numeric(res_flowCut$data["How many segments have been removed", 
            ]) + as.numeric(resTable["How many segments have been removed", ]))
        resTableOfResTable["% of events removed from segments removed", ] <- as.character(round((nrow(f) * 
            as.numeric(res_flowCut$data["% of events removed from segments removed", 
                ]) + nrow(f.org) * as.numeric(resTable["% of events removed from segments removed", 
            ]))/nrow(f.org), digits = 4))
        resTableOfResTable["% of events removed", ] <- as.character(round((nrow(f) * 
            as.numeric(res_flowCut$data["% of events removed", ]) + nrow(f.org) * 
            as.numeric(resTable["% of events removed", ]))/nrow(f.org), digits = 4))
        resTableOfResTable["Was the file run twice", ] <- "Yes"
        return(list(frame = res_flowCut$frame, ind = indOfInd, data = resTableOfResTable, 
            worstChan = res_flowCut$worstChan))
    }
    return(list(frame = f, ind = to.be.removed, data = resTable, worstChan = worstChan))
}


## Remove sections that have a very low density.
## Sections that have a density of less than LowDensityRemoval (default 10%)
## of the maximum density are removed. An additional 2.5% of the range of these
## low density regions on either side is also removed, because it is very common
## to have a burst of events before or after these low density sections. Because
## the density function's indices do not match the flowFrame's indices, the
## density function indices need to be converted to flowFrame indices.
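## A minimal standalone usage sketch (an assumption-laden illustration; `f` is
## assumed to be a flowFrame with a recorded time channel):
##   Time.loc <- grep("time", tolower(parameters(f)$name))[1]
##   res <- removeLowDensSections(f, Time.loc = Time.loc, Segment = 500,
##                                LowDensityRemoval = 0.1)
##   f.trimmed <- res$frame   # flowFrame with the low density sections removed
##   res$rem.ind              # row indices of f that were removed (NULL if none)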
removeLowDensSections <- function(f, Time.loc, Segment = 500, LowDensityRemoval = 0.1, 
    Verbose = FALSE) {
    
    if (LowDensityRemoval == 0) {
        # don't want to remove low density sections on the rerun
        return(list(frame = f, rem.ind = NULL))
    }
    
    # Time.loc <- which(tolower(f@parameters@data$name) == 'time')
    # names(Time.loc) <- NULL
    
    # The following only plays a role if users call removeLowDensSections
    # independently of flowCut:
    # if (length(Time.loc) == 0) {
    #     message('There is no time channel. removeLowDensSections only works ',
    #         'with a time channel.')
    #     return(list(frame = f, rem.ind = NULL))
    # }
    
    minTime <- min(exprs(f)[, Time.loc])
    maxTime <- max(exprs(f)[, Time.loc])
    
    # Change flow frame data into a density
    dens.f <- density(exprs(f)[, Time.loc], n = nrow(f), adjust = 0.1)
    # Extracting indices that have density values below 10% (or a user-specified
    # threshold) of the maximum density
    low.dens.removeIndLowDens <- which(dens.f$y <= LowDensityRemoval * max(dens.f$y))
    
    # Split the consecutive sections into separate elements in the list
    range.low.dens <- split(low.dens.removeIndLowDens, cumsum(c(1, diff(low.dens.removeIndLowDens) != 
        1)))
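    # For illustration, the split/cumsum idiom groups consecutive runs of indices:
    # split(c(4, 5, 6, 10, 11), cumsum(c(1, diff(c(4, 5, 6, 10, 11)) != 1)))
    # yields list(`1` = c(4, 5, 6), `2` = c(10, 11)).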
    
    # If there are indices left in range.low.dens to be removed
    if (length(range.low.dens) != 0) {
        
        # change groups of indices to ranges
        range.low.dens <- lapply(seq_len(length(range.low.dens)), function(x) {
            range(range.low.dens[[x]])
        })
        # Add 2.5% each way to the range (the buffer is currently disabled in the
        # commented code below).
        range.low.dens <- lapply(seq_len(length(range.low.dens)), function(x) {
            range.temp <- range.low.dens[[x]][2] - range.low.dens[[x]][1]
            
            # if (range.temp <= 0.25 *(maxTime-minTime)){ # only add buffer if range is less
            # than 25% of the total time range new.range <- c(
            # max(round(range.low.dens[[x]][1]-0.025*range.temp), 1),
            # min(round(range.low.dens[[x]][2]+0.025*range.temp), length(dens.f$y)) ) } else
            # {
            new.range <- c(max(round(range.low.dens[[x]][1]), 1), min(round(range.low.dens[[x]][2]), 
                length(dens.f$y)))
            # }
            return(new.range)
        })
        
        # Change density function indices to time coordinates
        range.low.dens <- lapply(seq_len(length(range.low.dens)), function(x) {
            c(dens.f$x[range.low.dens[[x]][1]], dens.f$x[range.low.dens[[x]][2]])
        })
        
        removeIndLowDens <- NULL
        
        # Change time coordinates to flowframe indices, removeIndLowDens contains
        # flowframe indices to be removed
        time_pointsloc <- exprs(f)[, Time.loc]
        for (b2 in seq_len(length(range.low.dens))) {
            removeIndLowDens <- c(removeIndLowDens, intersect(which(time_pointsloc >= 
                range.low.dens[[b2]][1]), which(time_pointsloc <= range.low.dens[[b2]][2])))
        }
        removeIndLowDens <- unique(removeIndLowDens)
        
        if (length(removeIndLowDens) == 0) {
            if (Verbose == TRUE) {
                cat("None deleted from flowCut low dens removal.\n")
            }
        } else if (length(removeIndLowDens) == nrow(f)) {
            if (Verbose == TRUE) {
                cat("Low density removal removed all events. Probably a spike in the time channel. Therefore, removing no events.\n")
            }
            removeIndLowDens <- NULL
        } else if (length(removeIndLowDens) > 0.5 * nrow(f)) {
            if (Verbose == TRUE) {
                cat("Low density removal removed more than half of the events. Probably a spike in the marker channels. Therefore, removing no events.\n")
            }
            removeIndLowDens <- NULL
        } else if ((nrow(f) - length(removeIndLowDens)) < Segment * 3) {
            if (Verbose == TRUE) {
                cat("Low density removal removed too many events. If we allowed removal of all low density events, then there", 
                  "would be less than Segment*3 events and then many errors would occur in flowCut. Therefore, we are not removing any.\n")
            }
            removeIndLowDens <- NULL
        } else {
            temp.text <- range.low.dens  # Create temp.text for printing to console purposes
            
            # Check if the ranges of time coordinates exceed the maxTime and minTime, if so,
            # update the end points
            if (temp.text[[length(temp.text)]][1] < maxTime && temp.text[[length(temp.text)]][2] > 
                maxTime) {
                temp.text[[length(temp.text)]][2] <- maxTime
            }
            if (temp.text[[length(temp.text)]][1] > maxTime && temp.text[[length(temp.text)]][2] > 
                maxTime) {
                temp.text[[length(temp.text)]] <- NULL
            }
            
            if (temp.text[[1]][1] < minTime && temp.text[[1]][2] > minTime) {
                temp.text[[1]][1] <- minTime
            }
            if (temp.text[[1]][1] < minTime && temp.text[[1]][2] < minTime) {
                temp.text[[1]] <- NULL
            }
            
            for (p1 in seq_len(length(temp.text))) {
                temp.text[[p1]] <- paste(round(temp.text[[p1]], digits = 2), collapse = " to ")
            }
            # Converts to text-form for output
            temp.text <- paste(temp.text, collapse = ", ")
            
            if (Verbose == TRUE) {
                cat(paste0("Removing low density ranges ", temp.text, ".\n"))
            }
            exprs(f) <- exprs(f)[-removeIndLowDens, ]
        }
    } else {
        # If there are no indices to be removed
        if (Verbose == TRUE) {
            cat("None deleted from flowCut low dens removal.\n")
        }
        removeIndLowDens <- NULL
    }
    return(list(frame = f, rem.ind = removeIndLowDens))
}



## Calculate means and which segments will be removed.
## All events are broken down into segments for analysis. Eight measures of each
## segment (mean, median, 5th, 20th, 80th and 95th percentiles, second moment
## (variation) and third moment (skewness)) are calculated. The segments where
## these eight measures are significantly different from the rest will be
## removed.
calcMeansAndSegmentsRemoved <- function(f, Segment, CleanChan.loc, FirstOrSecond, 
    MaxValleyHgt, MaxPercCut, MaxContin, MeanOfMeans, MaxOfMeans, GateLineForce, 
    UseOnlyWorstChannels, AmountMeanRangeKeep, AmountMeanSDKeep, RemoveMultiSD, AlwaysClean, 
    Verbose, Measures, Time.loc) {
    
    # f is the flow frame.
    # FirstOrSecond determines what calculation or process will be performed.
    
    deletedSegments <- NULL
    meanRangePerc <- NULL
    timeCentres <- NULL
    cellDelete <- list()
    storeMeans <- list()
    quantiles <- list()
    totalNumSeg <- floor(nrow(exprs(f))/Segment)
    
    maxDistJumped <- rep(0, max(CleanChan.loc))
    # Calculate all means except for the last segment. Each segment contains
    # 'Segment' (default 500) events.
    
    
    for (k in seq_len(totalNumSeg - 1)) {
        temp <- exprs(f)[Segment * (k - 1) + (seq_len(Segment)), Time.loc]
        # timeCentres contains the means of each segment
        timeCentres[k] <- mean(temp)
    }
    # Calculating the mean of the last segment
    k <- totalNumSeg
    temp <- exprs(f)[(Segment * (k - 1) + 1):nrow(exprs(f)), Time.loc]
    timeCentres[k] <- mean(temp)
    
    for (j in CleanChan.loc) {
        segSummary <- matrix(0, totalNumSeg, 8)
        # Calculating the eight features for each segment except the last one and store
        # them in the matrix
        for (k in seq_len(totalNumSeg - 1)) {
            temp <- exprs(f)[Segment * (k - 1) + (seq_len(Segment)), c(j)]
            segSummary[k, ] <- c(quantile(temp, probs = c(5, 20, 50, 80, 95)/100), 
                mean(temp), sapply(2:3, function(x) {
                  moment(temp, order = x, center = TRUE)
                }))
        }
        # Calculating the eight features for the last segment and store them in the
        # matrix
        k <- totalNumSeg
        temp <- exprs(f)[(Segment * (k - 1) + 1):nrow(exprs(f)), c(j)]
        segSummary[k, ] <- c(quantile(temp, probs = c(5, 20, 50, 80, 95)/100), mean(temp), 
            sapply(2:3, function(x) {
                moment(temp, order = x, center = TRUE)
            }))
        
        storeMeans[[j]] <- segSummary[, 6]
        quantiles[[j]] <- quantile(exprs(f)[, j], probs = c(0, 2, 98, 100)/100)
        meanRangePerc[j] <- (max(storeMeans[[j]]) - min(storeMeans[[j]]))/(quantiles[[j]]["98%"] - 
            quantiles[[j]]["2%"])
        
        if (FirstOrSecond == "First") {
            # calculate Z-scores
            segSummary <- rbind(sapply(seq_len(ncol(segSummary)), function(x) {
                (segSummary[, x] - mean(segSummary[, x]))/sd(segSummary[, x])
            }))
            segSummary <- replace(segSummary, is.nan(segSummary), 0)
            cellDeleteExpo <- abs(segSummary)
            
            # if (j==1){ return(cellDeleteExpo) }
            
            # Sum up each row of the resulting matrix
            cellDeleteExpo <- sapply(seq_len(nrow(cellDeleteExpo)), function(x) {
                sum(cellDeleteExpo[x, Measures]) # sum across a subset of the measures.
            })
            cellDelete[[j]] <- cellDeleteExpo
            
            temp.vect <- abs(diff(storeMeans[[j]])) / (quantiles[[j]]["98%"] - quantiles[[j]]["2%"])
            maxDistJumped[j] <- max(temp.vect)
        }
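        # Worked example (illustrative values only): if a segment's absolute
        # Z-scores over the eight measures are c(0.1, 0.2, 3.0, 2.5, 0.3, 2.8,
        # 0.1, 0.2) and Measures = 1:8, its summed score is 9.2; unusually high
        # summed scores mark the segment for deletion below.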
    }  # End of for-loop
    
    if (length(cellDelete) > 0) {
        choosenChans <- NULL
        if (UseOnlyWorstChannels == TRUE) {
            if (length(which(!is.na(meanRangePerc))) >= AmountMeanRangeKeep && AmountMeanRangeKeep >= 
                1) {
                choosenChans <- sort(unique(c(choosenChans, sapply(sort(meanRangePerc, 
                  decreasing = TRUE)[seq_len(AmountMeanRangeKeep)], function(x) {
                  which(x == meanRangePerc)
                }))))
            }
            
            sdMeans <- sapply(seq_len(length(storeMeans)), function(x) {
                sd(storeMeans[[x]])
            })
            
            if (length(which(!is.na(sdMeans))) >= AmountMeanSDKeep && AmountMeanSDKeep >= 
                1) {
                choosenChans <- sort(unique(c(choosenChans, sapply(sort(sdMeans, 
                  decreasing = TRUE)[seq_len(AmountMeanSDKeep)], function(x) {
                  which(x == sdMeans)
                }))))
            }
            if (Verbose == TRUE) {
                cat(paste0("The channels that are selected for cutting are: ", paste0(parameters(f)$desc[choosenChans], 
                  collapse = ", "), " (", paste0(choosenChans, collapse = ","), ").\n"))
            }
            
            cellDelete <- cellDelete[choosenChans]
        }
        
        # matrix of Z-scores, segments vs channels (measures summed previously)
        cellDelete1 <- do.call(cbind, cellDelete)
        
        # vector of Z-scores, one for each segment
        cellDelete2 <- NULL
        for (m in seq_len(length(cellDelete1[, 1]))) {
            cellDelete2[m] <- sum(cellDelete1[m, ])
        }
        
        typeOfGating <- NULL
        # Check if the file is nice, before removing anything.  If it is, then we can
        # avoid removing slivers.
        if ((round(max(maxDistJumped, na.rm = TRUE), digits = 3) < MaxContin) && 
            (round(mean(meanRangePerc, na.rm = TRUE), digits = 3) < MeanOfMeans) && 
            (round(max(meanRangePerc, na.rm = TRUE), digits = 3) < MaxOfMeans) && 
            is.null(GateLineForce) && AlwaysClean == FALSE) {
            densityGateLine <- NULL
            
            range_7SD <- c(mean(cellDelete2) - RemoveMultiSD * sd(cellDelete2), mean(cellDelete2) + 
                RemoveMultiSD * sd(cellDelete2))
            densityGateLine <- range_7SD[2]
            deletedSegments <- union(which(cellDelete2 < range_7SD[1]), which(cellDelete2 > 
                range_7SD[2]))
            typeOfGating <- paste0(typeOfGating, paste0(RemoveMultiSD, "SD"))
            cellDelete2.org <- cellDelete2
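            # Worked example (illustrative values only): with RemoveMultiSD = 7,
            # if the summed segment scores have mean 10 and sd 2, only segments
            # scoring outside [10 - 7*2, 10 + 7*2] = [-4, 24] are deleted.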
        } else {
            # Finding outliers from the density of the summed measures; the
            # indices of segments whose measures are significantly different
            # from the rest will be returned.
            peaks_info <- getPeaks(cellDelete2, tinypeak.removal = 0.1)
            peaks_xind <- peaks_info$Peaks
            peaks_value <- which(density(cellDelete2)$x %in% peaks_xind)
            peaks_value <- density(cellDelete2)$y[peaks_value]
            peak_max <- max(density(cellDelete2)$y)
            
            cellDelete2.org <- cellDelete2
            # Create very similar data that won't have a very large value in the
            # distribution if there was one very obvious outlier. This allows
            # deGate to gate without resolution issues.
            cellDelete2 <- cellDelete2[which(cellDelete2 <= (mean(cellDelete2) + 
                5 * sd(cellDelete2)))]
            
            all_cut <- deGate(cellDelete2, tinypeak.removal = 0.001, all.cuts = TRUE, 
                upper = TRUE, verbose = FALSE, count.lim = 3)
            
            if (!anyNA(all_cut)) {
                # We need to set a limit on peaks because we don't want to overcut
                if (length(peaks_xind) > 1) {
                  sn <- NULL
                  for (ml in seq_len(length(peaks_xind) - 1)) {
                    ind <- intersect(which(all_cut > peaks_xind[ml]), which(all_cut < 
                      peaks_xind[ml + 1]))
                    y_ind <- which(density(cellDelete2)$x %in% all_cut[ind])
                    y_ind <- density(cellDelete2)$y[y_ind]
                    
                    if (abs(peaks_value[ml + 1] - min(y_ind)) < abs(max(peaks_value) - 
                      min(y_ind)) * 0.015) {
                      sn <- c(sn, ind)
                    }
                  }
                  # Remove the peaks that are too small to be counted as a separate population
                  if (length(sn) > 0) {
                    peaks_xind <- peaks_xind[-(sn + 1)]
                    all_cut <- all_cut[-sn]
                    # Remove peaks
                    typeOfGating <- paste0(typeOfGating, "RemPeaks")
                  }
                }
            }
            
            all_cut_value <- which(density(cellDelete2)$x %in% all_cut)
            all_cut_value <- density(cellDelete2)$y[all_cut_value]
            if (anyNA(peaks_xind)) {
                # In case the density is a spike
                densityGateLine <- mean(density(cellDelete2)$x)/20
            } else {
                if (length(peaks_xind) == 1) {
                  upper_cut <- deGate(cellDelete2, tinypeak.removal = 0.1, upper = TRUE, 
                    use.upper = TRUE, alpha = 0.1, percentile = 0.95, verbose = FALSE, 
                    count.lim = 3)
                  if (!is.na(upper_cut)) {
                    # We don't want the upper cut and the all cut to be the same because that means
                    # we are cutting at the 95 percentile
                    if (length(all_cut) == 1 && upper_cut == all_cut) {
                      upper_cut <- deGate(cellDelete2, tinypeak.removal = 0.1, upper = TRUE, 
                        use.upper = TRUE, alpha = 0.05, verbose = FALSE, count.lim = 3)
                      if (upper_cut == all_cut) {
                        upper_cut <- deGate(cellDelete2, tinypeak.removal = 0.1, 
                          upper = TRUE, use.upper = TRUE, alpha = 0.01, verbose = FALSE, 
                          count.lim = 3)
                      }
                    }
                  }
                  
                  if (length(which(all_cut_value < MaxValleyHgt * peak_max)) == 0) {
                    all_cut_min <- all_cut[1]
                  } else {
                    all_cut_min <- all_cut[min(which(all_cut_value < MaxValleyHgt * 
                      peak_max))]
                  }
                  
                  if (!is.na(upper_cut) && !is.na(all_cut_min)) {
                    if (upper_cut >= all_cut_min) {
                      typeOfGating <- paste0(typeOfGating, "Upper")
                    } else {
                      typeOfGating <- paste0(typeOfGating, "AllCutMin")
                    }
                  }
                  
                  densityGateLine <- max(upper_cut, all_cut_min, na.rm = TRUE)
                  
                  if (densityGateLine == -Inf) 
                    {
                      densityGateLine <- NA
                    }  # max returns -Inf if both were NA
                } else {
                  sind <- NULL
                  for (t in all_cut) {
                    # Save the index that are less than the population cut off.
                    if (length(which(cellDelete2 > t)) <= MaxPercCut * length(cellDelete2)) {
                      sind <- c(sind, t)
                    }
                  }
                  densityGateLine <- sind[1]  # Use the min of the index for cut off
                  typeOfGating <- paste0(typeOfGating, "MaxPercCut")
                }
            }
            if (!is.null(GateLineForce)) {
                densityGateLine <- GateLineForce
            }
            deletedSegments <- which(cellDelete2.org > densityGateLine)
        }
    }
    
    if (FirstOrSecond == "Second") {
        return(list(deletedSegments = 0, quantiles = quantiles, storeMeans = storeMeans, 
            meanRangePerc = meanRangePerc, timeCentres = timeCentres))
    }
    deletedSegments <- suppressWarnings(sort(unique(deletedSegments)))
    
    if (length(deletedSegments) != 0) {
        # Split the consecutive sections into separate elements in the list
        range.seg.rem <- split(deletedSegments, cumsum(c(1, diff(deletedSegments) != 
            1)))
        
        size.in.a.row <- 5  # If greater or equal to 5 segments in a row, add 20% to each side
        adding.Segments <- NULL
        
        length.range.seg.rem <- sapply(seq_len(length(range.seg.rem)), function(x) {
            length(range.seg.rem[[x]])
        })
        range.seg.rem.only.5.or.more <- range.seg.rem[which(length.range.seg.rem >= 
            size.in.a.row)]
        
        range.seg.keep <- setdiff(seq_len(totalNumSeg), unlist(range.seg.rem.only.5.or.more))
        range.seg.keep <- split(range.seg.keep, cumsum(c(1, diff(range.seg.keep) != 
            1)))
        
        
        if (length(range.seg.rem.only.5.or.more) >= 1) {
            # Make sure range.seg.keep is first (will be skipped later because it is only
            # length of 1)
            if (min(unlist(range.seg.rem.only.5.or.more)) <= min(unlist(range.seg.keep))) {
                range.seg.keep <- c(0, range.seg.keep)
            }
            # Make sure range.seg.keep is last (will be skipped later because it is only
            # length of 1)
            if (max(unlist(range.seg.rem.only.5.or.more)) >= max(unlist(range.seg.keep))) {
                range.seg.keep <- c(range.seg.keep, max(unlist(range.seg.rem.only.5.or.more)) + 
                  1)
            }
            # Adding buffer region only when the number of consecutive segments in the group
            # to be deleted are quite large
            
            for (b2 in seq_len(length(range.seg.rem.only.5.or.more))) {
                length_section <- length(range.seg.rem.only.5.or.more[[b2]])
                for (j1 in CleanChan.loc) {
                  
                  left.pop <- storeMeans[[j1]][range.seg.keep[[b2]]]
                  rght.pop <- storeMeans[[j1]][range.seg.keep[[b2 + 1]]]
                  rght.pop <- rev(rght.pop)
                  
                  main.pop <- storeMeans[[j1]][range.seg.rem.only.5.or.more[[b2]]]
                  
                  for (side.pop in list(left.pop, rght.pop)) {
                    if (length(side.pop) >= 2) {
                      left.side <- identical(side.pop, left.pop)
                      # The code below assumes the good part is lower than the
                      # bad part, so we might need to reflect our data
                      if (mean(side.pop) >= mean(main.pop)) {
                        side.pop <- -(side.pop - mean(main.pop)) + mean(main.pop)
                      }
                      
                      for (k1 in seq_len(length(side.pop))) {
                        if (length(side.pop) <= 2 || sd(side.pop) == 0 || (max(side.pop) - 
                          min(side.pop) == 0)) {
                          break
                        }
                        if (tail(side.pop, n = 1) >= (mean(side.pop) + sd(side.pop))) {
                          if (left.side) {
                            adding.Segments <- c(adding.Segments, length(side.pop))
                          } else {
                            adding.Segments <- c(adding.Segments, range.seg.keep[[b2 + 
                              1]][k1])
                          }
                          # Remove last point because we want to recalculate mean and sd on progressively
                          # cleaner data
                          side.pop <- side.pop[-length(side.pop)]
                        } else {
                          break
                        }
                      }
                    }
                  }
                }
                adding.Segments <- c(adding.Segments, (min(range.seg.rem.only.5.or.more[[b2]]) - 
                  ceiling(length_section/5)):(min(range.seg.rem.only.5.or.more[[b2]]) - 
                  1), (max(range.seg.rem.only.5.or.more[[b2]]) + 1):(max(range.seg.rem.only.5.or.more[[b2]]) + 
                  ceiling(length_section/5)))
            }
        }
        if (length(adding.Segments) >= 1) {
            deletedSegments <- sort(unique(c(deletedSegments, adding.Segments[intersect(which(adding.Segments >= 
                1), which(adding.Segments <= totalNumSeg))])))
        }
    }
    return(list(deletedSegments = deletedSegments, quantiles = quantiles, storeMeans = storeMeans, 
        typeOfGating = typeOfGating, meanRangePerc = meanRangePerc, timeCentres = timeCentres, 
        densityGateLine = densityGateLine, cellDelete2 = cellDelete2.org, choosenChans = choosenChans))
}


##################################################
## TimePrint: prints out the time since startTime
TimePrint <- function(startTime) {
    startTime <- as.POSIXct(startTime)
    difft <- difftime(Sys.time(), startTime, units = "secs")
    format(.POSIXct(difft, tz = "GMT"), "%H:%M:%S")
}
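## For illustration:
##   start <- Sys.time()
##   Sys.sleep(2)
##   TimePrint(start)   # roughly "00:00:02"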