R/SCALPEL_stepFunctions.R
In scalpel: Processes Calcium Imaging Data

Documented in scalpel scalpelStep0 scalpelStep1 scalpelStep2 scalpelStep3

#main functions for performing each step of the pipeline
#along with function to call to run entire pipeline

###############################################################################################
## SCALPEL STEP 0: DATA PRE-PROCESSING
###############################################################################################

#' Perform Step 0 of SCALPEL.
#'
#' This step involves data pre-processing. We read in the raw data version of Y and perform standard pre-processing
#' techniques in order to smooth the data both temporally and spatially, remove the bleaching effect,
#' and calculate a standardized fluorescence.
#'
#' @param outputFolder The existing directory where the results should be saved.
#' @param rawDataFolder The directory where the raw data version of Y is saved. The data should be a
#' PxT matrix, where P is the total number of pixels per image frame and T
#' the number of frames of the video, for which the (i,j)th element contains the
#' fluorescence of the ith pixel in the jth frame. To create Y, you should
#' vectorize each 2-dimensional image frame by concatenating the columns of the image frame.  If the data is
#' saved in a single file, it should be named "Y_1.mat", "Y_1.rds", "Y_1.txt", or "Y_1.txt.gz" (depending on \code{fileType}),
#' and if the data is split over multiple files, they should be split into chunks of the columns
#' and named consecutively ("Y_1.mat", "Y_2.mat", etc.; "Y_1.rds", "Y_2.rds", etc.; "Y_1.txt", "Y_2.txt", etc.; or "Y_1.txt.gz", "Y_2.txt.gz", etc.).
#' @param videoHeight The height of the video (in pixels).
#' @param fileType Indicates whether raw data is an .rds (default value; \code{fileType="R"}), .mat (\code{fileType="matlab"}), .txt (\code{fileType="text"}), or
#' .txt.gz (\code{fileType="zippedText"}) file. Any text files should not have row or column names.
#' @param processSeparately Logical scalar giving whether the multiple raw data files should be
#' processed individually, versus all at once. Processing the files separately may be preferable for larger videos.
#' The default value is \code{TRUE}; this argument is ignored if the raw data is saved in a single file.
#'
#' @return An object of class \code{scalpelStep0}, which can be summarized using \code{\link{summary}}, used to run SCALPEL Step 1 using \code{\link{scalpelStep1}},
#' or can be used with the plotting functions \code{\link{plotFrame}}, \code{\link{plotThresholdedFrame}}, and \code{\link{plotVideoVariance}}.
#' \itemize{
#' \item{\code{minRaw, maxRaw, minDeltaf, maxDeltaf}: }{Minimum and maximum values for the raw and processed videos.}
#' \item{\code{partsRaw, partsDeltaf}: }{Vectors indicating the indices of the raw and processed data files, respectively.}
#' \item{\code{nFramesRaw, nFramesDeltaf}: }{The number of frames in each part of the raw and processed data.}
#' \item{\code{lowThreshold, highThreshold}: }{The default lowest and highest threshold values for image segmentation that may be used in Step 1.}
#' \item{Other elements: }{As specified by the user.}
#' }
#'
#' @details Several files containing data from this step and a summary of the step are saved in "outputFolder".
#'
#' @seealso The entire SCALPEL pipeline can be implemented using the \code{\link{scalpel}} function. The
#' other steps in the pipeline can be run using the \code{\link{scalpelStep1}}, \code{\link{scalpelStep2}}, \code{\link{scalpelStep3}} functions.
#' Results from this step can be summarized using \code{\link{summary}}, loaded at a later time using \code{\link{getScalpelStep0}}, and plotted using
#' \code{\link{plotFrame}}, \code{\link{plotThresholdedFrame}}, and \code{\link{plotVideoVariance}}.
#'
#' @import Matrix
#' @import gam
#' @examples
#' \dontrun{
#' ### many of the functions in this package are interconnected so the
#' ### easiest way to learn to use the package is by working through the vignette,
#' ### which is available at ajpete.com/software
#'
#' #existing folder to save results (update this to an existing folder on your computer)
#' outputFolder = "scalpelResultsStepByStep"
#' #location on computer of raw data in R package to use
#' rawDataFolder = gsub("Y_1.rds", "", system.file("extdata", "Y_1.rds", package = "scalpel"))
#' #video height of raw data in R package
#' videoHeight = 30
#' #run Step 0 of SCALPEL
#' Step0Out = scalpelStep0(outputFolder = outputFolder,
#'                        rawDataFolder = rawDataFolder, videoHeight = videoHeight)
#' summary(Step0Out)
#' }
#' @export
scalpelStep0 = function(outputFolder, rawDataFolder, videoHeight, fileType="R", processSeparately=TRUE) {

  message("Beginning Step 0 of SCALPEL")

  #check function arguments
  #check that folders end in "/"
  if (substr(outputFolder, nchar(outputFolder), nchar(outputFolder))!="/") outputFolder = paste0(outputFolder, "/")
  if (substr(rawDataFolder, nchar(rawDataFolder), nchar(rawDataFolder))!="/") rawDataFolder = paste0(rawDataFolder, "/")
  if (!dir.exists(outputFolder)) stop("Specify an existing folder for 'outputFolder'")
  if (dir.exists(paste0(outputFolder, "Step0Data")) | file.exists(paste0(outputFolder, "Step0Summary.txt"))) {
    overwrite = UIinputYesNo(paste0("Results already exist in ",outputFolder,".\n Should we overwrite them? Enter 'Y' or 'N': \n"))
    if (overwrite==FALSE) {
      stop("Results not overwritten. Please rerun with a different 'outputFolder'.")
    } else {
      #delete existing summary file, Step0 data directory, and Step1 directories (if they exist)
      if (dir.exists(paste0(outputFolder, "Step0Data"))) unlink(paste0(outputFolder, "Step0Data"), recursive = TRUE)
      if (file.exists(paste0(outputFolder, "Step0Summary.txt"))) unlink(paste0(outputFolder, "Step0Summary.txt"), recursive = TRUE)
      allStep1Folders = list.dirs(outputFolder, full.names = FALSE, recursive = FALSE)
      allStep1Folders = allStep1Folders[grep("Step1_", allStep1Folders)]
      for (folder in allStep1Folders) unlink(paste0(outputFolder, folder), recursive = TRUE)
    }
  }
  if (!is.logical(processSeparately)) stop("Specify TRUE or FALSE for 'processSeparately'")
  if (fileType!="matlab" & fileType!="R" & fileType!="text" & fileType!="zippedText") stop("Specify 'R', 'matlab', 'text', or 'zippedText' for 'fileType'")

  #create subdirectories in outputFolder to save results
  dir.create(paste0(outputFolder,"Step0Data/"))
  fileTypeEnd = ifelse(fileType=="R", ".rds", ifelse(fileType=="matlab", ".mat", ifelse(fileType=="text", ".txt", ".txt.gz")))
  if (!file.exists(paste0(rawDataFolder, "Y_1", fileTypeEnd))) stop(paste0("Raw data should be saved as 'Y_1",fileTypeEnd,"', 'Y_2",fileTypeEnd,"', etc. in 'rawDataFolder'"))

  if (!dir.exists(paste0(outputFolder,"timings/"))) dir.create(paste0(outputFolder,"timings/"))
  utils::write.table(1,paste0(outputFolder,"timings/Step0_0_start.txt"))

  #figure out how many parts the raw data are split into
  numParts = 0
  while(file.exists(paste0(rawDataFolder, "Y_", numParts+1, fileTypeEnd))) {
    numParts = numParts + 1
  }
  message("Found ", numParts, " raw data files")

  partsRaw = 1:numParts
  if(processSeparately==FALSE) partsDeltaf = 1 else partsDeltaf = 1:numParts
  nFramesRaw = rep(NA, length(partsRaw))
  maxValBleach = rep(NA, length(partsDeltaf))
  minRaw <- maxRaw <- minDeltaf <- maxDeltaf <- NA
  thresholds = matrix(NA, nrow=max(partsDeltaf), ncol=2)

  for (part in partsDeltaf) {

    message("Performing processing on part ", part, " of ", max(partsDeltaf))
    message("Reading in raw data")
    #read in original Y matrix (no pre-processing) from Matlab file
    #should be a matrix with nrow=number of pixels, ncol=number of frames
    if (processSeparately==FALSE) {
      for (partRead in 1:numParts) {
        if (fileType=="matlab") {
          Ypart = R.matlab::readMat(paste0(rawDataFolder,"Y_",partRead,".mat"))
          if (length(names(Ypart))>1) stop(paste0(c("There are multiple variables saved in this Matlab file: ",names(Ypart)), collapse = " "))
          numRows = nrow(with(Ypart, get(names(Ypart))))
          if (numRows%%videoHeight!=0) {
            stop(paste0("There are ",numRows," rows in the matrix saved in 'Y_",
                        part,".mat' and the number of rows must be divisble by 'videoHeight', which is ",videoHeight))
          }
          if (partRead>1) Y = cbind(Y, with(Ypart, get(names(Ypart)))) else Y = with(Ypart, get(names(Ypart)))
          nFramesRaw[partRead] = ncol(with(Ypart, get(names(Ypart))))
        } else if (fileType=="R") {
          Ypart = readRDS(paste0(rawDataFolder,"Y_",partRead,".rds"))
          if (nrow(Ypart)%%videoHeight!=0) {
            stop(paste0("There are ",nrow(Ypart)," rows in the matrix saved in 'Y_",
                        part,".rds' and the number of rows must be divisble by 'videoHeight', which is ",videoHeight))
          }
          if (partRead>1) Y = cbind(Y, Ypart) else Y = Ypart
          nFramesRaw[partRead] = ncol(Ypart)
        } else {
          Ypart = utils::read.table(paste0(rawDataFolder,"Y_",partRead,fileTypeEnd))
          if (nrow(Ypart)%%videoHeight!=0) {
            stop(paste0("There are ",nrow(Ypart)," rows in the matrix saved in 'Y_",
                        part,fileTypeEnd,"' and the number of rows must be divisble by 'videoHeight', which is ",videoHeight))
          }
          if (partRead>1) Y = cbind(Y, Ypart) else Y = Ypart
          nFramesRaw[partRead] = ncol(Ypart)
        }
        rm(Ypart)
      }
      minRaw = min(Y); maxRaw = max(Y)
    } else {
      if (fileType=="matlab") {
        Y = R.matlab::readMat(paste0(rawDataFolder,"Y_",part,".mat"))
        if (length(names(Y))>1) stop(paste0(c("There are multiple variables saved in this Matlab file: ",names(Y)), collapse = " "))
        Y = with(Y, get(names(Y)))
        if (nrow(Y)%%videoHeight!=0) stop(paste0("The number of rows of the matrix saved in 'Y_",
                                                 part,".mat' should be divisble by 'videoHeight'"))
      } else if (fileType=="R") {
        Y = readRDS(paste0(rawDataFolder,"Y_",part,".rds"))
        if (nrow(Y)%%videoHeight!=0) stop(paste0("The number of rows of the matrix saved in 'Y_",
                                                 part,".rds' should be divisble by 'videoHeight'"))
      } else {
        Y = utils::read.table(paste0(rawDataFolder,"Y_",part,fileTypeEnd))
        if (nrow(Y)%%videoHeight!=0) stop(paste0("The number of rows of the matrix saved in 'Y_",
                                                 part,fileTypeEnd,"' should be divisble by 'videoHeight'"))
      }
      nFramesRaw[part] = ncol(Y)
      minRaw = min(c(minRaw, min(Y)), na.rm = T)
      maxRaw = max(c(maxRaw, max(Y)), na.rm = T)
    }

    #shift so that minimum value is 0
    message("Y has ",nrow(Y)," total pixels and ",ncol(Y)," frames")
    Y = Y - min(Y)
    utils::write.table(1,paste0(outputFolder,"timings/Step0_part",part,"_1_readdata.txt"))

    message("Performing temporal and spatial smoothing")
    #perform temporal and spatial smoothing
    prepped = imageFastSmoothHelper(m=videoHeight, n=nrow(Y)/videoHeight)
    for (frame in 1:ncol(Y)) {
      if (frame%%100==0) message("Frame ", frame, " of ", ncol(Y))
      Y[,frame] = as.vector(imageFastSmooth(matrix(Y[,frame], nrow=videoHeight), prepped))
    }
    prepped = imageFastSmoothHelper(1, ncol(Y))
    for (pixel in 1:nrow(Y)) {
      if (pixel%%1000==0) message("Pixel ", pixel, " of ", nrow(Y))
      Y[pixel,] = as.vector(imageFastSmooth(matrix(Y[pixel,],nrow=1), prepped))
    }
    rm(prepped)
    utils::write.table(1,paste0(outputFolder,"timings/Step0_part",part,"_2_smoothed.txt"))

    message("Adjusting for possible bleaching effect")
    #adjust for bleaching effect
    bleachVec = apply(Y, 2, stats::median)
    frames = 1:length(bleachVec)
    bleachModel = gam(bleachVec~s(frames,10))
    #nobleachY = t(t(Y) - bleachModel$fitted.values + max(bleachModel$fitted.values))
    nobleachY = t(t(Y) - bleachModel$fitted.values)
    maxValBleach[part] = max(bleachModel$fitted.values)
    rm(list=c('Y', 'bleachVec', 'frames', 'bleachModel'))
    utils::write.table(1,paste0(outputFolder,"timings/Step0_part",part,"_3_bleacheffect.txt"))
    #save nobleachY if there are multiple parts b/c we need to figure out the max value
    if (length(partsDeltaf)>1) {
      saveRDS(nobleachY, file=paste0(outputFolder,"Step0Data/Ynobleach_part",part,".rds"))
      rm(nobleachY)
    }
  }
  for (part in partsDeltaf) {
    if (length(partsDeltaf)>1) {
      nobleachY = readRDS(paste0(outputFolder,"Step0Data/Ynobleach_part",part,".rds"))
      unlink(paste0(outputFolder,"Step0Data/Ynobleach_part",part,".rds"))
    }
    nobleachY = nobleachY + max(maxValBleach)
    saveRDS(maxValBleach, paste0(outputFolder,"Step0Data/maxValBleach.rds"))
    message("Performing 'delta f over f' transformation")
    #perform our version of 'delta f over f' transformation
    adjFactor = stats::quantile(nobleachY, probs=0.1)
    deltafoverfY = t(apply(nobleachY, 1, function(vec, adjFactor)
      (vec - stats::median(vec))/(stats::median(vec) + adjFactor), adjFactor=adjFactor))
    rm(list=c('nobleachY','adjFactor'))
    utils::write.table(1,paste0(outputFolder,"timings/Step0_part",part,"_4_deltaf.txt"))

    #save processed data
    saveRDS(deltafoverfY, file=paste0(outputFolder,"Step0Data/Ydeltaf_part",part,".rds"))
    minDeltaf = min(c(minDeltaf, min(deltafoverfY)), na.rm = T)
    maxDeltaf = max(c(maxDeltaf, max(deltafoverfY)), na.rm = T)
    thresholds[part,] = c(-stats::quantile(c(deltafoverfY), probs=0.001), -min(deltafoverfY))
  }

  if (processSeparately==TRUE) nFramesDeltaf = nFramesRaw else nFramesDeltaf = sum(nFramesRaw)
  lowThreshold = max(thresholds[,1])
  highThreshold = max(thresholds[,2])

  #summary file of data
  if (max(partsRaw)==1) {
    #1 raw data file
    beginning = paste0("We found 1 raw data file, which had ", nFramesRaw, " frames. Thus 1 processed data file with ", nFramesDeltaf, " frames was produced.")
  } else if (processSeparately==TRUE) {
    #>1 raw data file, processed separately
    beginning = paste0("We found ", max(partsRaw), " raw data files, which were processed separately, and had ", paste0(nFramesRaw, collapse = ", "), " frames, respectively. ",
                       "Thus ", max(partsDeltaf), " processed data files with ", paste0(nFramesDeltaf, collapse = ", "), " frames, respectively, were produced.")
  } else {
    #>1 raw data file, processed together
    beginning = paste0("We found ", max(partsRaw), " raw data files, which were processed all at once, and had ", paste0(nFramesRaw, collapse = ", "), " frames, respectively. ",
                       "Thus ", max(partsDeltaf), " processed data file with ", nFramesDeltaf, " frames was produced.")
  }
  fileConn = file(paste0(outputFolder, "Step0Summary.txt"))
  writeLines(c("Step 0 of SCALPEL","","SUMMARY",
               paste0(beginning, " The video has a height of ", videoHeight, " and a width of ", nrow(deltafoverfY)/videoHeight,
                      ". The minimum and maximum values of the raw data were ", signif(minRaw, 3), " and ", signif(maxRaw, 3), ", respectively."),
               "", "ARGUMENTS USED", paste0("Raw data location: ", rawDataFolder), paste0("Raw data file type: ", fileType),
               paste0("Video height: ",videoHeight), paste0("Process raw data files separately? ",ifelse(processSeparately==TRUE,"Yes","No")),
               paste0("Output files location: ", outputFolder)), fileConn)
  close(fileConn)

  #save variables for later use
  saveRDS(rawDataFolder, file=paste0(outputFolder,"Step0Data/rawDataFolder.rds"))
  saveRDS(videoHeight, file=paste0(outputFolder,"Step0Data/videoHeight.rds"))
  saveRDS(partsRaw, file=paste0(outputFolder,"Step0Data/partsRaw.rds"))
  saveRDS(partsDeltaf, file=paste0(outputFolder,"Step0Data/partsDeltaf.rds"))
  saveRDS(nFramesRaw, file=paste0(outputFolder,"Step0Data/nFramesRaw.rds"))
  saveRDS(nFramesDeltaf, file=paste0(outputFolder,"Step0Data/nFramesDeltaf.rds"))
  saveRDS(minRaw, file=paste0(outputFolder,"Step0Data/minRaw.rds"))
  saveRDS(maxRaw, file=paste0(outputFolder,"Step0Data/maxRaw.rds"))
  saveRDS(minDeltaf, file=paste0(outputFolder,"Step0Data/minDeltaf.rds"))
  saveRDS(maxDeltaf, file=paste0(outputFolder,"Step0Data/maxDeltaf.rds"))
  saveRDS(fileType, file=paste0(outputFolder,"Step0Data/fileType.rds"))
  saveRDS(highThreshold, file=paste0(outputFolder,"Step0Data/highThreshold.rds"))
  saveRDS(lowThreshold, file=paste0(outputFolder,"Step0Data/lowThreshold.rds"))

  #return variables needed for Step 1
  output = getScalpelStep0(outputFolder=outputFolder)

  utils::write.table(1,paste0(outputFolder,"timings/Step0_5_end.txt"))
  message("Step 0 of SCALPEL finished!")

  return(output)
}

###############################################################################################
## SCALPEL STEP 1: CONSTRUCTION OF A SPATIAL COMPONENT DICTIONARY
## PERFORM IMAGE SEGMENTATION
###############################################################################################

#' Perform Step 1 of SCALPEL.
#'
#' This step involves constructing a spatial component dictionary. We apply a simple image segmentation
#' procedure to each frame of the video in order to derive a dictionary of preliminary dictionary elements.
#' Ideally, this dictionary is a superset of the true spatial components.
#'
#' @param step0Output An object of class \code{scalpel} or \code{scalpelStep0}, which result from running the
#' \code{\link{scalpel}} or \code{\link{scalpelStep0}} functions, respectively.
#' @param minSize,maxSize The minimum and maximum size, respectively, for a preliminary dictionary element with default values
#' of 25 and 500, respectively.
#' @param maxWidth,maxHeight The maximum width and height, respectively, for a preliminary dictionary element with default values of 30.
#' @param thresholdVec Optional advanced user argument: A vector with the desired thresholds to use for image segmentation. If not specified, the default is to
#' use the negative of the minimum of the processed Y data (i.e., \code{step0Output$highThreshold}), the negative of the 0.1\% quantile of the processed Y data
#' (i.e., \code{step0Output$lowThreshold}), and the mean of these. These automatically chosen thresholds can also be updated using \code{\link{updateThreshold}}.
#'
#' @return An object of class \code{scalpelStep1}, which can be summarized using \code{\link{summary}}, used to run SCALPEL Step 2 using \code{\link{scalpelStep2}},
#' or can be used with the plotting function \code{\link{plotCandidateFrame}}.
#' \itemize{
#' \item{\code{Azero}: }{A matrix containing the preliminary dictionary elements, where the ith column of \code{Azero}
#' is a vector of 1's and 0's, indicating whether each pixel is contained in the ith preliminary dictionary element.}
#' \item{\code{AzeroFrames}: }{A vector whose ith element gives the video frame from which the preliminary dictionary element
#' in the ith column of \code{Azero} was derived.}
#' \item{\code{AzeroThreshold}: }{A vector whose ith element gives the threshold used to obtain the preliminary dictionary element
#' in the ith column of \code{Azero}.}
#' \item{\code{pixelsUse}: }{A vector with the pixels (i.e., indices of the rows of \code{Azero}) that are contained in at
#' least one preliminary dictionary element.}
#' \item{\code{version}: }{A 5-digit unique identifier for the output folder name that is automatically generated in this step.}
#' \item{Other elements: }{As specified by the user or returned from a previous step.}
#' }
#'
#' @details Several files containing data from this step and a summary of the step are saved in
#' "outputFolder/Step1_version" where \code{version} is a 5-digit unique identifier that is automatically generated.
#'
#' @seealso The entire SCALPEL pipeline can be implemented using the \code{\link{scalpel}} function. The
#' other steps in the pipeline can be run using the \code{\link{scalpelStep0}}, \code{\link{scalpelStep2}}, \code{\link{scalpelStep3}} functions.
#' Results from this step can be summarized using \code{\link{summary}}, loaded at a later time using \code{\link{getScalpelStep1}}, and plotted
#' using \code{\link{plotCandidateFrame}}.
#'
#' @examples
#' \dontrun{
#' ### many of the functions in this package are interconnected so the
#' ### easiest way to learn to use the package is by working through the vignette,
#' ### which is available at ajpete.com/software
#'
#' #assumes you have run the example for the "scalpelStep0" function
#'
#' #run Step 1 of SCALPEL
#' Step1Out = scalpelStep1(step0Output = Step0Out)
#' summary(Step1Out)
#' }
#' @import Matrix
#' @export
scalpelStep1 = function(step0Output, minSize=25, maxSize=500, maxWidth=30, maxHeight=30, thresholdVec=NULL) {

  message("Beginning Step 1 of SCALPEL")
  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_0_start.txt"))

  #check function arguments
  if (class(step0Output)!="scalpel" & class(step0Output)!="scalpelStep0") stop("The class of 'step0Output' must be 'scalpel' or 'scalpelStep0'")
  if (minSize<25) {
    message("Note 'minSize' is too small -- a minimum size of 25 pixels will be used")
    minSize = 25
  }
  if (minSize>=maxSize) stop("Must choose 'minSize' to be smaller than 'maxSize'")

  #generate 5-digit unique identifier for folder name
  folderCreated = FALSE
  while(!folderCreated) {
    version = as.numeric(paste(sample(1:9, 5, replace=TRUE), collapse=""))
    #create folder
    if (!dir.exists(paste0(step0Output$outputFolder,"Step1_",version,"/"))) {
      dir.create(paste0(step0Output$outputFolder,"Step1_",version,"/"))
      folderCreated = TRUE
    }
  }
  step1folder = paste0(step0Output$outputFolder,"Step1_",version,"/Step1Data/")
  dir.create(step1folder)

  totalFrames = 0
  for (part in step0Output$partsDeltaf) {
    message("Performing image segmentation on part ", part, " of ", max(step0Output$partsDeltaf))

    #load in delta f over f
    deltafoverfY = readRDS(paste0(step0Output$outputFolder,"Step0Data/Ydeltaf_part",part,".rds"))
    totalFrames = totalFrames + ncol(deltafoverfY)

    #calculate thresholdVec (vector of thresholds to use in image segmentation) if not provided
    #default is to use negative of minimum of delta f, negative of 0.1% quantile of delta f, and mean of these
    #if (is.null(thresholdVec)) thresholdVec = seq(-stats::quantile(c(deltafoverfY), probs=0.001), -min(deltafoverfY), len=3)
    if (is.null(thresholdVec)) thresholdVec = seq(step0Output$lowThreshold, step0Output$highThreshold, len=3)
    utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_part",part,"_1_threshold.txt"))

    #store preliminary dictionary elements and frame from which component is derived
    AList = vector("list", length=length(thresholdVec))
    AframeList = vector("list", length=length(thresholdVec))

    #perform the image segmentation for each of the values in thresholdVec
    for (qindex in seq(thresholdVec)) {

      message("Performing segmentation for threshold ", qindex, " of ", length(thresholdVec))
      utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_part",part,"_2_qindex_",qindex,".txt"))

      threshold = thresholdVec[qindex]
      #create binary image
      thresholdY = (deltafoverfY>=threshold)
      thresholdYSp = Matrix(thresholdY, sparse=TRUE)
      rm('thresholdY')

      #extract preliminary dictionary elements from each frame
      #based on finding connected components of size 25 pixels or greater
      AList[[qindex]] = Matrix(0, nrow=nrow(deltafoverfY), ncol=60000, sparse=TRUE)
      AframeList[[qindex]] = rep(NA, 60000)

      #only consider frames which have at least 25 pixels above the threshold
      brightFrames = which(colSums(thresholdYSp)>=25)
      nframes = length(brightFrames); counter = 1

      #column index for AList
      index = 1

      for (frame in brightFrames) {
        if (counter%%100==0) message("Segmenting frame ", counter, " of ", nframes, " bright frames")
        counter = counter + 1
        #get preliminary dictionary elements
        comps = getSpatialComponents(frameVec = thresholdYSp[,frame], videoHeight = step0Output$videoHeight,
                                     connectivity = 4, minSize = minSize, maxSize = maxSize,
                                     maxWidth = maxWidth, maxHeight = maxHeight)

        #add preliminary dictionary elements to AList
        if (ncol(comps)>0) {
          AList[[qindex]][,index:(index+ncol(comps)-1)] = comps
          AframeList[[qindex]][index:(index+ncol(comps)-1)] = frame + totalFrames - ncol(deltafoverfY)
          index = index + ncol(comps)
        }
      }
      rm('thresholdYSp')

      #### updated 11/29/17: account for possibly not finding any prelim elements
      #### BEGINNING OF UPDATES
      if (length(which(colSums(AList[[qindex]])>0))>0) { 
        filled = 1:max(which(colSums(AList[[qindex]])>0))
        AList[[qindex]] = AList[[qindex]][,filled]
        AframeList[[qindex]] = AframeList[[qindex]][filled]
      } else {
        AList[[qindex]] = NULL
        AframeList[[qindex]] = NULL
      }
    }
    utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_part",part,"_3_finishsegmentation.txt"))
    rm('deltafoverfY')

    #combine components from different thresholds
    A = AList[[1]]
    Aframes = AframeList[[1]]
    if (!is.null(AList[[1]])) Athreshold = rep(thresholdVec[1], ncol(AList[[1]])) else Athreshold = c()

    if (length(AList)>1) {
      for (i in 2:length(AList)) {
        A = cbind(A, AList[[i]])
        Aframes = c(Aframes, AframeList[[i]])
        if (!is.null(AList[[i]])) Athreshold = c(Athreshold, rep(thresholdVec[i], ncol(AList[[i]])))
      }
    }
    if (is.null(A)) stop("No preliminary neurons found during Step 1. Different 'thresholdVec' needed.")
    #### END OF UPDATES
    
    saveRDS(A, paste0(step1folder,"Azero_part",part,".rds"))
    saveRDS(Aframes, paste0(step1folder,"AzeroFrames_part",part,".rds"))
    saveRDS(Athreshold, paste0(step1folder,"AzeroThreshold_part",part,".rds"))
    rm(list=c('AList','AframeList','A','Aframes','Athreshold'))
  }

  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_4_startcombine.txt"))

  #now combine preliminary dictionary elements from all parts in scalpelOutput$partsDeltaf
  # and figure out which pixels from the video are needed (pixelsUse)
  for (part in step0Output$partsDeltaf) {
    if (part==1) {
      A = readRDS(paste0(step1folder,"Azero_part",part,".rds"))
      Aframes = readRDS(paste0(step1folder,"AzeroFrames_part",part,".rds"))
      Athreshold = readRDS(paste0(step1folder,"AzeroThreshold_part",part,".rds"))
    } else {
      A = cbind(A, readRDS(paste0(step1folder,"Azero_part",part,".rds")))
      Aframes = c(Aframes, readRDS(paste0(step1folder,"AzeroFrames_part",part,".rds")))
      Athreshold = c(Athreshold, readRDS(paste0(step1folder,"AzeroThreshold_part",part,".rds")))
    }
  }
  #the pixels that are non-zero in at least one preliminary dictionary element
  pixelsUse = which(rowSums(A)!=0)
  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_5_endcombine.txt"))

  message("Saving data needed in Step 2 of SCALPEL")
  #create a matrix with the whole video
  #but only the pixels which are in at least one preliminary dictionary element
  zeroCut = min(thresholdVec)
  wholedeltafoverfY = matrix(0, nrow=length(pixelsUse), ncol=totalFrames)
  colIndex = 1
  for (part in step0Output$partsDeltaf) {
    deltafoverfY = readRDS(paste0(step0Output$outputFolder,"Step0Data/Ydeltaf_part",part,".rds"))
    deltafoverfY = deltafoverfY[pixelsUse,]
    deltafoverfY[deltafoverfY<=zeroCut] = 0
    wholedeltafoverfY[,colIndex:(colIndex+ncol(deltafoverfY)-1)] = deltafoverfY
    colIndex = colIndex + ncol(deltafoverfY)
    rm(deltafoverfY)
  }
  wholedeltafoverfY = Matrix(wholedeltafoverfY, sparse = TRUE)
  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_6_wholeY.txt"))

  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_7_thresholdY.txt"))

  #remove preliminary dictionary elements that have zero fluorescence using thresholded Y
  #i.e., those preliminary dictionary elements that don't have any pixels above the threshold for any frame
  totalFluor = rowSums(wholedeltafoverfY)
  Fluor = crossprod(A[pixelsUse,], matrix(totalFluor, ncol=1))
  remove = which(Fluor==0)
  if (length(remove)>0) {
    A = A[,-remove,drop=FALSE]
    Aframes = Aframes[-remove]
    Athreshold = Athreshold[-remove]
  }
  saveRDS(A, paste0(step1folder,"Azero.rds"))
  saveRDS(Athreshold, paste0(step1folder,"AzeroThreshold.rds"))
  saveRDS(Aframes, paste0(step1folder,"AzeroFrames.rds"))
  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_8_newcandidates.txt"))

  #update pixelsUse and corresponding matrices
  pixelsUseUpdated = which(rowSums(A[pixelsUse,])!=0)
  if (length(pixelsUseUpdated)!=length(pixelsUse)) {
    wholedeltafoverfY = wholedeltafoverfY[pixelsUseUpdated,]
  }
  saveRDS(wholedeltafoverfY, paste0(step1folder,"Ydeltaf_pixelsUse_thresholded.rds"))
  rm(wholedeltafoverfY)
  pixelsUse = pixelsUse[pixelsUseUpdated]
  saveRDS(pixelsUse, paste0(step1folder,"pixelsUse.rds"))

  #summary file of data
  fileConn = file(paste0(step0Output$outputFolder, "Step1_",version,"/Step1Summary.txt"))
  writeLines(c("Step 1 of SCALPEL","","SUMMARY",
               paste0("We performed image segmentation to produce a total of ", ncol(A), " preliminary dictionary elements, which came from ",
                      length(unique(Aframes)), " different frames of the video. There are ", length(pixelsUse),
                      " pixels contained in at least one preliminary dictionary element. The version (i.e., unique identifier) for this run of Step 1 is ", version, "."),
               "", "ARGUMENTS USED", paste0("Minimum size of preliminary dictionary element allowed (in pixels): ", minSize),
               paste0("Maximum size of preliminary dictionary element allowed (in pixels): ", maxSize),
               paste0("Maximum width of preliminary dictionary element allowed (in pixels): ", maxWidth),
               paste0("Maximum height of preliminary dictionary element allowed (in pixels): ", maxHeight),
               "Threshold values used for image segmentation: ", paste0(signif(thresholdVec, 3))), fileConn)
  close(fileConn)

  saveRDS(thresholdVec, paste0(step1folder,"thresholdVec.rds"))

  #return variables needed for Step 2
  output = getScalpelStep1(outputFolder=step0Output$outputFolder, version=version)

  utils::write.table(1,paste0(step0Output$outputFolder,"timings/Step1_9_end.txt"))
  message("Step 1 of SCALPEL finished!")
  return(output)
}

###############################################################################################
## SCALPEL STEP 2: REFINEMENT OF THE SPATIAL COMPONENT DICTIONARY
## PERFORM PROTOTYPE CLUSTERING WITH SPATIAL/TEMPORAL DISSIMILARITY
###############################################################################################

#' Perform Step 2 of SCALPEL.
#'
#' This step involves refinement of the spatial component dictionary from Step 1. We eliminate redundancy
#' in the spatial component dictionary by clustering together preliminary dictionary elements that represent
#' the same neuron, based on spatial and temporal information.
#'
#' @param step1Output An object of class \code{scalpel} or \code{scalpelStep1}, which result from running the
#' \code{\link{scalpel}} or \code{\link{scalpelStep1}} functions, respectively.
#' @param cutoff A value in [0,1] indicating where to cut the dendrogram that results from hierarchical clustering
#' of the preliminary dictionary elements. The default value is 0.18.
#' @param omega A value in [0,1] indicating how to weight spatial vs. temporal information in the dissimilarity metric
#' used for clustering. If \code{omega=1}, only spatial information is used. The default value is 0.2.
#' @param maxSizeToCluster Optional advanced user argument: The maximum number of preliminary dictionary elements to cluster at once. We attempt to cluster each
#' overlapping set of preliminary dictionary elements, but if one of these sets is very large (e.g., >10,000), memory issues may
#' result. Thus we perform a two-stage clustering in which we first cluster together random sets of size
#' approximately equaling \code{maxSizeToCluster} and then cluster together the representatives from the first stage.
#' Finally, we recalculate the representatives using all of the preliminary dictionary elements in the final clusters. The default value is 3000.
#' If \code{maxSizeToCluster} is set to \code{NULL}, single-stage clustering is done, regardless of the size of the overlapping sets.
#' Memory issues may result when using this option to force single-stage clustering if the size of
#' the largest overlapping set of preliminary dictionary elements is very large (e.g., >10,000).
#'
#' @return An object of class \code{scalpelStep2}, which can be summarized using \code{\link{summary}}, used to run SCALPEL Step 3 using \code{\link{scalpelStep3}},
#' or can be used with the plotting functions \code{\link{plotCluster}} and \code{\link{plotSpatial}}.
#' \itemize{
#' \item{\code{A}: }{A matrix containing the dictionary elements, where the ith column of \code{A}
#' is a vector of 1's and 0's, indicating whether each pixel is contained in the ith dictionary element.}
#' \item{\code{repComps}: }{A vector where the ith element indicates which preliminary dictionary element is the ith representive
#' component. That is, \code{A[,i]=step1Output$Azero[,repComps[i]]}.}
#' \item{\code{clusterID}: }{A vector whose ith element indicates which of the dictionary elements in \code{A} is the
#' representative for the ith preliminary dictionary element.}
#' \item{\code{overlapSetID}: }{A vector indicating which preliminary dictionary elements overlap, with the ith element
#' giving the group index for the ith preliminary dictionary element.}
#' \item{\code{treeList}: }{A list of length \code{max(overlapSetID)} with the ith element containing an object of class \code{protoclust} corresponding
#' to prototype clustering for the preliminary dictionary elements with \code{overlapSetID=i}. If two-stage clustering was done for a
#' particular set, then the element will be \code{NULL}.}
#' \item{Other elements: }{As specified by the user or returned from a previous step.}
#' }
#'
#' @details Several files containing data from this step and a summary of the step are saved in the folder
#' "outputFolder/Step1_version/Step2_omega_omega_cutoff_cutoff" where \code{version} is a 5-digit unique
#'  identifier that is automatically generated in Step 1 and \code{omega} and \code{cutoff} are the user-supplied parameters.
#'
#' @seealso The entire SCALPEL pipeline can be implemented using the \code{\link{scalpel}} function. The
#' other steps in the pipeline can be run using the \code{\link{scalpelStep0}}, \code{\link{scalpelStep1}}, \code{\link{scalpelStep3}} functions.
#' Results from this step can be summarized using \code{\link{summary}}, loaded at a later time using \code{\link{getScalpelStep2}}, and plotted using
#' \code{\link{plotCluster}} and \code{\link{plotSpatial}}.
#'
#' @examples
#' \dontrun{
#' ### many of the functions in this package are interconnected so the
#' ### easiest way to learn to use the package is by working through the vignette,
#' ### which is available at ajpete.com/software
#'
#' #assumes you have run the example for the "scalpelStep1" function
#'
#' #run Step 2 of SCALPEL
#' Step2Out = scalpelStep2(step1Output = Step1Out)
#' summary(Step2Out)
#' }
#' @import Matrix
#' @export
scalpelStep2 = function(step1Output, cutoff=0.18, omega=0.2, maxSizeToCluster=3000) {

  message("Beginning Step 2 of SCALPEL")
  utils::write.table(1,paste0(step1Output$outputFolder,"timings/Step2_0_start.txt"))

  #check function arguments
  if (class(step1Output)!="scalpel" & class(step1Output)!="scalpelStep1") stop("The class of 'step1Output' must be 'scalpel' or 'scalpelStep1'")
  if (cutoff<0 | cutoff>1) stop("Specify 'cutoff' in [0,1]")
  if (omega<0 | omega>1) stop("Specify 'omega' in [0,1]")
  if (cutoff>omega) message("The clustering results may be inexact when 'cutoff'>'omega', as we only consider clustering overlapping sets of preliminary dictionary elements. When 'cutoff'>'omega', non-overlapping preliminary dictionary elements may be clustered together.")

  step1folder = paste0(step1Output$outputFolder,"Step1_",step1Output$version,"/Step1Data/")
  step2folder = paste0(step1Output$outputFolder,"Step1_",step1Output$version,"/Step2_omega_",omega,"_cutoff_",cutoff,"/")
  if (dir.exists(step2folder)) {
    overwrite = UIinputYesNo(paste0("Results already exist for omega=",omega," and cutoff=",cutoff,".\n
                                    Should we overwrite them? Enter 'Y' or 'N': \n"))
    if (overwrite==FALSE) {
      stop("Results not overwritten. Please rerun for a different combo of 'omega' and 'cutoff'.")
    } else {
      #delete existing data directory
      unlink(step2folder, recursive = TRUE)
    }
  }
  dir.create(step2folder)
  dir.create(paste0(step2folder,"Step2Data/"))

  #force one-stage clustering if maxSizeToCluster is NULL
  fullCluster = ifelse(is.null(maxSizeToCluster), TRUE, FALSE)

  #load thresholded version of Y
  YThreshold = readRDS(paste0(step1folder,"Ydeltaf_pixelsUse_thresholded.rds"))

  message("Determining sets of overlapping preliminary dictionary elements")
  #identify sets of overlapping preliminary dictionary elements, so we can perform prototype clustering on each
  #first find sets of connected preliminary dictionary elements for each chunk of 5000 components
  numChunks = ceiling(ncol(step1Output$Azero)/5000)
  set = c()
  for (i in 1:numChunks) {
    if (i==numChunks) cols = ((i-1)*5000+1):ncol(step1Output$Azero) else cols = ((i-1)*5000+1):(i*5000)
    newSet = findSets(step1Output$Azero[,cols])
    if (i==1) set = newSet else set = c(set, max(set) + newSet)
  }
  utils::write.table(1,paste0(step1Output$outputFolder,"timings/Step2_1_firstoverlap.txt"))

  #create new Azero matrix which adds together the components in the same cluster
  #then do clustering again on this matrix
  AzeroShrunk = matrix(NA, nrow=nrow(step1Output$Azero), ncol=max(set))
  for (col in 1:max(set)) {
    cols = which(set==col)
    AzeroShrunk[,col] = rowSums(step1Output$Azero[,cols,drop=FALSE])
  }
  AzeroShrunk = Matrix(AzeroShrunk, sparse=TRUE)
  setShrunk = findSets(AzeroShrunk)
  rm('AzeroShrunk')

  #final sets of overlapping preliminary dictionary elements
  finalSet = setShrunk[set]
  saveRDS(finalSet, paste0(step2folder, "Step2Data/overlapSetID.rds"))

  #matrix to store dictionary elements and vector for cluster IDs
  A = Matrix(0, nrow=nrow(step1Output$Azero), ncol=2000, sparse=TRUE)
  repComps = rep(NA, 2000)
  clusterID = rep(NA, ncol(step1Output$Azero))
  AIndex = 1
  treeList = vector("list", length=max(finalSet))

  utils::write.table(1,paste0(step1Output$outputFolder,"timings/Step2_2_ready.txt"))

  #do prototype clustering for each group of overlapping components
  for (setInd in 1:max(finalSet)) {
    message("Clustering overlapping set: ", setInd, " of ", max(finalSet))

    cols = which(finalSet==setInd)
    if (length(cols)>1) {
      #do prototype clustering but choose dictionary element that has
      #the smallest median dissimilarity from others
      clusterOut = clusterComponents(Azero=step1Output$Azero[which(rowSums(step1Output$Azero[,cols,drop=FALSE])!=0),cols],
                                     Y=YThreshold[which(rowSums(step1Output$Azero[step1Output$pixelsUse,cols,drop=FALSE])!=0),],
                                     videoHeight=step1Output$videoHeight, cutoff=cutoff, weight=omega,
                                     fullCluster=fullCluster, maxSizeToCluster=maxSizeToCluster)

      #save info
      clusterID[cols] = clusterOut$clusterID + max(c(clusterID, 0), na.rm=TRUE)
      A[,AIndex:(AIndex+length(clusterOut$repComps)-1)] = step1Output$Azero[,cols[clusterOut$repComps]]
      repComps[AIndex:(AIndex+length(clusterOut$repComps)-1)] = cols[clusterOut$repComps]
      AIndex = AIndex+length(clusterOut$repComps)
      if (!is.null(clusterOut$tree)) treeList[[setInd]] = clusterOut$tree
    } else {
      clusterID[cols] = 1 + max(c(clusterID, 0), na.rm=TRUE)

      A[,AIndex] = step1Output$Azero[,cols]
      repComps[AIndex] = cols
      AIndex = AIndex+1
    }
  }
  utils::write.table(1,paste0(step1Output$outputFolder,"timings/Step2_3_clustered.txt"))
  rm(YThreshold)

  repComps = repComps[1:max(which(colSums(A)>0))]
  A = A[,1:max(which(colSums(A)>0))]
  saveRDS(A, paste0(step2folder, "Step2Data/A.rds"))
  saveRDS(repComps, paste0(step2folder, "Step2Data/repComps.rds"))
  saveRDS(clusterID, paste0(step2folder, "Step2Data/clusterID.rds"))

  #summary file of data
  fileConn = file(paste0(step2folder, "Step2Summary.txt"))
  writeLines(c("Step 2 of SCALPEL","","SUMMARY",
               paste0("We performed clustering on the ", ncol(step1Output$Azero), " preliminary dictionary elements, which resulted in ",
                      ncol(A), " dictionary elements."),
               "", "ARGUMENTS USED",
               paste0("Cutoff for dendrogram: ", cutoff), paste0("Dissimilarity metric weight: ", omega),
               paste0("Maximum number of preliminary dictionary elements to cluster at once: ", ifelse(is.null(maxSizeToCluster), "N/A", maxSizeToCluster)),
               "", "ADVANCED USER INFO",
               paste0("There were ", max(finalSet), " overlapping sets of components with the largest set containing ",
                      max(table(finalSet)), " preliminary dictionary elements. The clustering shortcut for large sets of overlapping components ",
                      ifelse(fullCluster==TRUE, "was not", "was"), " used.",
                      ifelse(fullCluster==FALSE, paste0(" There were ", length(which(floor(table(finalSet)/maxSizeToCluster)>1)),
                                                        " overlapping sets for which the shortcut was used."),""))), fileConn)
  close(fileConn)
  rm(A)
  saveRDS(treeList, paste0(step2folder, "Step2Data/treeList.rds"))

  output = getScalpelStep2(outputFolder=step1Output$outputFolder, version=step1Output$version, cutoff=cutoff, omega=omega)
  utils::write.table(1,paste0(step1Output$outputFolder,"timings/Step2_4_end.txt"))

  message("Step 2 of SCALPEL finished!")
  return(output)
}

###############################################################################################
## SCALPEL STEP 3: SPATIAL COMPONENT SELECTION AND TEMPORAL COMPONENT ESTIMATION
## FIT SPARSE GROUP LASSO WITH NON-NEGATIVITY CONSTRAINT
###############################################################################################

#' Perform Step 3 of SCALPEL.
#'
#' This step involves spatial component selection and temporal component estimation. We estimate the temporal
#' components corresponding to the dictionary elements from Step 2 by solving a sparse group lasso problem
#' with a non-negativity constraint.
#'
#' @param step2Output An object of class \code{scalpel} or \code{scalpelStep2}, which result from running the
#' \code{\link{scalpel}} or \code{\link{scalpelStep2}} functions, respectively.
#' @param lambdaMethod A description of how lambda should be chosen: either \code{"trainval"} (default),
#' \code{"distn"}, or \code{"user"}. A value of \code{"trainval"} means lambda will be chosen using a training/validation
#' set approach. A value of \code{"distn"} means lambda will be chosen as the negative of the 0.1\% quantile
#' of elements of active pixels (i.e., those contained in at least one dictionary element) of Y.
#' Using \code{"distn"} is computationally faster than \code{"trainval"}. Alternatively with \code{"user"},
#' the value of lambda can be directly specified using \code{lambda}.
#' @param lambda The value of lambda to use when fitting the sparse group lasso. By default, the value is automatically
#' chosen using the approach specified by \code{lambdaMethod}. If a value is provided for \code{lambda}, then \code{lambdaMethod}
#' will be ignored.
#' @param minClusterSize The minimum number of preliminary dictionary elements that a cluster must contain in order to be included
#' in the sparse group lasso. The default value is 1 (i.e., all possible dictionary elements are included).
#' @param alpha The value of alpha to use when fitting the sparse group lasso. The default value is 0.9.
#' @param removeBorder A logical scalar indicating whether the dictionary elements containing pixels in the 10-pixel
#' border of the video should be removed prior to fitting the sparse group lasso. The default value is \code{FALSE}.
#' @param excludeReps A vector giving the indices of which dictionary elements to exclude, where the indices refer
#' to the columns of \code{step2Output$A}. The default value is \code{NULL} and no dictionary elements are excluded.
#' Users may also specify \code{"discarded"}, which will exclude all dictionary elements discarded
#' using a previous call to \code{\link{reviewNeurons}} or \code{\link{reviewNeuronsInteractive}}.
#'
#' @return An object of class \code{scalpelStep3}, which can be summarized using \code{\link{summary}} and used with the plotting functions
#' \code{\link{plotResults}}, \code{\link{plotResultsAllLambda}}, \code{\link{plotSpatial}}, \code{\link{plotTemporal}}, and \code{\link{plotBrightest}}.
#' \itemize{
#' \item{\code{Afilter}: }{A matrix containing the filtered dictionary elements, where the ith column of \code{Afilter}
#' is a vector of 1's and 0's, indicating whether each pixel is contained in the ith filtered dictionary element.
#' Note that \code{Afilter} is equivalent to \code{A} after removing the components excluded due to being on the border
#' (if \code{removeBorder=TRUE}) or having fewer preliminary dictionary elements in their cluster than \code{minClusterSize}.}
#' \item{\code{Zhat}: }{A matrix containing the estimated temporal components, where the ith row of \code{Zhat}
#' is the estimated calcium trace corresponding to the ith spatial component (i.e., the ith column of \code{Afilter}).}
#' \item{\code{lambda}: }{The value of lambda used in fitting the sparse group lasso.}
#' \item{\code{ZhatList}: }{A list of matrices containing the estimated temporal components for alternative values of \code{lambda}
#' specified in \code{lambdaSeq}. These can be plotted using \code{\link{plotResultsAllLambda}}}.
#' \item{\code{lambdaSeq}: }{A vector with length equaling the length of \code{ZhatList}, where the ith element indicates the value of lambda
#' corresponding to the temporal components in \code{ZhatList[[i]]}.}
#' \item{\code{clustersUse}: }{A vector with length equaling the number of columns of \code{Afilter}, where the ith element indicates which column of
#' \code{step2Output$A} the ith column of \code{Afilter} equals.}
#' \item{Other elements: }{As specified by the user or returned from a previous step.}
#' }
#'
#' @details To solve the sparse group lasso problem in this step, we minimize the following over \code{Z} with all non-negative elements:
#'
#' \code{0.5*sum((Y - AfilterTilde \%*\% Z)^2) + lambda*alpha*sum(Z)} \cr
#' \code{ + lambda*(1-alpha)*sum(sqrt(rowSums(Z^2)))}
#'
#' where \code{AfilterTilde} is a scaled version of \code{Afilter}.
#'
#' Several files containing data from this step and a summary of the step are saved in the folder
#' "outputFolder/Step1_version/Step2_omega_omega_cutoff_cutoff/Step3_lambdaMethod_lambdaMethod_
#' minClusterSize_minClusterSize_alpha_alpha_removeBorder_removeBorder" where \code{version} is a 5-digit unique
#'  identifier that is automatically generated in Step 1, \code{omega} and \code{cutoff} are the user-supplied parameters
#'  from Step 2, and \code{lambdaMethod}, \code{minClusterSize}, \code{alpha}, and \code{removeBorder} are
#'  the user-supplied parameters from this step. If dictionary elements were manually excluded using \code{excludeReps},
#'  this is appended to the folder name.
#'
#' @seealso The entire SCALPEL pipeline can be implemented using the \code{\link{scalpel}} function. The
#' other steps in the pipeline can be run using the \code{\link{scalpelStep0}}, \code{\link{scalpelStep1}}, \code{\link{scalpelStep2}} functions.
#' Results from this step can be summarized using \code{\link{summary}}, loaded at a later time using \code{\link{getScalpelStep3}}, and plotted using
#' \code{\link{plotSpatial}}, \code{\link{plotTemporal}}, \code{\link{plotResults}}, and \code{\link{plotBrightest}}.
#'
#' @examples
#' \dontrun{
#' ### many of the functions in this package are interconnected so the
#' ### easiest way to learn to use the package is by working through the vignette,
#' ### which is available at ajpete.com/software
#'
#' #assumes you have run the example for the "scalpelStep2" function
#'
#' #run Step 3 of SCALPEL
#' Step3Out = scalpelStep3(step2Output = Step2Out)
#' summary(Step3Out)
#' }
#' @import Matrix
#' @export
scalpelStep3 = function(step2Output, lambdaMethod="trainval", lambda=NULL, minClusterSize=1, alpha=0.9, removeBorder=FALSE, excludeReps=NULL) {

  message("Beginning Step 3 of SCALPEL")
  utils::write.table(1,paste0(step2Output$outputFolder,"timings/Step3_0_start.txt"))

  if (!is.null(lambda)) lambdaMethod = "user"
  #check function arguments
  if (lambdaMethod=="user" & is.null(lambda)) stop("Must specify 'lambda' if 'lambdaMethod' is 'user'")
  if (class(step2Output)!="scalpel" & class(step2Output)!="scalpelStep2") stop("The class of 'step2Output' must be 'scalpel' or 'scalpelStep2'")
  if (alpha<0 | alpha>1) stop("The value of 'alpha' must be in the interval [0,1]")
  if (!is.logical(removeBorder)) stop("Specify TRUE or FALSE for 'removeBorder'")
  if (nrow(step2Output$A)%%step2Output$videoHeight!=0) stop(paste0("The number of rows of 'step2Output$A' should be divisble by 'step2Output$videoHeight'"))
  if (lambdaMethod!="distn" & lambdaMethod!="trainval" & lambdaMethod!="user") stop("Specify 'distn', 'trainval', or 'user' for 'lambdaMethod', or specify 'lambda'")
  if (max(step2Output$clusterID)!=ncol(step2Output$A)) stop("The number of columns of 'step2Output$A' should equal the maximum value of 'step2Output$clusterID'")
  if (!is.null(excludeReps)) {
    #excludeReps is "discarded"
    if (length(excludeReps)==1) if (excludeReps=="discarded") {
      keepFilename = paste0(step2Output$outputFolder,"Step1_",step2Output$version,"/Step2_omega_",step2Output$omega,"_cutoff_",step2Output$cutoff,"/Step2Data/keep.rds")
      if (!file.exists(keepFilename)) stop(paste0("When 'excludeReps' is set to 'discarded', the file ", keepFilename, " must exist"))
      keep = readRDS(keepFilename)
      excludeReps = which(keep=="no")
    }
    if (max(excludeReps)>ncol(step2Output$A)) stop(paste0("The elements of 'excludeReps' must all be integers from 1 to ",ncol(step2Output$A)))
  }
  step1folder = paste0(step2Output$outputFolder,"Step1_",step2Output$version,"/Step1Data/")

  totalPixels = nrow(step2Output$A)

  #remove SCALPEL neurons with pixels in a 10-pixel border around the edge
  excludeManually = rep(0, ncol(step2Output$A))
  excludeManually[excludeReps] = 1
  if (removeBorder==TRUE) {
    videoWidth = totalPixels/step2Output$videoHeight
    indicesRemove = unique(c(1:(10*step2Output$videoHeight), ((videoWidth-10)*step2Output$videoHeight+1):totalPixels, which(1:totalPixels %% step2Output$videoHeight<=10), which(1:totalPixels %% step2Output$videoHeight>=(step2Output$videoHeight-9))))
    onBoundary = (colSums(step2Output$A[indicesRemove,])!=0)
    clustersUse = which(table(step2Output$clusterID)>=minClusterSize & !onBoundary & !excludeManually)
  } else {
    #only use dictionary elements based on minClusterSize or more preliminary dictionary elements
    clustersUse = which(table(step2Output$clusterID)>=minClusterSize & !excludeManually)
  }
  numExcluded = ncol(step2Output$A) - length(clustersUse)
  if (length(clustersUse)==0) stop("All of the dictionary elements have been excluded, so we cannot fit the sparse group lasso model.")

  #load in delta f over f Y
  nFramesDeltaf = readRDS(paste0(step2Output$outputFolder,"Step0Data/nFramesDeltaf.rds"))
  partsDeltaf = readRDS(paste0(step2Output$outputFolder,"Step0Data/partsDeltaf.rds"))
  Ypixelsuse = matrix(0, nrow=length(step2Output$pixelsUse), ncol=sum(nFramesDeltaf))
  colIndex = 1
  for (part in partsDeltaf) {
    deltafoverfY = readRDS(paste0(step2Output$outputFolder,"Step0Data/Ydeltaf_part",part,".rds"))
    Ypixelsuse[,colIndex:(colIndex+ncol(deltafoverfY)-1)] = deltafoverfY[step2Output$pixelsUse,]
    colIndex = colIndex + ncol(deltafoverfY)
    rm(deltafoverfY)
  }
  Ysgl = Ypixelsuse[which(rowSums(step2Output$A[step2Output$pixelsUse,clustersUse,drop=FALSE])!=0),]
  rm(Ypixelsuse)

  #A to use in SGL problem
  finalPixelsUse = which(rowSums(step2Output$A[,clustersUse,drop=FALSE])!=0)
  Asgl = step2Output$A[finalPixelsUse,clustersUse,drop=FALSE]

  #find sets of overlapping neurons
  setVec = findSets(Asgl)
  utils::write.table(1,paste0(step2Output$outputFolder,"timings/Step3_1_prelim.txt"))

  if (lambdaMethod=="distn") {

    message("Calculating value of lambda to use")
    #lambda chosen as negative of 0.1% quantile of elements of active pixels of deltafoverfY
    #choose a single lambda
    #if Ysgl is large, just use 1000 rows of it
    set.seed(100)
    if (nrow(Ysgl)>1000) rows = sample(1:nrow(Ysgl),1000) else rows = 1:nrow(Ysgl)
    lambdaByDistn = -stats::quantile(as.matrix(Ysgl)[rows,], p=0.001)
    #the above value is lambda*alpha so divide by alpha
    lambdaToFit = lambdaByDistn/alpha

  } else if (lambdaMethod=="trainval") {

    message("Constructing training and validation sets")
    #construct training and validation sets
    seed = 300
    pixelsTrain = c()
    for (i in 1:max(setVec)) {
      pixelsTrain = c(pixelsTrain, samplePixels(A=Asgl, columns=which(setVec==i), seed=seed))
    }
    pixelsVal = (1:nrow(Asgl))[-pixelsTrain]

    #scale A before starting
    ATildesgl = t(t(Asgl) / colSums(Asgl))

    message("Fitting SGL on training set")
    sglOutTrain = sgl(Y=Ysgl[pixelsTrain,], A=Matrix(ATildesgl[pixelsTrain,], sparse=TRUE),
                      videoHeight=step2Output$videoHeight, totalPixels=NA, pixelsUse=NA, lambdaMinRatio = 0.001,
                      alpha = alpha, tolerance = 10e-8, l2Scale = "no")

    message("Calculating validation error")
    #calculate validation error
    valError = rep(NA, length(sglOutTrain$lambdaSeq))
    #calculate validation error using thresholded Y
    YsglThreshold = Ysgl
    YsglThreshold[YsglThreshold<(-min(YsglThreshold))] = 0
    YsglThreshold = Matrix(YsglThreshold, sparse = TRUE)

    for (i in seq(valError)) {
      valError[i] = mean((YsglThreshold[pixelsVal,] - ATildesgl[pixelsVal,] %*% sglOutTrain$ZList[[i]])^2)
    }

    #calculate maximum lambda whose validation error is within 5% of the minimum validation error
    lambdaSmallChange = sglOutTrain$lambdaSeq[min(which((valError-min(valError))/min(valError)<=0.05))]

    #refit on entire A using lambda = (selected lambda using training set)/(pct of pixels in training set)
    #this adjusts for the fact that we don't scale the sum of sq. errors by the number of pixels
    #feed in non-scaled A and use usual sq. l2 scaling
    lambdaToFit = lambdaSmallChange/(length(pixelsTrain)/(length(pixelsTrain)+length(pixelsVal)))
  } else {
    lambdaToFit = lambda
  }

  utils::write.table(1,paste0(step2Output$outputFolder,"timings/Step3_2_lambdaFound.txt"))

  message("Fitting SGL")
  lambdaSeq = c(exp(seq(log(lambdaToFit*10), log(lambdaToFit), len=5)), exp(seq(log(lambdaToFit), log(lambdaToFit*0.1), len=6))[2:6])
  sglOutOverall = sgl(Y=Ysgl, A=Matrix(Asgl, sparse=TRUE), lambdaSeq = lambdaSeq,
                      videoHeight=step2Output$videoHeight, totalPixels=totalPixels, pixelsUse=finalPixelsUse,
                      alpha = alpha, tolerance = 10e-8)

  utils::write.table(1,paste0(step2Output$outputFolder,"timings/Step3_3_sglfit.txt"))

  message("Saving results")
  #save A hat and Z hat
  estZ = sglOutOverall$ZList[[5]]
  fullAsgl = Matrix(0, nrow=totalPixels, ncol=ncol(Asgl), sparse=TRUE)
  fullAsgl[finalPixelsUse,] = Asgl

  if (!is.null(excludeReps)) fileAppend = paste0("_excluded_",paste0(sort(excludeReps), collapse = "_")) else fileAppend = ""
  step3folder = paste0(step2Output$outputFolder,"Step1_",step2Output$version,"/Step2_omega_",step2Output$omega,"_cutoff_",step2Output$cutoff,
                       "/Step3_lambdaMethod_",lambdaMethod,"_lambda_",round(lambdaToFit,4),"_minClusterSize_",minClusterSize,
                       "_alpha_",alpha,"_removeBorder_",(removeBorder),fileAppend,"/")
  if (dir.exists(step3folder)) {
    overwrite = UIinputYesNo(paste0("Results already exist for these Step 3 parameters.\n
                                    Should we overwrite them? Enter 'Y' or 'N': \n"))
    if (overwrite==FALSE) {
      stop("Results not overwritten. Please rerun for a different combo of parameters.")
    } else {
      #delete existing data directory
      unlink(step3folder, recursive = TRUE)
    }
  }
  dir.create(step3folder)
  dir.create(paste0(step3folder,"Step3Data/"))

  saveRDS(fullAsgl, file=paste0(step3folder,"Step3Data/Afilter.rds"))
  saveRDS(estZ, file=paste0(step3folder,"Step3Data/Zhat.rds"))
  saveRDS(lambdaToFit, file=paste0(step3folder,"Step3Data/lambda.rds"))
  saveRDS(lambdaSeq, file=paste0(step3folder,"Step3Data/lambdaSeq.rds"))
  saveRDS(sglOutOverall$ZList, file=paste0(step3folder,"Step3Data/ZhatList.rds"))
  saveRDS(clustersUse, file=paste0(step3folder,"Step3Data/clustersUse.rds"))
  saveRDS(excludeReps, file=paste0(step3folder,"Step3Data/excludeReps.rds"))

  #summary file of data
  fileConn = file(paste0(step3folder, "Step3Summary.txt"))
  writeLines(c("Step 3 of SCALPEL","","SUMMARY",
               paste0("We began with ", ncol(step2Output$A), " dictionary elements, and ",
                      ifelse(ncol(step2Output$A)==ncol(Asgl), "all", ncol(Asgl)), " of these were included in the sparse group lasso model. The sparse group lasso was fit with lambda=",
                      signif(lambdaToFit, 4), " and alpha=", signif(alpha, 4), ", and there were ", nrow(estZ[rowSums(estZ)!=0,,drop=FALSE]),
                      " neurons with non-zero temporal components (i.e., the neuron was estimated to be active in at least one frame)."),
               "", "ARGUMENTS USED", paste0("Method to choose lambda: ", ifelse(lambdaMethod=="trainval", "Training/validation set", ifelse(lambdaMethod=="distn", "Using distribution of Y", "User-supplied"))),
               paste0("User-supplied lambda value: ", ifelse(is.null(lambda), "N/A", lambda)), paste0("Value of alpha: ", alpha),
               paste0("Minimum cluster size for dictionary element to be included in the sparse group lasso: ", minClusterSize),
               paste0("Should spatial components on the border of the video be included in the sparse group lasso? ", ifelse(removeBorder==TRUE, "No", "Yes")),
               paste0("Indices of dictionary elements to exclude from the sparse group lasso: ", ifelse(is.null(excludeReps), "N/A", paste0(excludeReps, collapse=", ")))), fileConn)
  close(fileConn)

  output = getScalpelStep3(outputFolder=step2Output$outputFolder, version=step2Output$version, cutoff=step2Output$cutoff,
                           omega=step2Output$omega, lambdaMethod=lambdaMethod, minClusterSize=minClusterSize, alpha=alpha,
                           lambda=lambdaToFit, removeBorder=removeBorder, excludeReps=excludeReps)

  utils::write.table(1,paste0(step2Output$outputFolder,"timings/Step3_4_end.txt"))
  message("Step 3 of SCALPEL finished!")
  return(output)
}

###############################################################################################
## SCALPEL FUNCTION
## PERFORMS ALL STEPS WITH A SINGLE FUNCTION CALL
###############################################################################################

#' Perform entire SCALPEL pipeline.
#'
#' Segmentation, Clustering, and Lasso Penalties (SCALPEL) is a method for neuronal calcium imaging
#' data that identifies the locations of neurons, and estimates their calcium concentrations over time.
#' The pipeline involves several steps, each of which is described briefly in its corresponding
#' function. See \code{\link{scalpelStep0}}, \code{\link{scalpelStep1}}, \code{\link{scalpelStep2}},
#' \code{\link{scalpelStep3}} for more details.
#' Full details for the SCALPEL method are provided in Petersen, A., Simon, N., and Witten, D. (Forthcoming).
#' SCALPEL: Extracting Neurons from Calcium Imaging Data.
#'
#' @param outputFolder Step 0 parameter: The existing directory where the results should be saved.
#' @param rawDataFolder Step 0 parameter: The directory where the raw data version of Y is saved. The data should be a
#' PxT matrix, where P is the total number of pixels per image frame and T
#' the number of frames of the video, for which the (i,j)th element contains the
#' fluorescence of the ith pixel in the jth frame. To create Y, you should
#' vectorize each 2-dimensional image frame by concatenating the columns of the image frame.  If the data is
#' saved in a single file, it should be named "Y_1.mat", "Y_1.rds", "Y_1.txt", or "Y_1.txt.gz" (depending on \code{fileType}),
#' and if the data is split over multiple files, they should be split into chunks of the columns
#' and named consecutively ("Y_1.mat", "Y_2.mat", etc.; "Y_1.rds", "Y_2.rds", etc.; "Y_1.txt", "Y_2.txt", etc.; or "Y_1.txt.gz", "Y_2.txt.gz", etc.).
#' @param videoHeight Step 0 parameter: The height of the video (in pixels).
#' @param minClusterSize Step 3 parameter: The minimum number of preliminary dictionary elements that a cluster must contain in order to be included
#' in the sparse group lasso.
#' @param lambdaMethod Step 3 parameter: A description of how lambda should be chosen: either \code{"trainval"} (default),
#' \code{"distn"}, or \code{"user"}. A value of \code{"trainval"} means lambda will be chosen using a training/validation
#' set approach. A value of \code{"distn"} means lambda will be chosen as the negative of the 0.1\% quantile
#' of elements of active pixels (i.e., those contained in at least one dictionary element) of Y.
#' Using \code{"distn"} is computationally faster than \code{"trainval"}. Alternatively with \code{"user"},
#' the value of lambda can be directly specified using \code{lambda}.
#' @param lambda Step 3 parameter: The value of lambda to use when fitting the sparse group lasso. By default, the value is automatically
#' chosen using the approach specified by \code{lambdaMethod}. If a value is provided for \code{lambda}, then \code{lambdaMethod}
#' will be ignored.
#' @param cutoff Step 2 parameter: A value in [0,1] indicating where to cut the dendrogram that results from hierarchical clustering
#' of the preliminary dictionary elements. The default value is 0.18.
#' @param omega Step 2 parameter: A value in [0,1] indicating how to weight spatial vs. temporal information in the dissimilarity metric
#' used for clustering. If \code{omega=1}, only spatial information is used. The default value is 0.2.
#' @param fileType Step 0 parameter: Indicates whether raw data is an .rds (default value; \code{fileType="R"}), .mat (\code{fileType="matlab"}),
#' .txt (\code{fileType="text"}), or .txt.gz (\code{fileType="zippedText"}) file. Any text files should not have row or column names.
#' @param processSeparately Step 0 parameter: Logical scalar giving whether the multiple raw data files should be
#' processed individually, versus all at once. Processing the files separately may be preferable for larger videos.
#' Default value is \code{TRUE}; this argument is ignored if the raw data is saved in a single file.
#' @param minSize,maxSize Step 1 parameter: The minimum and maximum size, respectively, for a preliminary dictionary element with default values
#' of 25 and 500, respectively.
#' @param maxWidth,maxHeight Step 1 parameter: The maximum width and height, respectively, for a preliminary dictionary element with default values of 30.
#' @param removeBorder Step 3 parameter: A logical scalar indicating whether the dictionary elements containing pixels in the 10-pixel
#' border of the video should be removed prior to fitting the sparse group lasso. The default value is \code{FALSE}.
#' @param alpha Step 3 parameter: The value of alpha to use when fitting the sparse group lasso. The default value is 0.9.
#' @param thresholdVec Optional advanced user argument: Step 1 parameter: A vector with the desired thresholds to use for image segmentation. If not specified, the default is to
#' use the negative of the minimum of the processed Y data, the negative of the 0.1\% quantile of the processed Y data, and the mean of these. If there were multiple raw data
#' files that were processed separately, these values are calculated on only the first part of data, and then these thresholds are used for the remaining parts.
#' @param maxSizeToCluster Optional advanced user argument: Step 2 parameter: The maximum number of preliminary dictionary elements to cluster at once. We attempt to cluster each
#' overlapping set of preliminary dictionary elements, but if one of these sets is very large (e.g., >10,000), memory issues may
#' result. Thus we perform a two-stage clustering in which we first cluster together random sets of size
#' approximately equaling \code{maxSizeToCluster} and then cluster together the representatives from the first stage.
#' Finally, we recalculate the representatives using all of the preliminary dictionary elements in the final clusters. The default value is 3000.
#' If \code{maxSizeToCluster} is set to \code{NULL}, single-stage clustering is done, regardless of the size of the overlapping sets.
#' Memory issues may result when using this option to force single-stage clustering if the size of
#' the largest overlapping set of preliminary dictionary elements is very large (e.g., >10,000).
#'
#' @return An object of class \code{scalpel}, which can be summarized using \code{\link{summary}}, used to rerun SCALPEL Steps 1-3 with new parameters using \code{\link{scalpelStep1}}, \code{\link{scalpelStep2}}, and \code{\link{scalpelStep3}},
#' or can be used with any of the plotting functions: \code{\link{plotFrame}}, \code{\link{plotThresholdedFrame}}, \code{\link{plotVideoVariance}}, \code{\link{plotCandidateFrame}},
#' \code{\link{plotCluster}}, \code{\link{plotResults}}, \code{\link{plotResultsAllLambda}}, \code{\link{plotSpatial}},
#' \code{\link{plotTemporal}}, and \code{\link{plotBrightest}}.
#' The individual elements are described in detail in the documentation for the corresponding step: \code{\link{scalpelStep0}}, \code{\link{scalpelStep1}}, \code{\link{scalpelStep2}}, and \code{\link{scalpelStep3}}.
#'
#' @details Several files containing data from the pipeline, as well as summaries of each step,
#' are saved in various subdirectories of "outputFolder".
#'
#' @seealso The individual steps in the pipeline can be run using the \code{\link{scalpelStep0}},
#' \code{\link{scalpelStep1}}, \code{\link{scalpelStep2}}, and \code{\link{scalpelStep3}} functions.
#' Results can be summarized using \code{\link{summary}}, loaded at a later time using \code{\link{getScalpel}}, and plotted using \code{\link{plotResults}},
#' \code{\link{plotSpatial}}, \code{\link{plotTemporal}}, \code{\link{plotCluster}}, \code{\link{plotVideoVariance}},
#' \code{\link{plotFrame}}, \code{\link{plotThresholdedFrame}}, \code{\link{plotCandidateFrame}}, and \code{\link{plotBrightest}}.
#'
#' @examples
#' \dontrun{
#' ### many of the functions in this package are interconnected so the
#' ### easiest way to learn to use the package is by working through the vignette,
#' ### which is available at ajpete.com/software
#'
#' #existing folder to save results (update this to an existing folder on your computer)
#' outputFolder = "scalpelResults"
#' #location on computer of raw data in R package to use
#' rawDataFolder = gsub("Y_1.rds", "", system.file("extdata", "Y_1.rds", package = "scalpel"))
#' #video height of raw data in R package
#' videoHeight = 30
#' #run SCALPEL pipeline
#' scalpelOutput = scalpel(outputFolder = outputFolder, rawDataFolder = rawDataFolder,
#'                        videoHeight = videoHeight)
#' #summarize each step
#' summary(scalpelOutput, step = 0)
#' summary(scalpelOutput, step = 1)
#' summary(scalpelOutput, step = 2)
#' summary(scalpelOutput, step = 3)
#' }
#' @import Matrix
#' @export
scalpel = function(outputFolder, rawDataFolder, videoHeight, minClusterSize=1, lambdaMethod="trainval", lambda=NULL, cutoff=0.18,
                   omega=0.2, fileType="R", processSeparately=TRUE, minSize=25, maxSize=500, maxWidth=30,
                   maxHeight=30, removeBorder=FALSE, alpha=0.9, thresholdVec=NULL, maxSizeToCluster=3000) {

  if (substr(outputFolder, nchar(outputFolder), nchar(outputFolder))!="/") outputFolder = paste0(outputFolder, "/")
  if (!dir.exists(outputFolder)) stop("Specify an existing folder for 'outputFolder'")
  if (!dir.exists(paste0(outputFolder,"timings/"))) dir.create(paste0(outputFolder,"timings/"))
  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step0_start.txt"))
  step0Output = scalpelStep0(outputFolder=outputFolder, videoHeight=videoHeight,
                             rawDataFolder=rawDataFolder, processSeparately=processSeparately, fileType=fileType)
  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step0_end.txt"))

  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step1_start.txt"))
  step1Output = scalpelStep1(step0Output=step0Output, minSize=minSize, maxSize=maxSize, maxWidth=maxWidth,
                             maxHeight=maxHeight, thresholdVec=thresholdVec)
  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step1_end.txt"))

  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step2_start.txt"))
  step2Output = scalpelStep2(step1Output=step1Output, cutoff=cutoff, omega=omega, maxSizeToCluster=maxSizeToCluster)
  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step2_end.txt"))

  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step3_start.txt"))
  step3Output = scalpelStep3(step2Output=step2Output, lambdaMethod=lambdaMethod, minClusterSize=minClusterSize,
                             alpha=alpha, removeBorder=removeBorder, lambda=lambda)
  utils::write.table(1,paste0(outputFolder,"timings/SCALPEL_Step3_end.txt"))

  output = getScalpel(outputFolder=outputFolder, version=step1Output$version, cutoff=cutoff, omega=omega,
                      lambdaMethod=step3Output$lambdaMethod, lambda=step3Output$lambda, minClusterSize=minClusterSize,
                      alpha=alpha, removeBorder=removeBorder, excludeReps=step3Output$excludeReps)
  return(output)
}