R/cbaf-automatedStatistics.R

Defines functions automatedStatistics

Documented in automatedStatistics

#' @title Perform the requested statistics for various studies / subgroups of a
#' study.
#'
#' @description This function calculates frequency percentage, frequency ratio,
#' mean value and median value of samples greater than specific cutoff in the
#' selected study / subgroups of the study. Furthermore, it can looks for the
#' five genes that contain the highest values in each study / study subgroup.
#' It uses the data generated by obtainOneStudy()/obtainMultipleStudies()
#' function.
#'
#' @details
#' \tabular{lllll}{
#' Package: \tab cbaf \cr
#' Type: \tab Package \cr
#' Version: \tab 1.20.0 \cr
#' Date: \tab 2022-10-24 \cr
#' License: \tab Artistic-2.0 \cr
#' }
#'
#'
#'
#' @importFrom stats median na.omit
#'
#' @importFrom BiocFileCache bfcnew bfcquery bfcpath
#'
#' @importFrom utils head setTxtProgressBar txtProgressBar
#'
#'
#'
#' @include cbaf-obtainOneStudy.R cbaf-obtainMultipleStudies.R
#'
#'
#'
#'
#' @usage automatedStatistics(submissionName, obtainedDataType =
#'   "multiple studies", calculate = c("frequencyPercentage", "frequencyRatio",
#'   "meanValue"), topGenes = TRUE, cutoff=NULL, round=TRUE)
#'
#'
#'
#' @param submissionName a character string containing name of interest. It is
#' used for naming the process.
#'
#' @param obtainedDataType a character string that specifies the type of input
#' data produced by the previous function. Two options are availabe:
#' \code{"single study"} for obtainOneStudy() and \code{"multiple studies"} for
#' obtainMultipleStudies(). The function uses obtainedDataType and
#' submissionName to construct the name of the BiocFileCach object and then
#' finds the appropriate data inside it. Default value is multiple studies`.
#'
#' @param calculate a character vector that containes the statistical procedures
#' users prefer the function to compute. The complete results can be obtained
#' by \code{c("frequencyPercentage", "frequencyRatio", "meanValue",
#' "medianValue")}. This will tell the function to compute the following:
#' \code{"frequencyPercentage"}, which is the percentge of samples having the
#' value greather than specific cutoff divided by the total sample size for
#' every study / study subgroup;
#' \code{"frequency ratio"}, which shows the number of selected samples divided
#' by the total number of samples that give the frequency percentage for every
#' study / study subgroup. It shows the selected and total sample sizes.;
#' \code{"Mean Value"}, that contains mean value of selected samples for each
#' study;
#' \code{"Median Value"}, which shows the median value of selected samples for
#' each study.
#' The default input is \code{calculate = c("frequencyPercentage",
#' "frequencyRatio", "meanValue")}.
#'
#' @param topGenes a logical value that, if set as TRUE, causes the function to
#' create three data.frame that contain the five top genes for each cancer. To
#' get all the three data.frames, "frequencyPercentage", "meanValue" and
#' "MedianValue" must have been included for \code{calculate}.
#'
#' @param cutoff a number used to limit samples to those that are greather than
#' this number (cutoff). The default value for methylation data is \code{0.8}
#' while gene expression studies use default value of \code{2}. For methylation
#' studies, it is \code{average of relevant locations}, for the rest, it is
#' \code{"log z-score"}. To change the cutoff to any desired number, change the
#' option to \code{cutoff = desiredNumber} in which desiredNumber is the number
#' of interest.
#'
#' @param round a logical value that, if set to be \code{TRUE}, will force the
#' function to round all the calculated values to two decimal places. The
#' default value is \code{TRUE}.
#'
#'
#'
#' @return A new section in the BiocFileCache object that was created by one of
#' the obtainOneStudy() or obtainMultipleStudies() functions. It contains a list
#'  that contains some or all of the following statistical measurements for
#'  every gene group, based on what user has chosen: \code{Frequency.Percentage}
#'  , \code{Top.Genes.of.Frequency.Percentage}, \code{Frequency.Ratio},
#'  \code{Mean.Value}, \code{Top.Genes.of.Mean.Value}, \code{Median},
#'  \code{Top.Genes.of.Median}.
#'
#'
#'
#' @examples
#' genes <- list(K.demethylases = c("KDM1A", "KDM1B", "KDM2A", "KDM2B", "KDM3A",
#'  "KDM3B", "JMJD1C", "KDM4A"), K.methyltransferases = c("SUV39H1", "SUV39H2",
#'  "EHMT1", "EHMT2", "SETDB1", "SETDB2", "KMT2A", "KMT2A"))
#'
#' obtainOneStudy(genes, "test", "Breast Invasive Carcinoma (TCGA, Cell 2015)",
#' "RNA-Seq", desiredCaseList = c(3,4))
#'
#' automatedStatistics("test", obtainedDataType = "single study", calculate =
#' c("frequencyPercentage", "frequencyRatio"))
#'
#' @author Arman Shahrisa, \email{shahrisa.arman@hotmail.com} [maintainer,
#' copyright holder]
#' @author Maryam Tahmasebi Birgani, \email{tahmasebi-ma@ajums.ac.ir}
#'
#' @export



################################################################################
################################################################################
############### Automatically calculate statistical measurements ###############
################################################################################
################################################################################

automatedStatistics<- function(

  submissionName,

  obtainedDataType = "multiple studies",

  calculate = c("frequencyPercentage", "frequencyRatio", "meanValue"),

  topGenes = TRUE,

  cutoff = NULL,

  round = TRUE

  ){

  ##############################################################################
  ########## Prerequisites

  # Check submissionName

  if(!is.character(submissionName)){

    stop("[automatedStatistics] 'submissionName' must be a character string!")

  }



  # Obtain the unprocessed data list

  if(obtainedDataType == "multiple studies"){

    previousParamName <- "Parameters for obtainMultipleStudies()"

    paramDeterminant <- "ObtainMultipleStudies"

    databaseType <- "Obtained data for multiple studies"

  } else if(obtainedDataType == "single study"){

    previousParamName <- "Parameters for obtainOneStudy()"

    paramDeterminant <- "ObtainOneStudy"

    databaseType <- "Obtained data for single study"

  } else{

    stop("[automatedStatistics] 'obtainedDataType' must be either 'multiple studies' or 'single study'!")

  }



  # Check calculate

  if(is.character(calculate)){

    if(!any(calculate %in% c("frequencyPercentage",

                             "frequencyRatio",

                             "meanValue",

                             "medianValue"))

       ){

      stop("[automatedStatistics] 'calculate' must consist of one or more of these terms: 'frequencyPercentage', 'frequencyRatio', 'meanValue' and/or 'medianValue'!")

    }

  }else{

    stop("[automatedStatistics] 'calculate' must be a character string!")

  }



  # Check topGenes

  if(!is.logical(topGenes)){

    stop("[automatedStatistics] 'topGenes' must be either TRUE or FALSE!")

  }



  # Check cutoff

  if(!is.null(cutoff) & !is.numeric(cutoff)){

    stop("[automatedStatistics] 'cutoff' must be either NULL or a numeric value!")

  }



  # Check round

  if(!is.logical(round)){

    stop("[automatedStatistics] 'round' must be either TRUE or FALSE!")

  }





  ##############################################################################
  ########## Decide whether function should stops now!

  # Check wheather the requested data exists

  database <- system.file("extdata", submissionName, package="cbaf")

  if(!dir.exists(database)){

    stop("[automatedStatistics] Please run one of the obtainSingleStudy() or obtainMultipleStudies() functions first!")

  } else if(dir.exists(database)){

    bfc <- BiocFileCache(

      file.path(system.file("extdata", package = "cbaf"), submissionName),

      ask = FALSE

      )

    if(!nrow(bfcquery(bfc, previousParamName)) == 1){

      stop("[automatedStatistics] Please run one of the obtainSingleStudy() or obtainMultipleStudies() functions first!")

    }

  }



  # obtain parameters for prevous function

  previousFunctionParam <-

    readRDS(bfcpath(bfc, bfcquery(bfc, c(previousParamName))$rid))


  # fetch an old parameter from the previous function

  desiredTechnique <- previousFunctionParam$desiredTechnique



  # setting the value for cutoff

  if(desiredTechnique == "methylation"){

    cutoff.phrase <- "Mean methylation cutoff"

    if(is.null(cutoff)){

      cutoff <- 0.8

    }

  } else{

    cutoff.phrase <- "log z-score cutoff"

    if(is.null(cutoff)){

      cutoff <- 2.0

    }

  }





  # Store the new parameters

  newParameters <-list()

  newParameters$submissionName <- submissionName

  newParameters$obtainedDataType <- obtainedDataType

  newParameters$calculate <- calculate

  newParameters$cutoff <- cutoff

  newParameters$round <- round

  newParameters$topGenes <- topGenes

  newParameters$desiredTechnique <- desiredTechnique





  # Check wheather the requested data exists

  number.of.rows.parameters <-

    nrow(bfcquery(bfc, "Parameters for automatedStatistics()"))


  if(number.of.rows.parameters == 1){

    oldParameters <-

      readRDS(bfcpath(

        bfc,

        bfcquery(bfc, c("Parameters for automatedStatistics()"))$rid)

    )

    # Check whether the previous function is skipped

    if(previousFunctionParam$lastRunStatus == "skipped"){

      if(identical(oldParameters[-8], newParameters) |

         submissionName %in% c("test", "test2")){

        continue <- FALSE

        # Store the last parameter

        newParameters$lastRunStatus <- "skipped"

        oldParamAutomatedStatistics <- newParameters

        saveRDS(

          oldParamAutomatedStatistics,

          file=bfc[[bfcquery(bfc, "Parameters for automatedStatistics()")$rid]]

          )

        if(submissionName %in% c("test", "test2")){

          message("[automatedStatistics] Please choose a name other than 'test' and 'test2'.")

        }

        message("[automatedStatistics] The requested data already exist locally.")

        message("[automatedStatistics] The function was haulted!")

      } else{

        continue <- TRUE

      }

    } else{

      continue <- TRUE

    }

  } else{

    continue <- TRUE

  }





  if(continue){

    # Getting the source data

    sourceDataList <- readRDS(bfcpath(bfc, bfcquery(bfc, databaseType)$rid))

    number.of.gene.groups <- sourceDataList[[1]]

    if(!is.list(sourceDataList)){

      stop("[automatedStatistics] Input database must be a list!")

    }



    ############################################################################
    ########## Set the function ready to work

    # creating output fortmat

    processedList <- list()

    # temporarily Inactive

    # options(stringsAsFactors = FALSE)



    # Report

    message("[automatedStatistics] Calculating statistics.")



    # Create a progressbar

    total.number <- length(sourceDataList)*length(number.of.gene.groups)

    automatedStatisticsProgressBar <-

      txtProgressBar(min = 0, max = total.number , style = 3)

    ExtA <- 0



    ############################################################################
    ########## Repetitive code section

    if(topGenes){

      default_top = 5

      top_finder <- function(startingMatrix, topNumber = 5){


        # Removing NaN and NA

        startingMatrix[is.nan(startingMatrix) | is.na(startingMatrix)] <- 0



        # Finding the top values

        topGenes.values <-

          head(unique(sort(startingMatrix, decreasing = TRUE)), n = topNumber)

        # Creating empty list for iterations

        complete.top.list <- vector("list", length(topGenes.values))


        for(topV in seq_along(topGenes.values)){

          topGene.name <-

            colnames(startingMatrix)[startingMatrix %in% topGenes.values[topV]]

          # check whether ttwo or more genes have the same rank

          if(length(topGene.name) > 1){

            topGene.name <- paste(topGene.name, collapse = ", ")

          }

          # rounding

          if(round){

            complete.top <- data.frame(

              topGene = topGene.name,

              topValue = round(topGenes.values[topV], digits = 2)

              , stringsAsFactors = FALSE

            )

          } else{

            complete.top <- data.frame(

              topGene = topGene.name,

              topValue = topGenes.values[topV],

              stringsAsFactors = FALSE

            )

          }

          # correcting column names

          colnames(complete.top) <- c(paste(topV, "th ", "Gene", sep=""),

                                      paste(topV, "th ", "Value", sep=""))

          # complete list

          complete.top.list[[topV]] <- complete.top

        }

        # Merge list to give post.topGenes

        post.topGenes <- do.call("cbind", complete.top.list)

        # correcting rowname

        rownames(post.topGenes) <- source.data.subset.name


        # fixing the problem caused by more thank one gene with same rank

        if(length(topGenes.values) < topNumber){

          # Repeat unit

          fix.dataframe <- data.frame(

            topGene = "-",

            topValue = "-",

            stringsAsFactors = FALSE

          )

          # number of new units

          newUnits <- topNumber - length(topGenes.values)

          # finding current number of units

          oldUnits <- length(topGenes.values)

          top_list <- vector("list", length = newUnits)

          for(empty in seq_len(newUnits)){

            colnames(fix.dataframe) <-

              c(paste(oldUnits + empty, "th ", "Gene", sep = ""),

                paste(oldUnits + empty, "th ", "Value", sep = ""))

            rownames(fix.dataframe) <- rownames(post.topGenes)

            top_list[[empty]] <- fix.dataframe

          }

          missing.tops <- do.call(cbind, top_list)

          post.topGenes <- cbind(post.topGenes, missing.tops)

        }

        post.topGenes

      }

    }



    ############################################################################
    ########## Core segment

    # calculating the first 'for' loop for different gene groups

    for(gg in seq_along(sourceDataList)){

      temList <- list()



      # Creating empty lists for iterations

      if("frequencyPercentage" %in% calculate){

        Frequency.Percentage <- vector("list", length(number.of.gene.groups))

        if(topGenes){

          Top.Genes.of.Frequency.Percentage <-

            vector("list", length(number.of.gene.groups))

        }

      }


      if("frequencyRatio" %in% calculate){

        Frequency.Ratio <- vector("list", length(number.of.gene.groups))

      }


      if("meanValue" %in% calculate){

        Mean.Value <- vector("list", length(number.of.gene.groups))

        if(topGenes){

          Top.Genes.of.Mean.Value <-

            vector("list", length(number.of.gene.groups))

        }

      }


      if("medianValue" %in% calculate){

        Median.Value <- vector("list", length(number.of.gene.groups))

        if(topGenes){

          Top.Genes.of.Median.Value <-

            vector("list", length(number.of.gene.groups))

        }

      }





      for(cs in seq_along(number.of.gene.groups)){

        # start working on one study

        geneNumber <- ncol(sourceDataList[[gg]][[cs]])

        source.data.subset <- sourceDataList[[gg]][[cs]]

        source.data.subset.name <- names(sourceDataList[[gg]])[cs]

        genes.involved <- colnames(sourceDataList[[gg]][[cs]])





        # Creating and filling the empty matrix with frequency.percentage data

        if("frequencyPercentage" %in% calculate){

          # creating empty matrix

          frequency.percentage.for.a.subset <-

            matrix(, nrow = 1, ncol = geneNumber)


          dimnames(frequency.percentage.for.a.subset) <-

            list(source.data.subset.name, genes.involved)



          # calculate frequency percentage

          for(fp in seq_len(geneNumber)){

            # Subset a column

            a.column <- source.data.subset[,fp]

            a.column.with.absolute.values <- abs(a.column)



            # General statements for core statistics conditions

            frequency <-

              mean(

                as.vector(a.column.with.absolute.values >= cutoff),

                na.rm=TRUE

                )


            mean.with.cutoff.minus.NA <-

              mean(

                as.vector(a.column)[a.column.with.absolute.values >= cutoff],

                na.rm=TRUE

                )


            mean.is.not.na <- !is.na(mean(as.vector(a.column)))

            mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)

            number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])

            all.members.are.infinite <- all(!is.finite(a.column))



            # Check all members are under cutoff

            if(mean.is.not.na & mean.is.nan.with.cutoff){

              frequency.percentage.for.a.subset[1, fp] <- 0

              # Check all members are NaN

            } else if(number.of.not.nan.members == 0 &

                      all.members.are.infinite &

                      mean.is.nan.with.cutoff){

              frequency.percentage.for.a.subset[1, fp] <- NaN

              # Check all members are NA

            } else if(number.of.not.nan.members > 0 &

                      all.members.are.infinite &

                      mean.is.nan.with.cutoff){

              frequency.percentage.for.a.subset[1, fp] <- NA

              # Mean is bigger than 0

            } else if(mean.with.cutoff.minus.NA > 0 & !mean.is.nan.with.cutoff){

              frequency.percentage.for.a.subset[1, fp] <- 100*frequency

              # Mean is smaller than 0

            } else if(mean.with.cutoff.minus.NA < 0 & !mean.is.nan.with.cutoff){

              frequency.percentage.for.a.subset[1, fp] <- -100*frequency

            }

          }



          # Merging calculations

          if(round){

            Frequency.Percentage[[cs]] <-

              round(frequency.percentage.for.a.subset, digits = 2)

          }else{

            Frequency.Percentage[[cs]] <- frequency.percentage.for.a.subset

          }





          if(topGenes){

            Top.Genes.of.Frequency.Percentage[[cs]] <-

              top_finder(frequency.percentage.for.a.subset, default_top)

          }

        }










        # Creating and filling the empty matrix with frequency.ratio data

        if("frequencyRatio" %in% calculate){

          # creating empty matrix

          frequency.ratio.for.a.subset <- matrix(, nrow = 1, ncol = geneNumber)

          dimnames(frequency.ratio.for.a.subset) <-

            list(source.data.subset.name, genes.involved)



          # calculate frequency ratio

          for(fr in seq_len(geneNumber)){

            # Subset a column

            a.column <- source.data.subset[,fr]

            a.column.with.absolute.values <- abs(a.column)



            # General statements for core statistics conditions

            frequency <-

              mean(

                as.vector(a.column.with.absolute.values >= cutoff),

                na.rm=TRUE

              )


            mean.with.cutoff.minus.NA <-

              mean(

                as.vector(a.column)[a.column.with.absolute.values >= cutoff],

                na.rm=TRUE

                )


            mean.is.not.na <- !is.na(mean(as.vector(a.column)))

            mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)

            number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])

            all.members.are.infinite <- all(!is.finite(a.column))



            # Check all members are under cutoff

            if(mean.is.not.na & mean.is.nan.with.cutoff){

              frequency.ratio.for.a.subset[1, fr] <-

                paste0("0 out of ", as.character(length(as.vector(a.column))))

              # Check all members are NaN

            } else if (number.of.not.nan.members == 0 &

                       all.members.are.infinite &

                       mean.is.nan.with.cutoff){

              frequency.ratio.for.a.subset[1, fr] <- NaN

              # Check all members are NA

            } else if (number.of.not.nan.members > 0 &

                       all.members.are.infinite &

                       mean.is.nan.with.cutoff){

              frequency.ratio.for.a.subset[1, fr] <- NA

              # Mean is number

            } else if (!mean.is.nan.with.cutoff){

              frequency.ratio.for.a.subset[1, fr] <-

                paste(as.character(length(na.omit(as.vector(a.column)[

                  a.column.with.absolute.values >= cutoff]))), " out of ",

                  as.character(length(as.vector(a.column))), sep="")

            }

          }



          # Merging calculations

          Frequency.Ratio[[cs]] <- frequency.ratio.for.a.subset

        }










        # Creating and filling the empty matrix with mean.value data

        if("meanValue" %in% calculate){

          # creating empty matrix

          mean.value.for.a.subset <- matrix(, nrow = 1, ncol = geneNumber)

          dimnames(mean.value.for.a.subset) <-

            list(source.data.subset.name, genes.involved)



          # calculate Mean value

          for(mv in seq_len(geneNumber)){

            # Subset a column

            a.column <- source.data.subset[,mv]

            a.column.with.absolute.values <- abs(a.column)



            # General statements for core statistics conditions

            frequency <-

              mean(

                as.vector(a.column.with.absolute.values >= cutoff),

                na.rm=TRUE

                )


            mean.with.cutoff.minus.NA <-

              mean(

                as.vector(a.column)[a.column.with.absolute.values >= cutoff],

                na.rm=TRUE

                )

            mean.is.not.na <- !is.na(mean(as.vector(a.column)))

            mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)

            number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])

            all.members.are.infinite <- all(!is.finite(a.column))



            # Check all members are under cutoff

            if(mean.is.not.na & mean.is.nan.with.cutoff){

              mean.value.for.a.subset[1, mv] <- 0

              # Check all members are NaN

            } else if (number.of.not.nan.members == 0 &

                       all.members.are.infinite &

                       mean.is.nan.with.cutoff){

              mean.value.for.a.subset[1, mv] <- NaN

              # Check all members are NA

            } else if (number.of.not.nan.members > 0 &

                       all.members.are.infinite &

                       mean.is.nan.with.cutoff){

              mean.value.for.a.subset[1, mv] <- NA

              # Mean is number

            } else if (!mean.is.nan.with.cutoff){

              mean.value.for.a.subset[1, mv] <-

                mean(

                  as.vector(a.column)[a.column.with.absolute.values >= cutoff],

                  na.rm=TRUE

                )

            }

          }



          # Merging calculations

          if(round){

            Mean.Value[[cs]] <- round(mean.value.for.a.subset, digits = 2)

          }else{

            Mean.Value[[cs]] <- mean.value.for.a.subset

          }





          if(topGenes){

            Top.Genes.of.Mean.Value[[cs]] <-

              top_finder(mean.value.for.a.subset, default_top)

          }

        }










        # Creating and filling the empty matrix with median.value data

        if("medianValue" %in% calculate){

          # creating empty matrix

          median.value.for.a.subset <- matrix(, nrow = 1, ncol = geneNumber)

          dimnames(median.value.for.a.subset) <-

            list(source.data.subset.name, genes.involved)



          # calculate median value

          for(mdv in seq_len(geneNumber)){

            # Subset a column

            a.column <- source.data.subset[,mdv]

            a.column.with.absolute.values <- abs(a.column)



            # General statements for core statistics conditions

            frequency <-

              mean(

                as.vector(a.column.with.absolute.values >= cutoff),

                na.rm=TRUE

                )


            mean.with.cutoff.minus.NA <-

              mean(

                as.vector(a.column)[a.column.with.absolute.values >= cutoff],

                na.rm=TRUE

                )


            mean.is.not.na <- !is.na(mean(as.vector(a.column)))

            mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)

            number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])

            all.members.are.infinite <- all(!is.finite(a.column))



            # Check all members are under cutoff

            if(mean.is.not.na & mean.is.nan.with.cutoff){

              median.value.for.a.subset[1, mdv] <- 0

              # Check all members are NaN

            } else if (number.of.not.nan.members == 0 &

                       all.members.are.infinite &

                       mean.is.nan.with.cutoff){

              median.value.for.a.subset[1, mdv] <- NaN

              # Check all members are NA

            } else if (number.of.not.nan.members > 0 &

                       all.members.are.infinite &

                       mean.is.nan.with.cutoff){

              median.value.for.a.subset[1, mdv] <- NA

              # Mean is number

            } else if (!mean.is.nan.with.cutoff){

              median.value.for.a.subset[1, mdv] <-

                median(

                  as.vector(a.column)[a.column.with.absolute.values >= cutoff],

                  na.rm=TRUE

                  )

            }

          }



          # Merging calculations

          if(round){

            Median.Value[[cs]] <- round(median.value.for.a.subset, digits = 2)

          }else{

            Median.Value[[cs]] <- median.value.for.a.subset

          }





          if(topGenes){

            Top.Genes.of.Median.Value[[cs]] <-

              top_finder(median.value.for.a.subset, default_top)

          }

        }

        # Update progressbar

        ExtA <- ExtA + 1

        setTxtProgressBar(automatedStatisticsProgressBar, ExtA)

      }

      # Accounting for variable number of availabe genes for each study




      # assign the statistics list for a subgroup of processedList

      if("frequencyPercentage" %in% calculate){

        temList$Frequency.Percentage <- do.call("rbind", Frequency.Percentage)

        if(topGenes){

          temList$Top.Genes.of.Frequency.Percentage <-

            do.call("rbind", Top.Genes.of.Frequency.Percentage)

        }

      }


      if("frequencyRatio" %in% calculate){

        temList$Frequency.Ratio <- do.call("rbind", Frequency.Ratio)

      }


      if("meanValue" %in% calculate){

        temList$Mean.Value <- do.call("rbind", Mean.Value)

        if(topGenes){

          temList$Top.Genes.of.Mean.Value <-

            do.call("rbind", Top.Genes.of.Mean.Value)

        }

      }


      if("medianValue" %in% calculate){

        temList$Median.Value <- do.call("rbind", Median.Value)

        if(topGenes){

          temList$Top.Genes.of.Median.Value <-

            do.call("rbind", Top.Genes.of.Median.Value)

        }

      }





      processedList[[gg]] <- temList

      names(processedList)[gg] <- names(sourceDataList)[gg]

    }

    # close progressbar

    close(automatedStatisticsProgressBar)



    # Store the prepared Data

    number.of.rows.calculated.data <-

      nrow(bfcquery(bfc, "Calculated statistics"))

    if(number.of.rows.calculated.data == 0){

      saveRDS(

        processedList,

        file=bfcnew(bfc, "Calculated statistics", ext="RDS")

        )

    } else if(number.of.rows.calculated.data == 1){

      saveRDS(

        processedList,

        file=bfc[[bfcquery(bfc, "Calculated statistics")$rid]]

        )

    }



    # Store the last parameter

    newParameters$lastRunStatus <- "succeeded"

    oldParamAutomatedStatistics <- newParameters


    # Store the parameters for this run

    if(number.of.rows.parameters == 0){

      saveRDS(

        oldParamAutomatedStatistics,

        file=bfcnew(bfc, "Parameters for automatedStatistics()", ext="RDS")

        )

    } else if(number.of.rows.parameters == 1){

      saveRDS(

        oldParamAutomatedStatistics,

        file=bfc[[bfcquery(bfc, "Parameters for automatedStatistics()")$rid]]

        )

    }

  }

  # message("[obtainMultipleStudies] Finished.")

}
armanshahrisa/cBioAutomatedTools documentation built on Oct. 29, 2022, 2:38 p.m.