Nothing
#' @title Perform the requested statistics for various studies / subgroups of a
#' study.
#'
#' @description This function calculates frequency percentage, frequency ratio,
#' mean value and median value of samples greather than specific cutoff in the
#' selected study / subgroups of the study. Furthermore, it can looks for the
#' five genes that contain the highest values in each study / study subgroup.
#' It uses the data generated by obtainOneStudy()/obtainMultipleStudies()
#' function.
#'
#' @details
#' \tabular{lllll}{
#' Package: \tab cbaf \cr
#' Type: \tab Package \cr
#' Version: \tab 1.12.1 \cr
#' Date: \tab 2020-12-07 \cr
#' License: \tab Artistic-2.0 \cr
#' }
#'
#'
#'
#' @importFrom stats median na.omit
#'
#' @importFrom BiocFileCache bfcnew bfcquery bfcpath
#'
#' @importFrom utils head setTxtProgressBar txtProgressBar
#'
#'
#'
#' @include cbaf-obtainOneStudy.R cbaf-obtainMultipleStudies.R
#'
#'
#'
#'
#' @usage automatedStatistics(submissionName, obtainedDataType =
#' "multiple studies", calculate = c("frequencyPercentage", "frequencyRatio",
#' "meanValue"), topGenes = TRUE, cutoff=NULL, round=TRUE)
#'
#'
#'
#' @param submissionName a character string containing name of interest. It is
#' used for naming the process.
#'
#' @param obtainedDataType a character string that specifies the type of input
#' data produced by the previous function. Two options are availabe:
#' \code{"single study"} for obtainOneStudy() and \code{"multiple studies"} for
#' obtainMultipleStudies(). The function uses obtainedDataType and
#' submissionName to construct the name of the BiocFileCach object and then
#' finds the appropriate data inside it. Default value is multiple studies`.
#'
#' @param calculate a character vector that containes the statistical procedures
#' users prefer the function to compute. The complete results can be obtained
#' by \code{c("frequencyPercentage", "frequencyRatio", "meanValue",
#' "medianValue")}. This will tell the function to compute the following:
#' \code{"frequencyPercentage"}, which is the percentge of samples having the
#' value greather than specific cutoff divided by the total sample size for
#' every study / study subgroup;
#' \code{"frequency ratio"}, which shows the number of selected samples divided
#' by the total number of samples that give the frequency percentage for every
#' study / study subgroup. It shows the selected and total sample sizes.;
#' \code{"Mean Value"}, that contains mean value of selected samples for each
#' study;
#' \code{"Median Value"}, which shows the median value of selected samples for
#' each study.
#' The default input is \code{calculate = c("frequencyPercentage",
#' "frequencyRatio", "meanValue")}.
#'
#' @param topGenes a logical value that, if set as TRUE, causes the function to
#' create three data.frame that contain the five top genes for each cancer. To
#' get all the three data.frames, "frequencyPercentage", "meanValue" and
#' "MedianValue" must have been included for \code{calculate}.
#'
#' @param cutoff a number used to limit samples to those that are greather than
#' this number (cutoff). The default value for methylation data is \code{0.6}
#' while gene expression studies use default value of \code{2}. For methylation
#' studies, it is \code{average of relevant locations}, for the rest, it is
#' \code{"log z-score"}. To change the cutoff to any desired number, change the
#' option to \code{cutoff = desiredNumber} in which desiredNumber is the number
#' of interest.
#'
#' @param round a logical value that, if set to be \code{TRUE}, will force the
#' function to round all the calculated values to two decimal places. The
#' default value is \code{TRUE}.
#'
#'
#'
#' @return A new section in the BiocFileCache object that was created by one of
#' the obtainOneStudy() or obtainMultipleStudies() functions. It contains a list
#' that contains some or all of the following statistical measurements for
#' every gene group, based on what user has chosen: \code{Frequency.Percentage}
#' , \code{Top.Genes.of.Frequency.Percentage}, \code{Frequency.Ratio},
#' \code{Mean.Value}, \code{Top.Genes.of.Mean.Value}, \code{Median},
#' \code{Top.Genes.of.Median}.
#'
#'
#'
#' @examples
#' genes <- list(K.demethylases = c("KDM1A", "KDM1B", "KDM2A", "KDM2B", "KDM3A",
#' "KDM3B", "JMJD1C", "KDM4A"), K.methyltransferases = c("SUV39H1", "SUV39H2",
#' "EHMT1", "EHMT2", "SETDB1", "SETDB2", "KMT2A", "KMT2A"))
#'
#' obtainOneStudy(genes, "test", "Breast Invasive Carcinoma (TCGA, Cell 2015)",
#' "RNA-Seq", desiredCaseList = c(3,4))
#'
#' automatedStatistics("test", obtainedDataType = "single study", calculate =
#' c("frequencyPercentage", "frequencyRatio"))
#'
#' @author Arman Shahrisa, \email{shahrisa.arman@hotmail.com} [maintainer,
#' copyright holder]
#' @author Maryam Tahmasebi Birgani, \email{tahmasebi-ma@ajums.ac.ir}
#'
#' @export
################################################################################
################################################################################
############### Automatically calculate statistical measurements ###############
################################################################################
################################################################################
automatedStatistics<- function(
submissionName,
obtainedDataType = "multiple studies",
calculate = c("frequencyPercentage", "frequencyRatio", "meanValue"),
topGenes = TRUE,
cutoff = NULL,
round = TRUE
){
##############################################################################
########## Prerequisites
# Check submissionName
if(!is.character(submissionName)){
stop("'submissionName' must be entered as a character string for naming the process")
}
# Obtain the unprocessed data list
if(obtainedDataType == "multiple studies"){
previousParamName <- "Parameters for obtainMultipleStudies()"
paramDeterminant <- "ObtainMultipleStudies"
databaseType <- "Obtained data for multiple studies"
} else if(obtainedDataType == "single study"){
previousParamName <- "Parameters for obtainOneStudy()"
paramDeterminant <- "ObtainOneStudy"
databaseType <- "Obtained data for single study"
} else{
stop("'obtainedDataType' must be entered as either 'multiple studies' or 'single study'.")
}
# Check calculate
if(is.character(calculate)){
if(!any(calculate %in% c("frequencyPercentage",
"frequencyRatio",
"meanValue",
"medianValue"))
){
stop("'calculate' must contain at least one of the following: 'frequencyPercentage', 'frequencyRatio', 'meanValue' and 'medianValue'.")
}
}else{
stop("'calculate' must be a character string.")
}
# Check topGenes
if(!is.logical(topGenes)){
stop("'topGenes' can only accept logical values: TRUE or FALSE .")
}
# Check cutoff
if(!is.null(cutoff) & !is.numeric(cutoff)){
stop("'cutoff' must be set as NULL or a be a numeric value.")
}
# Check round
if(!is.logical(round)){
stop("'round' can only accept logical values: TRUE or FALSE .")
}
##############################################################################
########## Decide whether function should stops now!
# Check wheather the requested data exists
database <- system.file("extdata", submissionName, package="cbaf")
if(!dir.exists(database)){
stop("Please run one of the obtainSingleStudy() or obtainMultipleStudies() functions first")
} else if(dir.exists(database)){
bfc <- BiocFileCache(
file.path(system.file("extdata", package = "cbaf"), submissionName),
ask = FALSE
)
if(!nrow(bfcquery(bfc, previousParamName)) == 1){
stop("Please run one of the obtainSingleStudy() or obtainMultipleStudies() functions first")
}
}
# obtain parameters for prevous function
previousFunctionParam <-
readRDS(bfcpath(bfc, bfcquery(bfc, c(previousParamName))$rid))
# fetch an old parameter from the previous function
desiredTechnique <- previousFunctionParam$desiredTechnique
# setting the value for cutoff
if(desiredTechnique == "methylation"){
cutoff.phrase <- "average of relevant locations cutoff"
if(is.null(cutoff)){
cutoff <- 0.8
}
} else{
cutoff.phrase <- "log z-score cutoff"
if(is.null(cutoff)){
cutoff <- 0.5
}
}
# Store the new parameteres
newParameters <-list()
newParameters$submissionName <- submissionName
newParameters$obtainedDataType <- obtainedDataType
newParameters$calculate <- calculate
newParameters$cutoff <- cutoff
newParameters$round <- round
newParameters$topGenes <- topGenes
newParameters$desiredTechnique <- desiredTechnique
# Check wheather the requested data exists
number.of.rows.parameters <-
nrow(bfcquery(bfc, "Parameters for automatedStatistics()"))
if(number.of.rows.parameters == 1){
oldParameters <-
readRDS(bfcpath(
bfc,
bfcquery(bfc, c("Parameters for automatedStatistics()"))$rid)
)
# Check whether the previous function is skipped
if(previousFunctionParam$lastRunStatus == "skipped"){
if(identical(oldParameters[-8], newParameters) |
submissionName %in% c("test", "test2")){
continue <- FALSE
# Store the last parameter
newParameters$lastRunStatus <- "skipped"
oldParamAutomatedStatistics <- newParameters
saveRDS(
oldParamAutomatedStatistics,
file=bfc[[bfcquery(bfc, "Parameters for automatedStatistics()")$rid]]
)
if(submissionName %in% c("test", "test2")){
message("--- 'test' and 'test2' databases contain sample data and therefore, are not changable. Please use a different submission name. ---")
}
message("--- Function 'automatedStatistics()' was skipped: the requested data already exist ---")
} else{
continue <- TRUE
}
} else{
continue <- TRUE
}
} else{
continue <- TRUE
}
if(continue){
# Getting the source data
sourceDataList <- readRDS(bfcpath(bfc, bfcquery(bfc, databaseType)$rid))
number.of.gene.groups <- sourceDataList[[1]]
if(!is.list(sourceDataList)){
stop("Input database must be a list.")
}
############################################################################
########## Set the function ready to work
# creating output fortmat
processedList <- list()
# temporarily Inactive
# options(stringsAsFactors = FALSE)
# Report
message("***", "Performing the requested statistical analyses for ", submissionName, "***")
# Create a progressbar
total.number <- length(sourceDataList)*length(number.of.gene.groups)
automatedStatisticsProgressBar <-
txtProgressBar(min = 0, max = total.number , style = 3)
ExtA <- 0
############################################################################
########## Core segment
# calculating the first 'for' loop for different gene groups
for(gg in seq_along(sourceDataList)){
geneNumber <- ncol(sourceDataList[[gg]][[1]])
temList <- list()
# Creating empty lists for iterations
if("frequencyPercentage" %in% calculate){
Frequency.Percentage <- vector("list", length(number.of.gene.groups))
if(topGenes){
Top.Genes.of.Frequency.Percentage <-
vector("list", length(number.of.gene.groups))
}
}
if("frequencyRatio" %in% calculate){
Frequency.Ratio <- vector("list", length(number.of.gene.groups))
}
if("meanValue" %in% calculate){
Mean.Value <- vector("list", length(number.of.gene.groups))
if(topGenes){
Top.Genes.of.Mean.Value <-
vector("list", length(number.of.gene.groups))
}
}
if("medianValue" %in% calculate){
Median.Value <- vector("list", length(number.of.gene.groups))
if(topGenes){
Top.Genes.of.Median.Value <-
vector("list", length(number.of.gene.groups))
}
}
for(cs in seq_along(number.of.gene.groups)){
# start working on one study
source.data.subset <- sourceDataList[[gg]][[cs]]
source.data.subset.name <- names(sourceDataList[[gg]])[cs]
genes.involved <- colnames(sourceDataList[[gg]][[cs]])
# Creating and filling the empty matrix with frequency.percentage data
if("frequencyPercentage" %in% calculate){
# creating empty matrix
frequency.percentage.for.a.subset <-
matrix(, nrow = 1, ncol = geneNumber)
dimnames(frequency.percentage.for.a.subset) <-
list(source.data.subset.name, genes.involved)
# calculate frequency percentage
for(fp in seq_len(geneNumber)){
# Subset a column
a.column <- source.data.subset[,fp]
a.column.with.absolute.values <- abs(a.column)
# General statements for core statistics conditions
frequency <-
mean(
as.vector(a.column.with.absolute.values >= cutoff),
na.rm=TRUE
)
mean.with.cutoff.minus.NA <-
mean(
as.vector(a.column)[a.column.with.absolute.values >= cutoff],
na.rm=TRUE
)
mean.is.not.na <- !is.na(mean(as.vector(a.column)))
mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)
number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])
all.members.are.infinite <- all(!is.finite(a.column))
# Check all members are under cutoff
if(mean.is.not.na & mean.is.nan.with.cutoff){
frequency.percentage.for.a.subset[1, fp] <- 0
# Check all members are NaN
} else if(number.of.not.nan.members == 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
frequency.percentage.for.a.subset[1, fp] <- NaN
# Check all members are NA
} else if(number.of.not.nan.members > 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
frequency.percentage.for.a.subset[1, fp] <- NA
# Mean is bigger than 0
} else if(mean.with.cutoff.minus.NA > 0 & !mean.is.nan.with.cutoff){
frequency.percentage.for.a.subset[1, fp] <- 100*frequency
# Mean is smaller than 0
} else if(mean.with.cutoff.minus.NA < 0 & !mean.is.nan.with.cutoff){
frequency.percentage.for.a.subset[1, fp] <- -100*frequency
}
}
# Merging calculations
if(round){
Frequency.Percentage[[cs]] <-
round(frequency.percentage.for.a.subset, digits = 2)
}else{
Frequency.Percentage[[cs]] <- frequency.percentage.for.a.subset
}
if(topGenes){
# Check if manual naming is requested
pre.topGenes <- frequency.percentage.for.a.subset
# Removing NaN and NA
pre.topGenes[is.nan(pre.topGenes) | is.na(pre.topGenes)] <- 0
# Finding the top 5 values
topGenes.values <-
head(unique(sort(pre.topGenes, decreasing = TRUE)), n = 5)
# Creating empty list for iterations
complete.top.list <- vector("list", length(topGenes.values))
for(topV in seq_along(topGenes.values)){
topGene.name <-
colnames(pre.topGenes)[pre.topGenes %in% topGenes.values[topV]]
# check whether ttwo or more genes have the same rank
if(length(topGene.name) > 1){
topGene.name <- paste(topGene.name, collapse = ", ")
}
# rounding
if(round){
complete.top <- data.frame(
topGene = topGene.name,
topValue = round(topGenes.values[topV], digits = 2)
, stringsAsFactors = FALSE
)
} else{
complete.top <- data.frame(
topGene = topGene.name,
topValue = topGenes.values[topV],
stringsAsFactors = FALSE
)
}
# correcting column names
colnames(complete.top) <- c(paste(topV, "th ", "Gene", sep=""),
paste(topV, "th ", "Value", sep=""))
# complete list
complete.top.list[[topV]] <- complete.top
}
# Merge list to give post.topGenes
post.topGenes <- do.call("cbind", complete.top.list)
# correcting rowname
rownames(post.topGenes) <- source.data.subset.name
# fixing the problem caused by more thank one gene with same rank
if(length(topGenes.values) < 5){
# Repeat unit
fix.dataframe <- data.frame(
topGene = "-",
topValue = "-",
stringsAsFactors = FALSE
)
# number of new units
newUnits <- 5 - length(topGenes.values)
# finding current number of units
oldUnits <- length(topGenes.values)
for(empty in seq_len(newUnits)){
colnames(fix.dataframe) <-
c(paste(oldUnits + empty, "th ", "Gene", sep=""),
paste(oldUnits + empty, "th ", "Value", sep=""))
post.topGenes <- cbind(post.topGenes, fix.dataframe)
}
}
# assigning the value to the second level list
Top.Genes.of.Frequency.Percentage[[cs]] <- post.topGenes
}
}
# Creating and filling the empty matrix with frequency.ratio data
if("frequencyRatio" %in% calculate){
# creating empty matrix
frequency.ratio.for.a.subset <- matrix(, nrow = 1, ncol = geneNumber)
dimnames(frequency.ratio.for.a.subset) <-
list(source.data.subset.name, genes.involved)
# calculate frequency ratio
for(fr in seq_len(geneNumber)){
# Subset a column
a.column <- source.data.subset[,fr]
a.column.with.absolute.values <- abs(a.column)
# General statements for core statistics conditions
frequency <-
mean(
as.vector(a.column.with.absolute.values >= cutoff),
na.rm=TRUE
)
mean.with.cutoff.minus.NA <-
mean(
as.vector(a.column)[a.column.with.absolute.values >= cutoff],
na.rm=TRUE
)
mean.is.not.na <- !is.na(mean(as.vector(a.column)))
mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)
number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])
all.members.are.infinite <- all(!is.finite(a.column))
# Check all members are under cutoff
if(mean.is.not.na & mean.is.nan.with.cutoff){
frequency.ratio.for.a.subset[1, fr] <-
paste0("0 out of ", as.character(length(as.vector(a.column))))
# Check all members are NaN
} else if (number.of.not.nan.members == 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
frequency.ratio.for.a.subset[1, fr] <- NaN
# Check all members are NA
} else if (number.of.not.nan.members > 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
frequency.ratio.for.a.subset[1, fr] <- NA
# Mean is number
} else if (!mean.is.nan.with.cutoff){
frequency.ratio.for.a.subset[1, fr] <-
paste(as.character(length(na.omit(as.vector(a.column)[
a.column.with.absolute.values >= cutoff]))), " out of ",
as.character(length(as.vector(a.column))), sep="")
}
}
# Merging calculations
Frequency.Ratio[[cs]] <- frequency.ratio.for.a.subset
}
# Creating and filling the empty matrix with mean.value data
if("meanValue" %in% calculate){
# creating empty matrix
mean.value.for.a.subset <- matrix(, nrow = 1, ncol = geneNumber)
dimnames(mean.value.for.a.subset) <-
list(source.data.subset.name, genes.involved)
# calculate Mean value
for(mv in seq_len(geneNumber)){
# Subset a column
a.column <- source.data.subset[,mv]
a.column.with.absolute.values <- abs(a.column)
# General statements for core statistics conditions
frequency <-
mean(
as.vector(a.column.with.absolute.values >= cutoff),
na.rm=TRUE
)
mean.with.cutoff.minus.NA <-
mean(
as.vector(a.column)[a.column.with.absolute.values >= cutoff],
na.rm=TRUE
)
mean.is.not.na <- !is.na(mean(as.vector(a.column)))
mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)
number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])
all.members.are.infinite <- all(!is.finite(a.column))
# Check all members are under cutoff
if(mean.is.not.na & mean.is.nan.with.cutoff){
mean.value.for.a.subset[1, mv] <- 0
# Check all members are NaN
} else if (number.of.not.nan.members == 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
mean.value.for.a.subset[1, mv] <- NaN
# Check all members are NA
} else if (number.of.not.nan.members > 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
mean.value.for.a.subset[1, mv] <- NA
# Mean is number
} else if (!mean.is.nan.with.cutoff){
mean.value.for.a.subset[1, mv] <-
mean(
as.vector(a.column)[a.column.with.absolute.values >= cutoff],
na.rm=TRUE
)
}
}
# Merging calculations
if(round){
Mean.Value[[cs]] <- round(mean.value.for.a.subset, digits = 2)
}else{
Mean.Value[[cs]] <- mean.value.for.a.subset
}
if(topGenes){
# Check if manual naming is requested
pre.topGenes <- mean.value.for.a.subset
# Removing NaN and NA
pre.topGenes[is.nan(pre.topGenes) | is.na(pre.topGenes)] <- 0
# Finding the top 5 values
topGenes.values <-
head(unique(sort(pre.topGenes, decreasing = TRUE)), n = 5)
# Creating empty list for iterations
complete.top.list <- vector("list", length(topGenes.values))
for(topV in seq_along(topGenes.values)){
topGene.name <-
colnames(pre.topGenes)[pre.topGenes %in% topGenes.values[topV]]
# check whether ttwo or more genes have the same rank
if(length(topGene.name) > 1){
topGene.name <- paste(topGene.name, collapse = ", ")
}
# rounding
if(round){
complete.top <- data.frame(
topGene = topGene.name,
topValue = round(topGenes.values[topV], digits = 2)
, stringsAsFactors = FALSE
)
} else{
complete.top <- data.frame(
topGene = topGene.name,
topValue = topGenes.values[topV],
stringsAsFactors = FALSE
)
}
# correcting column names
colnames(complete.top) <- c(paste(topV, "th ", "Gene", sep=""),
paste(topV, "th ", "Value", sep=""))
# complete list
complete.top.list[[topV]] <- complete.top
}
# Merge list to give post.topGenes
post.topGenes <- do.call("cbind", complete.top.list)
# correcting rowname
rownames(post.topGenes) <- source.data.subset.name
# fixing the problem caused by more thank one gene with same rank
if(length(topGenes.values) < 5){
# Repeat unit
fix.dataframe <- data.frame(
topGene = "-",
topValue = "-",
stringsAsFactors = FALSE
)
# number of new units
newUnits <- 5 - length(topGenes.values)
# finding current number of units
oldUnits <- length(topGenes.values)
for(empty in seq_len(newUnits)){
colnames(fix.dataframe) <-
c(paste(oldUnits + empty, "th ", "Gene", sep=""),
paste(oldUnits + empty, "th ", "Value", sep=""))
post.topGenes <- cbind(post.topGenes, fix.dataframe)
}
}
# assigning the value to the second level list
Top.Genes.of.Mean.Value[[cs]] <- post.topGenes
}
}
# Creating and filling the empty matrix with median.value data
if("medianValue" %in% calculate){
# creating empty matrix
median.value.for.a.subset <- matrix(, nrow = 1, ncol = geneNumber)
dimnames(median.value.for.a.subset) <-
list(source.data.subset.name, genes.involved)
# calculate median value
for(mdv in seq_len(geneNumber)){
# Subset a column
a.column <- source.data.subset[,mdv]
a.column.with.absolute.values <- abs(a.column)
# General statements for core statistics conditions
frequency <-
mean(
as.vector(a.column.with.absolute.values >= cutoff),
na.rm=TRUE
)
mean.with.cutoff.minus.NA <-
mean(
as.vector(a.column)[a.column.with.absolute.values >= cutoff],
na.rm=TRUE
)
mean.is.not.na <- !is.na(mean(as.vector(a.column)))
mean.is.nan.with.cutoff <- is.nan(mean.with.cutoff.minus.NA)
number.of.not.nan.members <- length((a.column)[!is.nan(a.column)])
all.members.are.infinite <- all(!is.finite(a.column))
# Check all members are under cutoff
if(mean.is.not.na & mean.is.nan.with.cutoff){
median.value.for.a.subset[1, mdv] <- 0
# Check all members are NaN
} else if (number.of.not.nan.members == 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
median.value.for.a.subset[1, mdv] <- NaN
# Check all members are NA
} else if (number.of.not.nan.members > 0 &
all.members.are.infinite &
mean.is.nan.with.cutoff){
median.value.for.a.subset[1, mdv] <- NA
# Mean is number
} else if (!mean.is.nan.with.cutoff){
median.value.for.a.subset[1, mdv] <-
median(
as.vector(a.column)[a.column.with.absolute.values >= cutoff],
na.rm=TRUE
)
}
}
# Merging calculations
if(round){
Median.Value[[cs]] <- round(median.value.for.a.subset, digits = 2)
}else{
Median.Value[[cs]] <- median.value.for.a.subset
}
if(topGenes){
# Check if manual naming is requested
pre.topGenes <- median.value.for.a.subset
# Removing NaN and NA
pre.topGenes[is.nan(pre.topGenes) | is.na(pre.topGenes)] <- 0
# Finding the top 5 values
topGenes.values <-
head(unique(sort(pre.topGenes, decreasing = TRUE)), n = 5)
# Creating empty list for iterations
complete.top.list <- vector("list", length(topGenes.values))
for(topV in seq_along(topGenes.values)){
topGene.name <-
colnames(pre.topGenes)[pre.topGenes %in% topGenes.values[topV]]
# check whether ttwo or more genes have the same rank
if(length(topGene.name) > 1){
topGene.name <- paste(topGene.name, collapse = ", ")
}
# rounding
if(round){
complete.top <- data.frame(
topGene = topGene.name,
topValue = round(topGenes.values[topV], digits = 2)
, stringsAsFactors = FALSE
)
} else{
complete.top <- data.frame(
topGene = topGene.name,
topValue = topGenes.values[topV],
stringsAsFactors = FALSE
)
}
# correcting column names
colnames(complete.top) <- c(paste(topV, "th ", "Gene", sep=""),
paste(topV, "th ", "Value", sep=""))
# complete list
complete.top.list[[topV]] <- complete.top
}
# Merge list to give post.topGenes
post.topGenes <- do.call("cbind", complete.top.list)
# correcting rowname
rownames(post.topGenes) <- source.data.subset.name
# fixing the problem caused by more thank one gene with same rank
if(length(topGenes.values) < 5){
# Repeat unit
fix.dataframe <- data.frame(
topGene = "-",
topValue = "-",
stringsAsFactors = FALSE
)
# number of new units
newUnits <- 5 - length(topGenes.values)
# finding current number of units
oldUnits <- length(topGenes.values)
for(empty in seq_len(newUnits)){
colnames(fix.dataframe) <-
c(paste(oldUnits + empty, "th ", "Gene", sep=""),
paste(oldUnits + empty, "th ", "Value", sep=""))
post.topGenes <- cbind(post.topGenes, fix.dataframe)
}
}
# assigning the value to the second level list
Top.Genes.of.Median.Value[[cs]] <- post.topGenes
}
}
# Update progressbar
ExtA <- ExtA + 1
setTxtProgressBar(automatedStatisticsProgressBar, ExtA)
}
# assign the statistics list fot a subgroup of processedList
if("frequencyPercentage" %in% calculate){
temList$Frequency.Percentage <- do.call("rbind", Frequency.Percentage)
if(topGenes){
temList$Top.Genes.of.Frequency.Percentage <-
do.call("rbind", Top.Genes.of.Frequency.Percentage)
}
}
if("frequencyRatio" %in% calculate){
temList$Frequency.Ratio <- do.call("rbind", Frequency.Ratio)
}
if("meanValue" %in% calculate){
temList$Mean.Value <- do.call("rbind", Mean.Value)
if(topGenes){
temList$Top.Genes.of.Mean.Value <-
do.call("rbind", Top.Genes.of.Mean.Value)
}
}
if("medianValue" %in% calculate){
temList$Median.Value <- do.call("rbind", Median.Value)
if(topGenes){
temList$Top.Genes.of.Median.Value <-
do.call("rbind", Top.Genes.of.Median.Value)
}
}
processedList[[gg]] <- temList
names(processedList)[gg] <- names(sourceDataList)[gg]
}
# close progressbar
close(automatedStatisticsProgressBar)
# Store the prepared Data
number.of.rows.calculated.data <-
nrow(bfcquery(bfc, "Calculated statistics"))
if(number.of.rows.calculated.data == 0){
saveRDS(
processedList,
file=bfcnew(bfc, "Calculated statistics", ext="RDS")
)
} else if(number.of.rows.calculated.data == 1){
saveRDS(
processedList,
file=bfc[[bfcquery(bfc, "Calculated statistics")$rid]]
)
}
# Store the last parameter
newParameters$lastRunStatus <- "succeeded"
oldParamAutomatedStatistics <- newParameters
# Store the parameters for this run
if(number.of.rows.parameters == 0){
saveRDS(
oldParamAutomatedStatistics,
file=bfcnew(bfc, "Parameters for automatedStatistics()", ext="RDS")
)
} else if(number.of.rows.parameters == 1){
saveRDS(
oldParamAutomatedStatistics,
file=bfc[[bfcquery(bfc, "Parameters for automatedStatistics()")$rid]]
)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.