R/class_analysis_dataset.R

Defines functions print.Dataset .getDatasetArgumentsRCodeLines summary.Dataset .getMaxDigits .isFloatingPointSampleSize plot.Dataset .getRandomDataMeans .getDatasetExample .getEnrichmentDatasetFromArgs .getEnrichmentDataFrameFromArgs .validateEnrichmentDataFrame .validateEnrichmentDataFrameHasConsistentNumberOfStages .validateEnrichmentDataFrameRates .validateEnrichmentDataFrameSurvival .validateEnrichmentDataFrameMeans .validateEnrichmentDataFrameDeselection .getEndpointSpecificDataFrameParameterNames .validateEnrichmentDataFrameAtFirstStage .getSubsetsFromArgs .optionalArgsContainsDatasets .getDatasetMeansFromModelsByStage .getStandardDeviationFromStandardError .getDatasetMeansModelObjectsList getDataSet getDataset .getDataset writeDatasets readDatasets writeDataset readDataset

Documented in getDataset getDataSet plot.Dataset print.Dataset readDataset readDatasets summary.Dataset writeDataset writeDatasets

## |
## |  *Dataset classes*
## |
## |  This file is part of the R package rpact:
## |  Confirmatory Adaptive Clinical Trial Design and Analysis
## |
## |  Author: Gernot Wassmer, PhD, and Friedrich Pahlke, PhD
## |  Licensed under "GNU Lesser General Public License" version 3
## |  License text can be found here: https://www.r-project.org/Licenses/LGPL-3
## |
## |  RPACT company website: https://www.rpact.com
## |  rpact package website: https://www.rpact.org
## |
## |  Contact us for information about our services: info@rpact.com
## |
## |  File version: $Revision: 7139 $
## |  Last changed: $Date: 2023-06-28 08:15:31 +0200 (Mi, 28 Jun 2023) $
## |  Last changed by: $Author: pahlke $
## |

#' @include f_analysis_utilities.R
#' @include f_core_utilities.R
#' @include f_object_r_code.R
NULL

C_KEY_WORDS_GROUPS <- c("group", "groups")

C_KEY_WORDS_STAGES <- c("stage", "stages")

C_KEY_WORDS_SUBSETS <- c("subset", "subsets")

C_KEY_WORDS_SAMPLE_SIZES <- .getAllParameterNameVariants(c("n", "N", "sampleSizes", "sampleSize"))

C_KEY_WORDS_MEANS <- c("means", "mean")

C_KEY_WORDS_ST_DEVS <- .getAllParameterNameVariants(c("stDevs", "stDev", "stds", "sd"))

C_KEY_WORDS_EVENTS <- c("event", "events")

C_KEY_WORDS_OVERALL_EVENTS <- .getAllParameterNameVariants(c("overallEvents", "overallEvent"))

C_KEY_WORDS_EXPECTED_EVENTS <- .getAllParameterNameVariants(c("expectedEvents", "expectedEvent"))

C_KEY_WORDS_VARIANCE_EVENTS <- .getAllParameterNameVariants(c("varianceEvents", "varianceEvent"))

C_KEY_WORDS_OVERALL_EXPECTED_EVENTS <- .getAllParameterNameVariants(c("overallExpectedEvents", "overallExpectedEvent"))

C_KEY_WORDS_OVERALL_VARIANCE_EVENTS <- .getAllParameterNameVariants(c("overallVarianceEvents", "overallVarianceEvent"))

C_KEY_WORDS_OVERALL_SAMPLE_SIZES <- .getAllParameterNameVariants(c(
    "overallN", "overallSampleSizes", "overallSampleSize"
))

C_KEY_WORDS_OVERALL_MEANS <- .getAllParameterNameVariants(c("overallMeans", "overallMean"))

C_KEY_WORDS_OVERALL_ST_DEVS <- .getAllParameterNameVariants(c(
    "overallStDevs", "overallStDev", "overall.sd", "overall_sd"
))

C_KEY_WORDS_ALLOCATION_RATIOS <- .getAllParameterNameVariants(c("ar", "allocationRatios", "allocationRatio"))

C_KEY_WORDS_LOG_RANKS <- .getAllParameterNameVariants(c("logRanks", "logRank", "lr"))

C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS <- .getAllParameterNameVariants(c(
    "oar", "car", "overallAllocationRatios", "overallAllocationRatio"
))

C_KEY_WORDS_OVERALL_LOG_RANKS <- .getAllParameterNameVariants(c("olr", "clr", "overallLogRanks", "overallLogRank"))

C_KEY_WORDS <- c(
    C_KEY_WORDS_GROUPS,
    C_KEY_WORDS_STAGES,
    C_KEY_WORDS_SUBSETS,
    C_KEY_WORDS_SAMPLE_SIZES,
    C_KEY_WORDS_MEANS,
    C_KEY_WORDS_ST_DEVS,
    C_KEY_WORDS_EVENTS,
    C_KEY_WORDS_OVERALL_EVENTS,
    C_KEY_WORDS_OVERALL_SAMPLE_SIZES,
    C_KEY_WORDS_OVERALL_MEANS,
    C_KEY_WORDS_OVERALL_ST_DEVS,
    C_KEY_WORDS_ALLOCATION_RATIOS,
    C_KEY_WORDS_LOG_RANKS,
    C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
    C_KEY_WORDS_OVERALL_LOG_RANKS
)

#' @title
#' Read Dataset
#'
#' @description
#' Reads a data file and returns it as dataset object.
#'
#' @param file A CSV file (see \code{\link[utils]{read.table}}).
#' @param header A logical value indicating whether the file contains the names of
#'        the variables as its first line.
#' @param sep The field separator character. Values on each line of the file are separated
#'        by this character. If sep = "," (the default for \code{readDataset}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#'        quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#'        considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param fill logical. If \code{TRUE} then in case the rows have unequal length, blank fields
#'        are implicitly added.
#' @param comment.char character: a character vector of length one containing a single character
#'        or an empty string. Use "" to turn off the interpretation of comments altogether.
#' @param fileEncoding character string: if non-empty declares the encoding used on a file
#'        (not a connection) so the character data can be re-encoded.
#'        See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to code{\link[utils]{read.table}}.
#'
#' @details
#' \code{readDataset} is a wrapper function that uses \code{\link[utils]{read.table}} to read the
#' CSV file into a data frame, transfers it from long to wide format with \code{\link[stats]{reshape}}
#' and puts the data to \code{\link[=getDataset]{getDataset()}}.
#'
#' @template return_object_dataset
#'
#' @seealso
#' \itemize{
#'   \item \code{\link[=readDatasets]{readDatasets()}} for reading multiple datasets,
#'   \item \code{\link[=writeDataset]{writeDataset()}} for writing a single dataset,
#'   \item \code{\link[=writeDatasets]{writeDatasets()}} for writing multiple datasets.
#' }
#'
#' @examples
#' \dontrun{
#' dataFileRates <- system.file("extdata",
#'     "dataset_rates.csv",
#'     package = "rpact"
#' )
#' if (dataFileRates != "") {
#'     datasetRates <- readDataset(dataFileRates)
#'     datasetRates
#' }
#'
#' dataFileMeansMultiArm <- system.file("extdata",
#'     "dataset_means_multi-arm.csv",
#'     package = "rpact"
#' )
#' if (dataFileMeansMultiArm != "") {
#'     datasetMeansMultiArm <- readDataset(dataFileMeansMultiArm)
#'     datasetMeansMultiArm
#' }
#'
#' dataFileRatesMultiArm <- system.file("extdata",
#'     "dataset_rates_multi-arm.csv",
#'     package = "rpact"
#' )
#' if (dataFileRatesMultiArm != "") {
#'     datasetRatesMultiArm <- readDataset(dataFileRatesMultiArm)
#'     datasetRatesMultiArm
#' }
#'
#' dataFileSurvivalMultiArm <- system.file("extdata",
#'     "dataset_survival_multi-arm.csv",
#'     package = "rpact"
#' )
#' if (dataFileSurvivalMultiArm != "") {
#'     datasetSurvivalMultiArm <- readDataset(dataFileSurvivalMultiArm)
#'     datasetSurvivalMultiArm
#' }
#' }
#'
#' @export
#'
readDataset <- function(file, ..., header = TRUE, sep = ",", quote = "\"",
        dec = ".", fill = TRUE, comment.char = "", fileEncoding = "UTF-8") {
    if (!file.exists(file)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "the file '", file, "' does not exist")
    }

    data <- utils::read.table(
        file = file, header = header, sep = sep,
        quote = quote, dec = dec, fill = fill, fileEncoding = fileEncoding, ...
    )
    dataWide <- stats::reshape(data = data, direction = "wide", idvar = "stages", timevar = "groups")
    colnames(dataWide) <- gsub("\\.", "", colnames(dataWide))
    return(getDataset(dataWide))
}

#' @title
#' Write Dataset
#'
#' @description
#' Writes a dataset to a CSV file.
#'
#' @param dataset A dataset.
#' @param file The target CSV file.
#' @param append Logical. Only relevant if file is a character string.
#'        If \code{TRUE}, the output is appended to the file. If \code{FALSE}, any existing file of the name is destroyed.
#' @param sep The field separator character. Values on each line of the file are separated
#'        by this character. If sep = "," (the default for \code{writeDataset}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#'        quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#'        considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param eol The character(s) to print at the end of each line (row).
#' @param na The string to use for missing values in the data.
#' @param row.names Either a logical value indicating whether the row names of \code{dataset} are
#'        to be written along with  \code{dataset}, or a character vector of row names to be written.
#' @param col.names Either a logical value indicating whether the column names of  \code{dataset} are
#'        to be written along with  \code{dataset}, or a character vector of column names to be written.
#'        See the section on 'CSV files' for the meaning of \code{col.names = NA}.
#' @param qmethod A character string specifying how to deal with embedded double quote characters
#'        when quoting strings. Must be one of "double" (default in \code{writeDataset}) or "escape".
#' @param fileEncoding Character string: if non-empty declares the encoding used on a file
#'        (not a connection) so the character data can be re-encoded.
#'        See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to \code{\link[utils]{write.table}}.
#'
#' @details
#' \code{\link[=writeDataset]{writeDataset()}} is a wrapper function that coerces the dataset to a data frame and uses \cr
#' \code{\link[utils]{write.table}} to write it to a CSV file.
#'
#' @seealso
#' \itemize{
#'   \item \code{\link[=writeDatasets]{writeDatasets()}} for writing multiple datasets,
#'   \item \code{\link[=readDataset]{readDataset()}} for reading a single dataset,
#'   \item \code{\link[=readDatasets]{readDatasets()}} for reading multiple datasets.
#' }
#'
#' @examples
#' \dontrun{
#' datasetOfRates <- getDataset(
#'     n1 = c(11, 13, 12, 13),
#'     n2 = c(8, 10, 9, 11),
#'     events1 = c(10, 10, 12, 12),
#'     events2 = c(3, 5, 5, 6)
#' )
#' writeDataset(datasetOfRates, "dataset_rates.csv")
#' }
#'
#' @export
#'
writeDataset <- function(dataset, file, ..., append = FALSE, quote = TRUE, sep = ",",
        eol = "\n", na = "NA", dec = ".", row.names = TRUE,
        col.names = NA, qmethod = "double",
        fileEncoding = "UTF-8") {
    .assertIsDataset(dataset)

    x <- as.data.frame(dataset, niceColumnNamesEnabled = FALSE)

    utils::write.table(
        x = x, file = file, append = append, quote = quote, sep = sep,
        eol = eol, na = na, dec = dec, row.names = FALSE,
        col.names = TRUE, qmethod = qmethod,
        fileEncoding = fileEncoding
    )
}

#' @title
#' Read Multiple Datasets
#'
#' @description
#' Reads a data file and returns it as a list of dataset objects.
#'
#' @param file A CSV file (see \code{\link[utils]{read.table}}).
#' @param header A logical value indicating whether the file contains the names of
#'        the variables as its first line.
#' @param sep The field separator character. Values on each line of the file are separated
#'        by this character. If sep = "," (the default for \code{readDatasets}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#'        quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#'        considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param fill logical. If \code{TRUE} then in case the rows have unequal length, blank fields
#'        are implicitly added.
#' @param comment.char character: a character vector of length one containing a single character
#'        or an empty string. Use "" to turn off the interpretation of comments altogether.
#' @param fileEncoding character string: if non-empty declares the encoding used on a file
#'        (not a connection) so the character data can be re-encoded.
#'        See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to \code{\link[utils]{read.table}}.
#'
#' @details
#' Reads a file that was written by \code{\link[=writeDatasets]{writeDatasets()}} before.
#'
#' @return Returns a \code{\link[base]{list}} of \code{\link{Dataset}} objects.
#'
#' @seealso
#' \itemize{
#'   \item \code{\link[=readDataset]{readDataset()}} for reading a single dataset,
#'   \item \code{\link[=writeDatasets]{writeDatasets()}} for writing multiple datasets,
#'   \item \code{\link[=writeDataset]{writeDataset()}} for writing a single dataset.
#' }
#'
#' @examples
#' dataFile <- system.file("extdata", "datasets_rates.csv", package = "rpact")
#' if (dataFile != "") {
#'     datasets <- readDatasets(dataFile)
#'     datasets
#' }
#' @export
#'
readDatasets <- function(file, ..., header = TRUE, sep = ",", quote = "\"",
        dec = ".", fill = TRUE, comment.char = "", fileEncoding = "UTF-8") {
    if (!file.exists(file)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "the file '", file, "' does not exist")
    }

    data <- utils::read.table(
        file = file, header = header, sep = sep,
        quote = quote, dec = dec, fill = fill, fileEncoding = fileEncoding, ...
    )

    if (is.null(data[["datasetId"]])) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "data file must contain the column 'datasetId'")
    }

    datasets <- list()
    for (datasetId in unique(data$datasetId)) {
        subData <- data[data$datasetId == datasetId, ]
        dataFrame <- subset(subData, select = -datasetId)
        description <- NA_character_
        if (!is.null(dataFrame[["description"]])) {
            description <- as.character(dataFrame$description[1])
            dataFrame <- subset(dataFrame, select = -description)
        }
        if (length(unique(subData$groups)) == 2) {
            dataWide <- stats::reshape(dataFrame, direction = "wide", idvar = "stages", timevar = "groups")
            colnames(dataWide) <- gsub("\\.", "", colnames(dataWide))
            dataset <- getDataset(dataWide)
        } else {
            dataset <- getDataset(dataFrame)
        }
        dataset$setDescription(description)
        datasets <- c(datasets, dataset)
    }
    return(datasets)
}

#' @title
#' Write Multiple Datasets
#'
#' @description
#' Writes a list of datasets to a CSV file.
#'
#' @param datasets A list of datasets.
#' @param file The target CSV file.
#' @param append Logical. Only relevant if file is a character string.
#'        If \code{TRUE}, the output is appended to the file. If FALSE, any existing file of the name is destroyed.
#' @param sep The field separator character. Values on each line of the file are separated
#'        by this character. If sep = "," (the default for \code{writeDatasets}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#'        quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#'        considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param eol The character(s) to print at the end of each line (row).
#' @param na The string to use for missing values in the data.
#' @param row.names Either a logical value indicating whether the row names of \code{dataset} are
#'        to be written along with  \code{dataset}, or a character vector of row names to be written.
#' @param col.names Either a logical value indicating whether the column names of  \code{dataset} are
#'        to be written along with  \code{dataset}, or a character vector of column names to be written.
#'        See the section on 'CSV files' for the meaning of \code{col.names = NA}.
#' @param qmethod A character string specifying how to deal with embedded double quote characters
#'        when quoting strings. Must be one of "double" (default in \code{writeDatasets}) or "escape".
#' @param fileEncoding Character string: if non-empty declares the encoding used on a file
#'        (not a connection) so the character data can be re-encoded.
#'        See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to \code{\link[utils]{write.table}}.
#'
#' @details
#' The format of the CSV file is optimized for usage of \code{\link[=readDatasets]{readDatasets()}}.
#'
#' @seealso
#' \itemize{
#'   \item \code{\link[=writeDataset]{writeDataset()}} for writing a single dataset,
#'   \item \code{\link[=readDatasets]{readDatasets()}} for reading multiple datasets,
#'   \item \code{\link[=readDataset]{readDataset()}} for reading a single dataset.
#' }
#'
#' @examples
#' \dontrun{
#' d1 <- getDataset(
#'     n1 = c(11, 13, 12, 13),
#'     n2 = c(8, 10, 9, 11),
#'     events1 = c(10, 10, 12, 12),
#'     events2 = c(3, 5, 5, 6)
#' )
#' d2 <- getDataset(
#'     n1 = c(9, 13, 12, 13),
#'     n2 = c(6, 10, 9, 11),
#'     events1 = c(10, 10, 12, 12),
#'     events2 = c(4, 5, 5, 6)
#' )
#' datasets <- list(d1, d2)
#' writeDatasets(datasets, "datasets_rates.csv")
#' }
#'
#' @export
#'
writeDatasets <- function(datasets, file, ..., append = FALSE, quote = TRUE, sep = ",",
        eol = "\n", na = "NA", dec = ".", row.names = TRUE,
        col.names = NA, qmethod = "double",
        fileEncoding = "UTF-8") {
    if (!is.list(datasets)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'datasets' must be a list of datasets")
    }

    if (length(datasets) == 0) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'datasets' is empty")
    }

    datasetType <- NA_character_
    dataFrames <- NULL
    for (i in 1:length(datasets)) {
        dataset <- datasets[[i]]
        .assertIsDataset(dataset)
        if (is.na(datasetType)) {
            datasetType <- .getClassName(dataset)
        } else if (.getClassName(dataset) != datasetType) {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all datasets must have the same type")
        }

        data <- as.data.frame(dataset, niceColumnNamesEnabled = FALSE)
        datasetId <- ifelse(!is.null(dataset$getId()) && !is.na(dataset$getId()), dataset$getId(), i)
        data <- cbind(rep(datasetId, nrow(data)), data)
        colnames(data)[1] <- "datasetId"

        if (!is.null(dataset$getDescription()) && !is.na(dataset$getDescription())) {
            data <- cbind(data, rep(dataset$getDescription(), nrow(data)))
            colnames(data)[ncol(data)] <- "description"
        }

        if (is.null(dataFrames)) {
            dataFrames <- data
        } else {
            dataFrames <- rbind(dataFrames, data)
        }
    }

    if (is.null(dataFrames)) {
        stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "failed to bind datasets")
    }

    utils::write.table(
        x = dataFrames, file = file, append = append, quote = quote, sep = sep,
        eol = eol, na = na, dec = dec, row.names = FALSE,
        col.names = TRUE, qmethod = qmethod,
        fileEncoding = fileEncoding
    )
}

.getDataset <- function(..., floatingPointNumbersEnabled = FALSE) {
    args <- list(...)
    if (length(args) == 0) {
        stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "data.frame, data vectors, or datasets expected")
    }

    if (.optionalArgsContainsDatasets(...)) {
        if (length(args) == 1) {
            return(args[[1]])
        }

        design <- .getDesignFromArgs(...)
        if (length(args) == 2 && !is.null(design)) {
            dataset <- .getDatasetFromArgs(...)
            if (!is.null(dataset)) {
                dataset <- dataset$copy(shallow = FALSE)
                dataset$.design <- design
                return(dataset)
            }
        }

        return(.getEnrichmentDatasetFromArgs(...))
    }

    exampleType <- args[["example"]]
    if (!is.null(exampleType) && exampleType %in% c("means", "rates", "survival")) {
        return(.getDatasetExample(exampleType = exampleType))
    }

    if (length(args) == 1 && !is.null(args[[1]]) && is.list(args[[1]]) && !is.data.frame(args[[1]])) {
        return(.getDatasetMeansFromModelsByStage(emmeansResults = args[[1]]))
    }

    emmeansResults <- .getDatasetMeansModelObjectsList(args)
    if (!is.null(emmeansResults) && length(emmeansResults) > 0) {
        return(.getDatasetMeansFromModelsByStage(emmeansResults = emmeansResults))
    }

    dataFrame <- .getDataFrameFromArgs(...)

    design <- .getDesignFromArgs(...)

    if (is.null(dataFrame)) {
        args <- .removeDesignFromArgs(args)

        paramNames <- names(args)
        paramNames <- paramNames[paramNames != ""]

        numberOfParameters <- length(args)
        if (numberOfParameters > 0 && names(args)[1] == "" && .isTrialDesign(args[[1]])) {
            numberOfParameters <- numberOfParameters - 1
        }

        if (length(paramNames) != numberOfParameters) {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all parameters must be named")
        }

        if (length(paramNames) != length(unique(paramNames))) {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "the parameter names must be unique")
        }

        dataFrame <- .createDataFrame(...)
    }

    enrichmentEnabled <- .isDataObjectEnrichment(...)

    if (.isDataObjectMeans(...)) {
        return(DatasetMeans(
            dataFrame = dataFrame,
            floatingPointNumbersEnabled = floatingPointNumbersEnabled,
            enrichmentEnabled = enrichmentEnabled,
            .design = design
        ))
    }

    if (.isDataObjectRates(...)) {
        return(DatasetRates(
            dataFrame = dataFrame,
            floatingPointNumbersEnabled = floatingPointNumbersEnabled,
            enrichmentEnabled = enrichmentEnabled,
            .design = design
        ))
    }

    if (.isDataObjectNonStratifiedEnrichmentSurvival(...)) {
        return(DatasetEnrichmentSurvival(
            dataFrame = dataFrame,
            floatingPointNumbersEnabled = floatingPointNumbersEnabled,
            enrichmentEnabled = enrichmentEnabled,
            .design = design
        ))
    }

    if (.isDataObjectSurvival(...)) {
        return(DatasetSurvival(
            dataFrame = dataFrame,
            floatingPointNumbersEnabled = floatingPointNumbersEnabled,
            enrichmentEnabled = enrichmentEnabled,
            .design = design
        ))
    }

    stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "failed to identify dataset type")
}

#' @title
#' Get Dataset
#'
#' @description
#' Creates a dataset object and returns it.
#'
#' @param ... A \code{data.frame} or some data vectors defining the dataset.
#' @param floatingPointNumbersEnabled If \code{TRUE},
#'        sample sizes and event numbers can be specified as floating-point numbers
#'        (this make sense, e.g., for theoretical comparisons); \cr
#'        by default \code{floatingPointNumbersEnabled = FALSE}, i.e.,
#'        samples sizes and event numbers defined as floating-point numbers will be truncated.
#'
#' @details
#' The different dataset types \code{DatasetMeans}, of \code{DatasetRates}, or
#' \code{DatasetSurvival} can be created as follows:
#' \itemize{
#'   \item An element of \code{\link{DatasetMeans}} for one sample is created by \cr
#'     \code{getDataset(sampleSizes =, means =, stDevs =)} where \cr
#'     \code{sampleSizes}, \code{means}, \code{stDevs} are vectors with stage-wise sample sizes,
#'     means and standard deviations of length given by the number of available stages.
#'   \item An element of \code{\link{DatasetMeans}} for two samples is created by \cr
#'     \code{getDataset(sampleSizes1 =, sampleSizes2 =, means1 =, means2 =, } \cr
#'     \code{stDevs1 =, stDevs2 =)} where
#'     \code{sampleSizes1}, \code{sampleSizes2}, \code{means1}, \code{means2},
#'     \code{stDevs1}, \code{stDevs2} are vectors with
#'     stage-wise sample sizes, means and standard deviations for the two treatment groups
#'     of length given by the number of available stages.
#'   \item An element of \code{\link{DatasetRates}} for one sample is created by \cr
#'     \code{getDataset(sampleSizes =, events =)} where \code{sampleSizes}, \code{events} are vectors
#'     with stage-wise sample sizes and events of length given by the number of available stages.
#'   \item An element of \code{\link{DatasetRates}} for two samples is created by \cr
#'     \code{getDataset(sampleSizes1 =, sampleSizes2 =, events1 =, events2 =)} where
#'     \code{sampleSizes1}, \code{sampleSizes2}, \code{events1}, \code{events2}
#'     are vectors with stage-wise sample sizes
#'     and events  for the two treatment groups of length given by the number of available stages.
#'   \item An element of \code{\link{DatasetSurvival}} is created by \cr
#'     \code{getDataset(events =, logRanks =, allocationRatios =)} where
#'     \code{events}, \code{logRanks}, and \code{allocation ratios} are the stage-wise events,
#'     (one-sided) logrank statistics, and allocation ratios.
#'   \item An element of \code{\link{DatasetMeans}}, \code{\link{DatasetRates}}, and \code{\link{DatasetSurvival}}
#'     for more than one comparison is created by adding subsequent digits to the variable names.
#'     The system can analyze these data in a multi-arm many-to-one comparison setting where the
#'     group with the highest index represents the control group.
#' }
#' Prefix \code{overall[Capital case of first letter of variable name]...} for the variable
#' names enables entering the overall (cumulative) results and calculates stage-wise statistics.
#' Since rpact version 3.2, the prefix \code{cumulative[Capital case of first letter of variable name]...} or
#' \code{cum[Capital case of first letter of variable name]...} can alternatively be used for this.
#'
#' \code{n} can be used in place of \code{samplesizes}.
#'
#' Note that in survival design usually the overall (cumulative) events and logrank test statistics are provided
#' in the output, so \cr
#' \code{getDataset(cumulativeEvents=, cumulativeLogRanks =, cumulativeAllocationRatios =)} \cr
#' is the usual command for entering survival data. Note also that for \code{cumulativeLogranks} also the
#' z scores from a Cox regression can be used.
#'
#' For multi-arm designs, the index refers to the considered comparison. For example,\cr
#' \code{
#'   getDataset(events1=c(13, 33), logRanks1 = c(1.23, 1.55), events2 = c(16, NA), logRanks2 = c(1.55, NA))
#' } \cr
#' refers to the case where one active arm (1) is considered at both stages whereas active arm 2
#' was dropped at interim. Number of events and logrank statistics are entered for the corresponding
#' comparison to control (see Examples).
#'
#' For enrichment designs, the comparison of two samples is provided for an unstratified
#' (sub-population wise) or stratified data input.\cr
#' For unstratified (sub-population wise) data input the data sets are defined for the sub-populations
#' S1, S2, ..., F, where F refers to the full populations. Use of \code{getDataset(S1 = , S2, ..., F = )}
#' defines the data set to be used in \code{\link[=getAnalysisResults]{getAnalysisResults()}} (see examples)\cr
#' For stratified data input the data sets are defined for the strata S1, S12, S2, ..., R, where R
#' refers to the remainder of the strata such that the union of all sets is the full population.
#' Use of \code{getDataset(S1 = , S12 = , S2, ..., R = )} defines the data set to be used in
#' \code{\link[=getAnalysisResults]{getAnalysisResults()}} (see examples)\cr
#' For survival data, for enrichment designs the log-rank statistics should be entered as stratified
#' log-rank statistics in order to provide strong control of Type I error rate. For stratified data input,
#' the variables to be specified in \code{getDataset()} are \code{events}, \code{expectedEvents},
#' \code{varianceEvents}, and \code{allocationRatios} or \code{overallEvents}, \code{overallExpectedEvents},
#' \code{overallVarianceEvents}, and \code{overallAllocationRatios}. From this, (stratified) log-rank tests are
#' calculated.
#'
#' @template return_object_dataset
#'
#' @template examples_get_dataset
#'
#' @include f_analysis_base.R
#' @include f_analysis_utilities.R
#'
#' @export
#'
getDataset <- function(..., floatingPointNumbersEnabled = FALSE) {
    dataset <- .getDataset(floatingPointNumbersEnabled = floatingPointNumbersEnabled, ...)
    if (dataset$.enrichmentEnabled && dataset$getNumberOfGroups() != 2) {
        warning("Only population enrichment data with 2 groups can be analyzed but ",
            dataset$getNumberOfGroups(), " group",
            ifelse(dataset$getNumberOfGroups() == 1, " is", "s are"), " defined",
            call. = FALSE
        )
    }
    return(dataset)
}

#' @rdname getDataset
#' @export
getDataSet <- function(..., floatingPointNumbersEnabled = FALSE) {
    return(getDataset(floatingPointNumbersEnabled = floatingPointNumbersEnabled, ...))
}

.getDatasetMeansModelObjectsList <- function(args) {
    if (is.null(args) || length(args) == 0 || !is.list(args)) {
        return(NULL)
    }

    emmeansResults <- list()
    for (arg in args) {
        if (inherits(arg, "emmGrid")) {
            emmeansResults[[length(emmeansResults) + 1]] <- arg
        }
    }
    if (length(emmeansResults) == 0) {
        return(NULL)
    }

    argNames <- names(args)
    for (i in 1:length(args)) {
        arg <- args[[i]]
        if (!inherits(arg, "emmGrid")) {
            argName <- argNames[i]
            argInfo <- ""
            if (length(argName) == 1 && argName != "") {
                argInfo <- paste0(sQuote(argName), " ")
            }
            argInfo <- paste0(argInfo, "(", .arrayToString(arg), ")")
            warning("Argument ", argInfo, " will be ignored because only 'emmGrid' objects will be respected")
        }
    }

    return(emmeansResults)
}

.getStandardDeviationFromStandardError <- function(sampleSize, standardError, ...,
        dfValue = NA_real_, alpha = 0.05, lmEnabled = TRUE, stDevCalcMode = "auto") {
    qtCalcEnablbled <- length(stDevCalcMode) == 1 && !is.na(stDevCalcMode) && stDevCalcMode == "t"
    if ((qtCalcEnablbled || !lmEnabled) && !is.na(dfValue) && !is.infinite(dfValue) && dfValue > 0) {
        qValue <- stats::qt(1 - alpha / 2, df = dfValue)
        stDev <- standardError * 2 / qValue * sqrt(sampleSize)
    } else {
        stDev <- standardError * sqrt(sampleSize)
    }

    return(stDev)
}

.getDatasetMeansFromModelsByStage <- function(emmeansResults, correctGroupOrder = TRUE) {
    if (is.null(emmeansResults)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(emmeansResults), " must be a non-empty list")
    }
    if (!is.list(emmeansResults)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(emmeansResults), " must be a list")
    }
    if (length(emmeansResults) == 0) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(emmeansResults), " must be not empty")
    }

    for (stage in 1:length(emmeansResults)) {
        if (!inherits(emmeansResults[[stage]], "emmGrid")) {
            stop(sprintf(
                paste0(
                    "%s%s must contain %s objects created by emmeans(x), ",
                    "where x is a linear model result (one object per stage; class is %s at stage %s)"
                ),
                C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("emmeansResults"), sQuote("emmGrid"),
                .getClassName(emmeansResults[[stage]]), stage
            ))
        }
    }

    stages <- integer(0)
    groups <- integer(0)
    means <- numeric(0)
    stDevs <- numeric(0)
    sampleSizes <- numeric(0)

    lmEnabled <- TRUE
    tryCatch(
        {
            modelCall <- emmeansResults[[1]]@model.info$call
            modelFunction <- as.character(modelCall)[1]
            lmEnabled <- modelFunction == "lm"
            if (!grepl(paste0("::", modelFunction), modelFunction)) {
                packageName <- .getPackageName(modelFunction)
                if (!is.na(packageName)) {
                    modelFunction <- paste0(packageName, "::", modelFunction)
                }
            }

            if (lmEnabled) {
                warning("When using ", modelFunction, "() ",
                    "the estimated marginal means and standard deviations can be inaccurate ",
                    "and analysis results based on this values may be imprecise",
                    call. = FALSE
                )
            } else {
                warning("Using ", modelFunction, " emmeans result objects as ",
                    "arguments of getDataset() is experminental in this rpact version and not fully validated",
                    call. = FALSE
                )
            }
        },
        error = function(e) {
            warning("Using emmeans result objects as ",
                "arguments of getDataset() is experminental in this rpact version and not fully validated",
                call. = FALSE
            )
        }
    )

    stDevCalcMode <- getOption("rpact.dataset.stdev.calc.mode", "auto") # auto, sigma, norm, t
    for (stage in 1:length(emmeansResults)) {
        emmeansResult <- emmeansResults[[stage]]
        emmeansResultsSummary <- summary(emmeansResult)
        emmeansResultsList <- as.list(emmeansResult)

        if (is.null(emmeansResultsSummary[["emmean"]])) {
            stop(
                C_EXCEPTION_TYPE_RUNTIME_ISSUE,
                "the objects in summary(emmeansResults) must contain the field 'emmean'"
            )
        }
        for (expectedField in c("sigma", "extras")) {
            if (is.null(emmeansResultsList[[expectedField]])) {
                stop(
                    C_EXCEPTION_TYPE_RUNTIME_ISSUE,
                    "the objects in as.list(emmeansResults) must contain the field ", sQuote(expectedField)
                )
            }
        }

        numberOfGroups <- length(emmeansResultsSummary$emmean)
        rpactGroupNumbers <- 1:numberOfGroups
        if (correctGroupOrder) {
            rpactGroupNumbers <- 1
            if (numberOfGroups > 1) {
                rpactGroupNumbers <- c(2:numberOfGroups, rpactGroupNumbers)
            }
        }
        for (group in 1:length(emmeansResultsSummary$emmean)) {
            stages <- c(stages, stage)
            groups <- c(groups, group)
            rpactGroupNumber <- rpactGroupNumbers[group]

            standardError <- emmeansResultsSummary$SE[rpactGroupNumber]

            sampleSize <- emmeansResultsList$extras[rpactGroupNumber, ]
            meanValue <- emmeansResultsSummary$emmean[rpactGroupNumber]
            dfValue <- emmeansResultsSummary$df[rpactGroupNumber]
            if (length(stDevCalcMode) == 1 && !is.na(stDevCalcMode) && stDevCalcMode == "sigma") {
                # pooled standard deviation from emmeans
                stDev <- emmeansResultsList$sigma
            } else {
                stDev <- .getStandardDeviationFromStandardError(sampleSize, standardError,
                    dfValue = dfValue, lmEnabled = lmEnabled, stDevCalcMode = stDevCalcMode
                )
            }

            means <- c(means, meanValue)
            stDevs <- c(stDevs, stDev)
            sampleSizes <- c(sampleSizes, sampleSize)
        }
    }

    data <- data.frame(
        stages = stages,
        groups = groups,
        means = means,
        stDevs = stDevs,
        sampleSizes = sampleSizes
    )
    data <- data[order(data$stages, data$groups), ]
    dataWide <- stats::reshape(data = data, direction = "wide", idvar = "stages", timevar = "groups")
    colnames(dataWide) <- gsub("\\.", "", colnames(dataWide))
    return(getDataset(dataWide))
}

.optionalArgsContainsDatasets <- function(...) {
    args <- list(...)
    if (length(args) == 0) {
        return(FALSE)
    }

    for (arg in args) {
        if (inherits(arg, "Dataset")) {
            return(TRUE)
        }
    }
    return(FALSE)
}

.getSubsetsFromArgs <- function(...) {
    args <- list(...)
    if (length(args) == 0) {
        stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "one or more subset datasets expected")
    }

    subsetNames <- names(args)
    if (is.null(subsetNames)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all subsets must be named")
    }

    if (!("R" %in% subsetNames) && !("F" %in% subsetNames)) {
        stop(
            C_EXCEPTION_TYPE_MISSING_ARGUMENT,
            '"R" (stratified analysis)" or "F" (non-stratified analysis) must be defined as subset'
        )
    }

    subsetNumbers <- gsub("\\D", "", subsetNames)
    subsetNumbers <- subsetNumbers[subsetNumbers != ""] #  & nchar(subsetNumbers) == 1
    if (length(subsetNumbers) == 0) {
        stop(
            C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all subset names (",
            .arrayToString(subsetNames), ") must be \"S[n]\", \"R\", or \"F\", ",
            "where [n] is a number with increasing digits (starting with 1)"
        )
    }

    stratifiedInput <- "R" %in% subsetNames

    subsetNumbers <- paste0(subsetNumbers, collapse = "")
    subsetNumbers <- strsplit(subsetNumbers, "")[[1]]
    subsetNumbers <- as.integer(subsetNumbers)
    gMax <- max(subsetNumbers) + 1
    validSubsetNames <- .createSubsetsByGMax(gMax, stratifiedInput = stratifiedInput, all = FALSE)
    for (subsetName in subsetNames) {
        if (subsetName == "") {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all subsets must be named")
        }

        if (!(subsetName %in% validSubsetNames)) {
            suffix <- ifelse(stratifiedInput, " (stratified analysis)", " (non-stratified analysis)")
            if (length(validSubsetNames) < 10) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "invalid subset name (", subsetName, "); ",
                    "valid names are ", .arrayToString(validSubsetNames), suffix
                )
            } else {
                restFull <- ifelse(stratifiedInput, '"R"', '"F"')
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "invalid subset name (", subsetName, "): ",
                    "all subset names must be \"S[n]\" or ", restFull, ", ",
                    "where [n] is a number with increasing digits", suffix
                )
            }
        }
    }

    subsets <- NULL
    subsetType <- NA_character_
    emptySubsetNames <- validSubsetNames[!(validSubsetNames %in% subsetNames)]
    for (subsetName in subsetNames) {
        subset <- args[[subsetName]]
        if (is.null(subset) || (!isS4(subset) && is.na(subset))) {
            emptySubsetNames <- c(emptySubsetNames, subsetName)
        } else {
            if (!.isDataset(subset)) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                    "subset ", subsetName, " is not a dataset (is ", .getClassName(subset), ")"
                )
            }
            if (!is.na(subsetType) && subsetType != .getClassName(subset)) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                    "all subsets must have the same type (found ", subsetType, " and ", .getClassName(subset), ")"
                )
            }
            subsetType <- .getClassName(subset)
            if (is.null(subset[[".data"]])) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                    "subset ", subsetName, " does not contain field '.data'"
                )
            }
            subset <- subset$.data
            subset$subset <- rep(subsetName, nrow(subset))
            if (is.null(subsets)) {
                subsets <- subset
            } else {
                subsets <- rbind(subsets, subset)
            }
        }
    }

    if (length(emptySubsetNames) > 0) {
        emptySubsetNames <- unique(emptySubsetNames)
        template <- subsets[subsets$subset == ifelse(stratifiedInput, "R", "F"), ]
        colNames <- colnames(template)
        colNames <- colNames[!(colNames %in% c("stage", "group", "subset"))]
        for (colName in colNames) {
            template[[colName]] <- rep(NA_real_, nrow(template))
        }

        for (subsetName in emptySubsetNames) {
            template$subset <- rep(subsetName, nrow(template))
            subsets <- rbind(subsets, template)
        }

        if (length(emptySubsetNames) == 1) {
            warning("The undefined subset ", emptySubsetNames,
                " was defined as empty subset",
                call. = FALSE
            )
        } else {
            warning(gettextf(
                "The %s undefined subsets %s were defined as empty subsets",
                length(emptySubsetNames), .arrayToString(emptySubsetNames)
            ), call. = FALSE)
        }
    }

    return(subsets)
}

.validateEnrichmentDataFrameAtFirstStage <- function(dataFrame, params) {
    dataFrameStage1 <- dataFrame[dataFrame$stage == 1, ]
    for (param in params) {
        paramValue <- dataFrameStage1[[param]]
        if (any(is.null(paramValue) || any(is.infinite(paramValue)))) {
            stop(
                C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                gettextf(
                    "all %s values (%s) at first stage must be valid",
                    sQuote(param), .arrayToString(paramValue, maxLength = 10)
                )
            )
        }
        if (any(is.na(paramValue))) {
            subsets <- unique(dataFrame$subset)
            for (s in subsets) {
                subData <- dataFrame[dataFrame$subset == s, ]
                subsetParamValues <- subData[[param]]
                if (!all(is.na(subsetParamValues)) && any(is.na(subsetParamValues[subData$stage == 1]))) {
                    stop(
                        C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                        gettextf(
                            "all %s values (%s) at first stage must be valid (NA is not allowed)",
                            sQuote(param), .arrayToString(paramValue, maxLength = 10)
                        )
                    )
                }
            }
        }
    }
}

.getEndpointSpecificDataFrameParameterNames <- function(dataFrame) {
    paramNames <- colnames(dataFrame)
    paramNames <- paramNames[!(paramNames %in% c("stage", "group", "subset"))]
    return(paramNames)
}

.validateEnrichmentDataFrameDeselection <- function(dataFrame) {
    paramNames <- .getEndpointSpecificDataFrameParameterNames(dataFrame)
    for (i in 1:nrow(dataFrame)) {
        row <- dataFrame[i, paramNames]
        if (any(is.na(row)) && !all(is.na(row))) {
            stop(
                C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                gettextf(
                    paste0(
                        "inconsistent deselection in group %s at stage %s (",
                        "%s: all or none must be NA)"
                    ),
                    dataFrame$group[i], dataFrame$stage[i], .arrayToString(paramNames, maxCharacters = 40)
                )
            )
        }
    }

    subsets <- unique(dataFrame$subset)
    for (s in subsets) {
        deselectedStage <- 0
        for (stage in unique(dataFrame$stage)) {
            subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage, paramNames]

            if (deselectedStage > 0 && !all(is.na(subData))) {
                stop(
                    C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                    gettextf(paste0(
                        "%s was deselected at stage %s ",
                        "and therefore must be also deselected in the following stages, ",
                        "but is no longer deselected in stage %s"
                    ), s, deselectedStage, stage)
                )
            }

            if (any(is.na(subData))) {
                deselectedStage <- stage
            }
        }
    }
}

.validateEnrichmentDataFrameMeans <- function(dataFrame) {
    if (any(na.omit(dataFrame$stDev) <= 0) || any(na.omit(dataFrame$overallStDev) <= 0)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all standard deviations must be > 0")
    }
    if (any(na.omit(dataFrame$sampleSize) <= 0) || any(na.omit(dataFrame$overallSampleSize) <= 0)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be > 0")
    }

    .validateEnrichmentDataFrameAtFirstStage(dataFrame,
        params = c("sampleSize", "overallSampleSize", "mean", "overallMean", "stDev", "overallStDev")
    )

    .validateEnrichmentDataFrameDeselection(dataFrame)

    subsets <- unique(dataFrame$subset)
    if ("F" %in% subsets) {
        subsets <- subsets[subsets != "F"]
        fullData <- dataFrame[dataFrame$subset == "F", ]
        for (s in subsets) {
            for (stage in unique(dataFrame$stage)) {
                for (group in unique(dataFrame$group)) {
                    subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]

                    stDevFull <- na.omit(fullData$stDev[fullData$stage == stage & fullData$group == group])
                    stDevSubset <- na.omit(subData$stDev)
                    if (length(stDevFull) > 0 && length(stDevSubset) > 0 && any(stDevFull <= stDevSubset)) {
                        stop(
                            C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                            gettextf(
                                "'stDev' F (%s) must be > 'stDev' %s (%s) in group %s at stage %s",
                                .arrayToString(stDevFull), s,
                                .arrayToString(stDevSubset), group, stage
                            )
                        )
                    }

                    sampleSizeFull <- na.omit(fullData$sampleSize[fullData$stage == stage & fullData$group == group])
                    sampleSizeSubset <- na.omit(subData$sampleSize)
                    if (length(sampleSizeFull) > 0 && length(sampleSizeSubset) > 0 && any(sampleSizeFull < sampleSizeSubset)) {
                        stop(
                            C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                            gettextf(
                                "'sampleSize' F (%s) must be >= 'sampleSize' %s (%s) in group %s at stage %s",
                                .arrayToString(sampleSizeFull), s,
                                .arrayToString(sampleSizeSubset), group, stage
                            )
                        )
                    }
                }
            }
        }
    }
}

.validateEnrichmentDataFrameSurvival <- function(dataFrame) {
    if (any(na.omit(dataFrame$event) < 0) || any(na.omit(dataFrame$overallEvent) < 0)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0")
    }

    .validateEnrichmentDataFrameAtFirstStage(dataFrame,
        params = c("event", "overallEvent")
    )

    .validateEnrichmentDataFrameDeselection(dataFrame)

    subsets <- unique(dataFrame$subset)
    if ("F" %in% subsets) {
        subsets <- subsets[subsets != "F"]
        fullData <- dataFrame[dataFrame$subset == "F", ]
        for (s in subsets) {
            for (stage in unique(dataFrame$stage)) {
                for (group in unique(dataFrame$group)) {
                    subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]

                    eventFull <- na.omit(fullData$event[fullData$stage == stage & fullData$group == group])
                    eventSubset <- na.omit(subData$event)
                    if (length(eventFull) > 0 && length(eventSubset) > 0 && any(eventFull < eventSubset)) {
                        stop(
                            C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                            gettextf(
                                "'event' F (%s) must be >= 'event' %s (%s) in group %s at stage %s",
                                .arrayToString(eventFull), s,
                                .arrayToString(eventSubset), group, stage
                            )
                        )
                    }
                }
            }
        }
    }
}

.validateEnrichmentDataFrameRates <- function(dataFrame) {
    if (any(na.omit(dataFrame$sampleSize) <= 0) || any(na.omit(dataFrame$overallSampleSize) <= 0)) {
        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be > 0")
    }

    .validateEnrichmentDataFrameAtFirstStage(dataFrame,
        params = c("sampleSize", "overallSampleSize")
    )

    .validateEnrichmentDataFrameDeselection(dataFrame)

    subsets <- unique(dataFrame$subset)
    if ("F" %in% subsets) {
        subsets <- subsets[subsets != "F"]
        fullData <- dataFrame[dataFrame$subset == "F", ]
        for (s in subsets) {
            for (stage in unique(dataFrame$stage)) {
                for (group in unique(dataFrame$group)) {
                    subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]

                    sampleSizeFull <- na.omit(fullData$sampleSize[fullData$stage == stage & fullData$group == group])
                    sampleSizeSubset <- na.omit(subData$sampleSize)
                    if (length(sampleSizeFull) > 0 && length(sampleSizeSubset) > 0 && any(sampleSizeFull < sampleSizeSubset)) {
                        stop(
                            C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                            gettextf(
                                "'sampleSize' F (%s) must be >= 'sampleSize' %s (%s) in group %s at stage %s",
                                .arrayToString(sampleSizeFull), s,
                                .arrayToString(sampleSizeSubset), group, stage
                            )
                        )
                    }
                }
            }
        }
    }

    .validateEnrichmentDataFrameSurvival(dataFrame)
}

.validateEnrichmentDataFrameHasConsistentNumberOfStages <- function(dataFrame) {
    subsets <- unique(dataFrame$subset)
    kMaxList <- list()
    for (s in subsets) {
        subsetStages <- as.integer(sort(unique(na.omit(as.character(dataFrame$stage[dataFrame$subset == s])))))
        kMax <- max(subsetStages)
        if (!identical(1:kMax, subsetStages)) {
            stop(
                C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                gettextf("subset %s has incomplete stages (%s)", s, .arrayToString(subsetStages))
            )
        }

        kMaxList[[s]] <- kMax
    }

    kMax <- unique(unlist(kMaxList))
    if (length(kMax) > 1) {
        stop(
            C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
            "all subsets must have the identical number of stages defined (kMax: ", .listToString(kMaxList), ")"
        )
    }
}

.validateEnrichmentDataFrame <- function(dataFrame) {
    paramNames <- colnames(dataFrame)
    if (any(grepl("(S|s)tDev", paramNames))) {
        .validateEnrichmentDataFrameMeans(dataFrame)
    } else if (any(grepl("(S|s)ampleSize", paramNames)) && any(grepl("(E|e)vent", paramNames))) {
        .validateEnrichmentDataFrameRates(dataFrame)
    } else if (any(grepl("(L|l)ogRank", paramNames)) || any(grepl("(E|e)xpectedEvent", paramNames))) {
        .validateEnrichmentDataFrameSurvival(dataFrame)
    } else {
        print(paramNames)
        stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "could not identify the endpoint of the specified dataset")
    }

    subsets <- unique(dataFrame$subset)
    if ("R" %in% subsets) {
        paramNames <- .getEndpointSpecificDataFrameParameterNames(dataFrame)
        paramName <- paramNames[1]
        subsets <- subsets[subsets != "R"]
        subsets <- subsets[grepl("^S\\d$", subsets)]
        if (length(subsets) > 0) {
            restData <- dataFrame[dataFrame$subset == "R", ]
            for (s in subsets) {
                stages <- unique(dataFrame$stage)
                stages <- stages[stages != 1]
                if (length(stages) > 0) {
                    for (stage in stages) {
                        for (group in unique(dataFrame$group)) {
                            subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]

                            paramValueRest <- restData[[paramName]][restData$stage == stage & restData$group == group]
                            paramValueSubset <- subData[[paramName]]
                            if (length(paramValueRest) > 0 && length(paramValueSubset) > 0 &&
                                    any(is.na(paramValueSubset)) && !all(is.na(paramValueRest))) {
                                stop(
                                    C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                                    gettextf(
                                        paste0(
                                            "if %s is deselected (NA) then R also must be deselected (NA) but, e.g., ",
                                            "%s R is %s in group %s at stage %s"
                                        ),
                                        s, sQuote(paramName), .arrayToString(paramValueRest, vectorLookAndFeelEnabled = TRUE),
                                        group, stage
                                    )
                                )
                            }
                        }
                    }
                }
            }
        }
    }

    .validateEnrichmentDataFrameHasConsistentNumberOfStages(dataFrame)
}

.getEnrichmentDataFrameFromArgs <- function(...) {
    dataFrame <- .getSubsetsFromArgs(...)

    validColumns <- c()
    for (j in 1:ncol(dataFrame)) {
        if (!all(is.na(dataFrame[, j]))) {
            validColumns <- c(validColumns, j)
        }
    }
    if (length(validColumns) > 0) {
        dataFrame <- dataFrame[, validColumns]
    }

    return(dataFrame)
}

.getEnrichmentDatasetFromArgs <- function(...) {
    dataFrame <- .getEnrichmentDataFrameFromArgs(...)
    .validateEnrichmentDataFrame(dataFrame)
    dataFrame <- .getWideFormat(dataFrame)
    return(.getDataset(dataFrame = dataFrame))
}

.getDatasetExample <- function(exampleType) {
    if (exampleType == "means") {
        return(getDataset(
            n1 = c(13, 25),
            n2 = c(15, NA),
            n3 = c(14, 27),
            n4 = c(12, 29),
            means1 = c(24.2, 22.2),
            means2 = c(18.8, NA),
            means3 = c(26.7, 27.7),
            means4 = c(9.2, 12.2),
            stDevs1 = c(24.4, 22.1),
            stDevs2 = c(21.2, NA),
            stDevs3 = c(25.6, 23.2),
            stDevs4 = c(21.5, 22.7)
        ))
    } else if (exampleType == "rates") {
        return(getDataset(
            n1 = c(23, 25),
            n2 = c(25, NA),
            n3 = c(24, 27),
            n4 = c(22, 29),
            events1 = c(15, 12),
            events2 = c(19, NA),
            events3 = c(18, 22),
            events4 = c(12, 13)
        ))
    } else if (exampleType == "survival") {
        return(getDataset(
            events1   = c(25, 32),
            events2   = c(18, NA),
            events3   = c(22, 36),
            logRanks1 = c(2.2, 1.8),
            logRanks2 = c(1.99, NA),
            logRanks3 = c(2.32, 2.11)
        ))
    }

    stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'exampleType' (", exampleType, ") is not allowed")
}

#'
#' @name Dataset
#'
#' @title
#' Dataset
#'
#' @description
#' Basic class for datasets.
#'
#' @template field_stages
#' @template field_groups
#'
#' @details
#' \code{Dataset} is the basic class for
#' \itemize{
#'   \item \code{\link{DatasetMeans}},
#'   \item \code{\link{DatasetRates}},
#'   \item \code{\link{DatasetSurvival}}, and
#'   \item \code{\link{DatasetEnrichmentSurvival}}.
#' }
#' This basic class contains the fields \code{stages} and \code{groups} and several commonly used
#' functions.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include class_design.R
#' @include f_core_constants.R
#' @include f_core_assertions.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
Dataset <- setRefClass("Dataset",
    contains = "ParameterSet",
    fields = list(
        .data = "data.frame",
        .plotSettings = "PlotSettings",
        .id = "integer",
        .description = "character",
        .floatingPointNumbersEnabled = "logical",
        .kMax = "integer",
        .enrichmentEnabled = "logical",
        .inputType = "character",
        .design = "ANY",
        stages = "integer",
        groups = "integer",
        subsets = "character"
    ),
    methods = list(
        initialize = function(dataFrame, ..., floatingPointNumbersEnabled = FALSE, enrichmentEnabled = FALSE) {
            callSuper(
                .floatingPointNumbersEnabled = floatingPointNumbersEnabled,
                .enrichmentEnabled = enrichmentEnabled, ...
            )
            .plotSettings <<- PlotSettings()
            .parameterNames <<- .getParameterNames(dataset = .self)
            .parameterFormatFunctions <<- C_PARAMETER_FORMAT_FUNCTIONS

            .id <<- NA_integer_
            .description <<- NA_character_
            .inputType <<- NA_character_

            if (!missing(dataFrame)) {
                .initByDataFrame(dataFrame)
                .kMax <<- getNumberOfStages()
                if (!.enrichmentEnabled) {
                    .validateDataset()
                }
            }
        },
        getPlotSettings = function() {
            return(.plotSettings)
        },
        show = function(showType = 1, digits = NA_integer_) {
            "Method for automatically printing dataset objects"
            .show(showType = showType, digits = digits, consoleOutputEnabled = TRUE)
        },
        .show = function(showType = 1, digits = NA_integer_, consoleOutputEnabled = TRUE) {
            .resetCat()

            if (!is.null(showType) && length(showType) == 1 && !is.na(showType) &&
                    is.character(showType) && showType == "rcmd") {
                s <- strsplit(getObjectRCode(.self, stringWrapParagraphWidth = NULL), "), *")[[1]]
                s[2:length(s)] <- paste0("\t", s[2:length(s)])
                s <- paste0(s, collapse = "),\n")
                cat(s, "\n")
            } else if (showType == 2) {
                callSuper(showType = showType, digits = digits, consoleOutputEnabled = consoleOutputEnabled)
            } else {
                .showParametersOfOneGroup(.getUserDefinedParameters(),
                    title = .toString(startWithUpperCase = TRUE), orderByParameterName = FALSE,
                    consoleOutputEnabled = consoleOutputEnabled
                )

                .showParametersOfOneGroup(.getGeneratedParameters(),
                    title = "Calculated data", orderByParameterName = FALSE,
                    consoleOutputEnabled = consoleOutputEnabled
                )

                .showUnknownParameters(consoleOutputEnabled = consoleOutputEnabled)

                if (!is.na(.description) && nchar(.description) > 0) {
                    .cat("Description: ", .description, "\n\n",
                        consoleOutputEnabled = consoleOutputEnabled
                    )
                }
            }
        },
        .initByDataFrame = function(dataFrame) {
            if (!is.data.frame(dataFrame)) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                    "'dataFrame' must be a data.frame (is an instance of class ", .getClassName(dataFrame), ")"
                )
            }

            if (!.paramExists(dataFrame, "stage") && !.paramExists(dataFrame, "stages")) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                    "'dataFrame' must contain parameter 'stages' or 'stage'"
                )
            }

            stages <<- as.integer(.getValuesByParameterName(dataFrame, c("stages", "stage")))
            if (!.enrichmentEnabled && length(unique(stages)) < length(stages)) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'stages' (", .arrayToString(stages),
                    ") must be a unique vector of stage numbers"
                )
            }
            groups <<- rep(1L, length(stages))

            .setParameterType("groups", C_PARAM_USER_DEFINED)
            .setParameterType("stages", C_PARAM_USER_DEFINED)

            if (any(grepl("^subsets?\\d*$", colnames(dataFrame)))) {
                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, c(C_KEY_WORDS_SAMPLE_SIZES, C_KEY_WORDS_LOG_RANKS))
                subsets <<- character(0)
                for (group in 1:numberOfTreatmentGroups) {
                    suffix <- ifelse(any(grepl("^subsets?\\d+$", colnames(dataFrame))), group, "")
                    subsets <<- c(subsets, .getValuesByParameterName(dataFrame, C_KEY_WORDS_SUBSETS, suffix = suffix))
                }
                .setParameterType("subsets", C_PARAM_USER_DEFINED)
            } else {
                subsets <<- rep(NA_character_, length(stages))
            }
        },
        .validateDataset = function() {
            .assertIsValidKMax(kMax = getNumberOfStages())

            for (var in names(.self)) {
                values <- .self[[var]]
                if (any(is.nan(values)) || any(is.infinite(values))) {
                    stop(
                        C_EXCEPTION_TYPE_RUNTIME_ISSUE, "'", var, "' (", .arrayToString(values),
                        ") contains illegal values, i.e., something went wrong"
                    )
                }
            }
        },
        .validateValues = function(values, name) {
            if (.enrichmentEnabled) {
                return(invisible())
            }

            l1 <- length(unique(stages))
            l2 <- length(values)
            if (l1 != l2) {
                stop(
                    C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
                    "there ", ifelse(l1 == 1, paste("is", l1, "stage"),
                        paste("are", l1, "stages")
                    ), " defined",
                    " (", .arrayToString(unique(stages)), ") and '", name, "' has length ", l2
                )
            }
        },
        .recreateDataFrame = function() {
            .data <<- data.frame(
                stage = factor(stages),
                group = factor(groups),
                subset = factor(subsets)
            )
        },
        .setDataToVariables = function() {
            stages <<- as.integer(.data$stage)
            groups <<- as.integer(.data$group)
            subsets <<- as.character(.data$subset)
        },
        .fillWithNAs = function(kMax) {
            numberOfStages <- getNumberOfStages()
            .kMax <<- numberOfStages
            if (numberOfStages >= kMax) {
                return(invisible())
            }

            numberOfGroups <- getNumberOfGroups(survivalCorrectionEnabled = FALSE)
            if (.enrichmentEnabled) {
                for (stage in (numberOfStages + 1):kMax) {
                    for (group in 1:numberOfGroups) {
                        for (subset in levels(.data$subset)) {
                            stages <<- c(stages, stage)
                            groups <<- c(groups, group)
                            subsets <<- c(subsets, subset)
                        }
                    }
                }
            } else {
                for (stage in (numberOfStages + 1):kMax) {
                    for (group in 1:numberOfGroups) {
                        stages <<- c(stages, stage)
                        groups <<- c(groups, group)
                        subsets <<- c(subsets, NA_character_)
                    }
                }
            }
        },
        .trim = function(kMax) {
            if (is.na(kMax)) {
                kMax <- .kMax
            }
            numberOfStages <- getNumberOfStages(FALSE)
            if (numberOfStages <= kMax) {
                return(invisible(numeric(0)))
            }

            indices <- which(stages <= kMax)

            stages <<- stages[indices]
            groups <<- groups[indices]
            subsets <<- subsets[indices]

            return(indices)
        },
        .orderDataByStageAndGroup = function() {
            if (.enrichmentEnabled) {
                dat <- .data
                dat$char <- gsub("\\d", "", as.character(.data$subset))
                dat$char[dat$char == "R"] <- "Z"
                dat$char[dat$char == "F"] <- "Z"
                dat$num <- as.integer(gsub("\\D", "", as.character(.data$subset)))

                .data <<- .data[order(.data$stage, .data$group, dat$char, dat$num), ]
            } else {
                .data <<- .data[order(.data$stage, .data$group), ]
            }
        },
        .getNumberOfNAsToAdd = function(kMax) {
            n <- kMax - getNumberOfStages()
            if (n <= 0) {
                return(0)
            }

            n <- n * getNumberOfGroups(survivalCorrectionEnabled = FALSE)
            if (.enrichmentEnabled) {
                n <- n * getNumberOfSubsets()
            }
            return(n)
        },
        .paramExists = function(dataFrame, parameterName) {
            for (p in parameterName) {
                value <- dataFrame[[p]]
                if (!is.null(value)) {
                    return(TRUE)
                }
            }
            return(FALSE)
        },
        .getValuesByParameterName = function(dataFrame, parameterNameVariants, ...,
                defaultValues = NULL, suffix = "") {
            for (parameterName in parameterNameVariants) {
                key <- paste0(parameterName, suffix)
                if (.paramExists(dataFrame, key)) {
                    return(dataFrame[[key]])
                }
            }

            if (!is.null(defaultValues)) {
                return(defaultValues)
            }

            stop(
                C_EXCEPTION_TYPE_MISSING_ARGUMENT, "parameter '",
                paste0(parameterNameVariants[1], suffix), "' is missing or not correctly specified"
            )
        },
        .getValueLevels = function(values) {
            if (is.factor(values)) {
                return(levels(values))
            }

            return(sort(unique(na.omit(values))))
        },
        .getValues = function(paramName, paramValues) {
            values <- .data[[paramName]]
            valueLevels <- .getValueLevels(values)
            if (!all(paramValues %in% valueLevels)) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'", paramName, "' (", .arrayToString(paramValues),
                    ") out of range [", .arrayToString(valueLevels), "]"
                )
            }
            return(values)
        },
        .getIndexValues = function(paramName, paramValues, subset = NA_character_) {
            values <- .getValues(paramName, paramValues)
            if (all(is.na(subset))) {
                return(which(values %in% paramValues))
            }

            .assertIsValidSubset(subset)
            return(which(values %in% paramValues & .data$subset %in% subset))
        },
        .assertIsValidSubset = function(subset) {
            for (s in subset) {
                if (!(s %in% levels(.data$subset))) {
                    stop(
                        C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'subset' (", s,
                        ") is not a defined value [", .arrayToString(levels(.data$subset)), "]"
                    )
                }
            }
        },
        .getIndices = function(..., stage, group, subset = NA_character_) {
            if (is.null(.data)) {
                stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "'.data' must be defined")
            }

            if (!is.null(stage) && !any(is.na(stage)) && all(stage < 0)) {
                index <- 1:getNumberOfStages()
                stage <- index[!(index %in% abs(stage))]
            }

            if (!is.null(group) && !any(is.na(group)) && all(group < 0)) {
                index <- 1:getNumberOfGroups(survivalCorrectionEnabled = FALSE)
                group <- index[!(index %in% abs(group))]
            }

            # stage only and optional subset
            if (!is.null(group) && length(group) == 1 && is.na(group)) {
                return(.getIndexValues("stage", stage, subset))
            }

            # group only and optional subset
            if (!is.null(stage) && length(stage) == 1 && is.na(stage)) {
                return(.getIndexValues("group", group, subset))
            }

            # stage and group and optional subset
            stageValues <- .getValues("stage", stage)
            groupValues <- .getValues("group", group)
            if (all(is.na(subset))) {
                return(which(stageValues %in% stage & groupValues %in% group))
            }

            .assertIsValidSubset(subset)
            return(which(stageValues %in% stage & groupValues %in% group & .data$subset %in% subset))
        },
        .getValidatedFloatingPointNumbers = function(x, parameterName = "Sample sizes") {
            if (.floatingPointNumbersEnabled) {
                return(x)
            }

            nToCheck <- stats::na.omit(x)
            if (any(nToCheck != as.integer(nToCheck))) {
                warning(parameterName, " specified as floating-point numbers were truncated", call. = FALSE)
            }

            x[!is.na(x)] <- as.integer(x[!is.na(x)])
            return(x)
        },
        .keyWordExists = function(dataFrame, keyWords, suffix = "") {
            for (key in keyWords) {
                if (.paramExists(dataFrame, paste0(key, suffix))) {
                    return(TRUE)
                }
            }
            return(FALSE)
        },
        .getNumberOfGroups = function(dataFrame, keyWords) {
            for (group in 2:1000) {
                if (!.keyWordExists(dataFrame, keyWords, group)) {
                    return(group - 1)
                }
            }
            return(1)
        },
        .getValidatedStage = function(stage = NA_integer_) {
            if (all(is.na(stage))) {
                stage <- c(1:getNumberOfStages())
            }
            return(stage)
        },
        getNumberOfGroups = function(survivalCorrectionEnabled = TRUE) {
            data <- stats::na.omit(.data)
            if (!survivalCorrectionEnabled) {
                return(length(levels(data$group)))
            }
            return(length(levels(data$group)) + ifelse(inherits(.self, "DatasetSurvival"), 1, 0))
        },
        getNumberOfStages = function(naOmitEnabled = TRUE) {
            if (naOmitEnabled) {
                colNames <- colnames(.data)
                validColNames <- character(0)
                for (colName in colNames) {
                    colValues <- .data[, colName]
                    if (length(colValues) > 0 && !all(is.na(colValues))) {
                        validColNames <- c(validColNames, colName)
                    }
                }
                subData <- stats::na.omit(.data[, validColNames])
                numberOfStages <- length(unique(as.character(subData$stage)))
                if (numberOfStages == 0) {
                    print(.data[, validColNames])
                    stop(
                        C_EXCEPTION_TYPE_RUNTIME_ISSUE,
                        ".data seems to contain an invalid column"
                    )
                }
                return(numberOfStages)
            }
            return(length(levels(.data$stage)))
        },
        getNumberOfSubsets = function() {
            return(length(levels(.data$subset)))
        },
        isDatasetMeans = function() {
            return(inherits(.self, "DatasetMeans"))
        },
        isDatasetRates = function() {
            return(inherits(.self, "DatasetRates"))
        },
        isDatasetSurvival = function() {
            return(inherits(.self, "DatasetSurvival"))
        },
        isStratified = function() {
            return(.enrichmentEnabled && "R" %in% levels(.data$subset))
        },
        setId = function(id) {
            .id <<- as.integer(id)
        },
        getId = function() {
            return(.id)
        },
        setDescription = function(description) {
            .description <<- description
        },
        getDescription = function() {
            return(.description)
        },
        .toString = function(startWithUpperCase = FALSE) {
            s <- "dataset of "
            if (.enrichmentEnabled) {
                s <- paste0(s, "enrichment ")
            } else if (.self$getNumberOfGroups() > 2) {
                s <- paste0(s, "multi-arm ")
            }

            if (isDatasetMeans()) {
                s <- paste0(s, "means")
            } else if (isDatasetRates()) {
                s <- paste0(s, "rates")
            } else if (isDatasetSurvival()) {
                s <- paste0(s, "survival data")
            } else {
                s <- paste0(s, "unknown endpoint")
            }
            return(ifelse(startWithUpperCase, .firstCharacterToUpperCase(s), s))
        }
    )
)

#'
#' @name DatasetMeans
#'
#' @title
#' Dataset of Means
#'
#' @description
#' Class for a dataset of means.
#'
#' @template field_groups
#' @template field_stages
#' @template field_sampleSizes
#' @template field_means
#' @template field_stDevs
#' @template field_overallSampleSizes
#' @template field_overallMeans
#' @template field_overallStDevs
#'
#' @details
#' This object cannot be created directly; better use \code{\link{getDataset}}
#' with suitable arguments to create a dataset of means.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include f_core_constants.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
DatasetMeans <- setRefClass("DatasetMeans",
    contains = "Dataset",
    fields = list(
        sampleSizes = "numeric",
        means = "numeric",
        stDevs = "numeric",
        overallSampleSizes = "numeric",
        overallMeans = "numeric",
        overallStDevs = "numeric"
    ),
    methods = list(
        getSampleSize = function(stage, group = 1, subset = NA_character_) {
            return(.data$sampleSize[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getMean = function(stage, group = 1, subset = NA_character_) {
            return(.data$mean[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getStDev = function(stage, group = 1, subset = NA_character_) {
            return(.data$stDev[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$sampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getMeans = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$mean[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getStDevs = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$stDev[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$sampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getMeansUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$mean[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getStDevsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$stDev[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallSampleSize = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallSampleSize[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallMean = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallMean[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallStDev = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallStDev[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallSampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallMeans = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallMean[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallStDevs = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallStDev[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallSampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallMeansUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallMean[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallStDevsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallStDev[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        .initByDataFrame = function(dataFrame) {
            callSuper(dataFrame)

            # case: one mean - stage wise
            if (.paramExists(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)) {
                .inputType <<- "stagewise"
                sampleSizes <<- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
                    dataFrame,
                    C_KEY_WORDS_SAMPLE_SIZES
                ), parameterName = "Sample sizes")
                .validateValues(sampleSizes, "n")
                if (any(stats::na.omit(sampleSizes) <= 0)) {
                    stop(
                        C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                        "all sample sizes must be > 0, but 'n' = ",
                        .arrayToString(sampleSizes, vectorLookAndFeelEnabled = TRUE)
                    )
                }

                means <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_MEANS)
                .validateValues(means, "means")

                stDevs <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_ST_DEVS)
                .validateValues(stDevs, "stDevs")
            }

            # case: one mean - overall
            else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)) {
                .inputType <<- "overall"
                overallSampleSizes <<- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
                    dataFrame,
                    C_KEY_WORDS_OVERALL_SAMPLE_SIZES
                ), parameterName = "Cumulative sample sizes ")
                .validateValues(overallSampleSizes, "overallSampleSizes")

                overallMeans <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_MEANS)
                .validateValues(overallMeans, "overallMeans")

                overallStDevs <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_ST_DEVS)
                .validateValues(overallStDevs, "overallStDevs")
            }

            # case: two or more means - stage wise
            else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 1)) &&
                    .paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 2))) {
                .inputType <<- "stagewise"
                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)
                stages <<- rep(stages, numberOfTreatmentGroups)
                groups <<- integer(0)
                sampleSizes <<- numeric(0)
                means <<- numeric(0)
                stDevs <<- numeric(0)
                for (group in 1:numberOfTreatmentGroups) {
                    sampleSizesTemp <- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_SAMPLE_SIZES,
                        suffix = group
                    ), parameterName = "Sample sizes")
                    .validateValues(sampleSizesTemp, paste0("n", group))
                    if (any(stats::na.omit(sampleSizesTemp) <= 0)) {
                        stop(
                            C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                            "all sample sizes must be > 0, but 'n", group, "' = ",
                            .arrayToString(sampleSizesTemp, vectorLookAndFeelEnabled = TRUE)
                        )
                    }
                    sampleSizes <<- c(sampleSizes, sampleSizesTemp)

                    meansTemp <- .getValuesByParameterName(dataFrame, C_KEY_WORDS_MEANS, suffix = group)
                    .validateValues(meansTemp, paste0("means", group))
                    means <<- c(means, meansTemp)

                    stDevsTemp <- .getValuesByParameterName(dataFrame, C_KEY_WORDS_ST_DEVS, suffix = group)
                    .validateValues(stDevsTemp, paste0("stDevs", group))
                    stDevs <<- c(stDevs, stDevsTemp)

                    groups <<- c(groups, rep(as.integer(group), length(sampleSizesTemp)))
                }
            }

            # case: two or more means - overall
            else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 1)) &&
                    .paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 2))) {
                .inputType <<- "overall"
                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)
                stages <<- rep(stages, numberOfTreatmentGroups)
                groups <<- integer(0)
                sampleSizes <<- numeric(0)
                means <<- numeric(0)
                stDevs <<- numeric(0)
                overallSampleSizes <<- numeric(0)
                overallMeans <<- numeric(0)
                overallStDevs <<- numeric(0)
                for (group in 1:numberOfTreatmentGroups) {
                    overallSampleSizesTemp <- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES,
                        suffix = group
                    ), parameterName = "Cumulative sample sizes")
                    .validateValues(overallSampleSizesTemp, paste0("overallSampleSizes", group))
                    overallSampleSizes <<- c(overallSampleSizes, overallSampleSizesTemp)

                    overallMeansTemp <- .getValuesByParameterName(dataFrame,
                        C_KEY_WORDS_OVERALL_MEANS,
                        suffix = group
                    )
                    .validateValues(overallMeansTemp, paste0("overallMeans", group))
                    overallMeans <<- c(overallMeans, overallMeansTemp)

                    overallStDevsTemp <- .getValuesByParameterName(dataFrame,
                        C_KEY_WORDS_OVERALL_ST_DEVS,
                        suffix = group
                    )
                    .validateValues(overallStDevsTemp, paste0("overallStDevs", group))
                    overallStDevs <<- c(overallStDevs, overallStDevsTemp)

                    groups <<- c(groups, rep(as.integer(group), length(overallSampleSizesTemp)))
                }
            } else {
                stop(
                    C_EXCEPTION_TYPE_MISSING_ARGUMENT,
                    "sample sizes are missing or not correctly specified"
                )
            }

            if (.inputType == "stagewise") {
                n <- length(sampleSizes)
                overallSampleSizes <<- rep(NA_real_, n)
                overallMeans <<- rep(NA_real_, n)
                overallStDevs <<- rep(NA_real_, n)

                .setParameterType("sampleSizes", C_PARAM_USER_DEFINED)
                .setParameterType("means", C_PARAM_USER_DEFINED)
                .setParameterType("stDevs", C_PARAM_USER_DEFINED)

                .setParameterType("overallSampleSizes", C_PARAM_GENERATED)
                .setParameterType("overallMeans", C_PARAM_GENERATED)
                .setParameterType("overallStDevs", C_PARAM_GENERATED)

                .recreateDataFrame()
                .createOverallData()
            } else {
                n <- length(overallSampleSizes)
                sampleSizes <<- rep(NA_real_, n)
                means <<- rep(NA_real_, n)
                stDevs <<- rep(NA_real_, n)

                .setParameterType("sampleSizes", C_PARAM_GENERATED)
                .setParameterType("means", C_PARAM_GENERATED)
                .setParameterType("stDevs", C_PARAM_GENERATED)

                .setParameterType("overallSampleSizes", C_PARAM_USER_DEFINED)
                .setParameterType("overallMeans", C_PARAM_USER_DEFINED)
                .setParameterType("overallStDevs", C_PARAM_USER_DEFINED)

                .recreateDataFrame()
                .createStageWiseData()
            }

            if (sum(stats::na.omit(sampleSizes) < 0) > 0) {
                stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be >= 0")
            }
            if (sum(stats::na.omit(stDevs) < 0) > 0) {
                stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all standard deviations must be >= 0")
            }
        },
        .recreateDataFrame = function() {
            callSuper()
            .data <<- cbind(.data, data.frame(
                sampleSize = sampleSizes,
                mean = means,
                stDev = stDevs,
                overallSampleSize = overallSampleSizes,
                overallMean = overallMeans,
                overallStDev = overallStDevs
            ))
            .orderDataByStageAndGroup()
            .setDataToVariables()
        },
        .setDataToVariables = function() {
            callSuper()
            sampleSizes <<- .data$sampleSize
            means <<- .data$mean
            stDevs <<- .data$stDev
            overallSampleSizes <<- .data$overallSampleSize
            overallMeans <<- .data$overallMean
            overallStDevs <<- .data$overallStDev
        },
        .fillWithNAs = function(kMax) {
            callSuper(kMax)
            n <- .getNumberOfNAsToAdd(kMax)

            naRealsToAdd <- rep(NA_real_, n)

            sampleSizes <<- c(sampleSizes, naRealsToAdd)
            means <<- c(means, naRealsToAdd)
            stDevs <<- c(stDevs, naRealsToAdd)

            overallSampleSizes <<- c(overallSampleSizes, naRealsToAdd)
            overallMeans <<- c(overallMeans, naRealsToAdd)
            overallStDevs <<- c(overallStDevs, naRealsToAdd)

            .recreateDataFrame()
        },
        .trim = function(kMax = NA_integer_) {
            indices <- callSuper(kMax)
            if (length(indices) == 0) {
                return(invisible(FALSE))
            }

            sampleSizes <<- sampleSizes[indices]
            means <<- means[indices]
            stDevs <<- stDevs[indices]

            overallSampleSizes <<- overallSampleSizes[indices]
            overallMeans <<- overallMeans[indices]
            overallStDevs <<- overallStDevs[indices]

            .recreateDataFrame()
            return(invisible(TRUE))
        },
        .getOverallMeans = function(sampleSizes, means) {
            return(cumsum(sampleSizes * means) / cumsum(sampleSizes))
        },
        .getOverallStDevs = function(sampleSizes, means, stDevs, overallMeans) {
            kMax <- length(sampleSizes)
            overallStDev <- rep(NA_real_, kMax)
            for (k in 1:kMax) {
                overallStDev[k] <- sqrt((sum((sampleSizes[1:k] - 1) * stDevs[1:k]^2) +
                    sum(sampleSizes[1:k] * (means[1:k] - overallMeans[k])^2)) /
                    (sum(sampleSizes[1:k]) - 1))
            }
            return(overallStDev)
        },
        .createOverallData = function() {
            .data$overallSampleSize <<- rep(NA_real_, nrow(.data))
            .data$overallMean <<- rep(NA_real_, nrow(.data))
            .data$overallStDev <<- rep(NA_real_, nrow(.data))
            subsetLevels <- NA_character_
            if (.enrichmentEnabled) {
                subsetLevels <- levels(.data$subset)
            }
            for (s in subsetLevels) {
                for (g in levels(.data$group)) {
                    if (!is.na(s)) {
                        indices <- which(.data$subset == s & .data$group == g)
                    } else {
                        indices <- which(.data$group == g)
                    }
                    .data$overallSampleSize[indices] <<- cumsum(.data$sampleSize[indices])
                    .data$overallMean[indices] <<- .getOverallMeans(
                        .data$sampleSize[indices], .data$mean[indices]
                    )
                    .data$overallStDev[indices] <<- .getOverallStDevs(
                        .data$sampleSize[indices],
                        .data$mean[indices], .data$stDev[indices], .data$overallMean[indices]
                    )
                }
            }
            .setDataToVariables()
        },
        .getStageWiseSampleSizes = function(overallSampleSizes) {
            result <- overallSampleSizes
            if (length(overallSampleSizes) == 1) {
                return(result)
            }

            kMax <- length(overallSampleSizes)
            result[2:kMax] <- overallSampleSizes[2:kMax] - overallSampleSizes[1:(kMax - 1)]
            return(result)
        },
        .getStageWiseMeans = function(sampleSizes, overallSampleSizes, overallMeans) {
            result <- overallMeans
            if (length(overallMeans) == 1) {
                return(result)
            }

            for (k in 2:length(overallMeans)) {
                result[k] <- (overallSampleSizes[k] * overallMeans[k] -
                    overallSampleSizes[k - 1] * overallMeans[k - 1]) / sampleSizes[k]
            }
            return(result)
        },
        .getStageWiseStDev = function(overallStDevs, sampleSizes, overallSampleSizes, means, overallMeans, k) {
            numBeforeK <- (overallSampleSizes[k - 1] - 1) * overallStDevs[k - 1]^2
            numK <- (overallSampleSizes[k] - 1) * overallStDevs[k]^2
            numSumBeforeK <- sum(sampleSizes[1:(k - 1)] * (means[1:(k - 1)] - overallMeans[k - 1])^2)
            numSumK <- sum(sampleSizes[1:k] * (means[1:k] - overallMeans[k])^2)
            denom <- (sampleSizes[k] - 1)
            value <- (numK - numBeforeK + numSumBeforeK - numSumK) / denom
            if (is.null(value) || length(value) != 1 || is.na(value) || value < 0) {
                warning("No calculation of stage-wise standard deviation from ",
                    "overall standard deviations possible at stage ", k,
                    call. = FALSE
                )
                return(NA_real_)
            }

            return(sqrt(value))
        },
        .getStageWiseStDevs = function(overallStDevs, sampleSizes, overallSampleSizes, means, overallMeans) {
            result <- overallStDevs
            if (length(overallStDevs) == 1) {
                return(result)
            }

            for (k in 2:length(overallStDevs)) {
                result[k] <- .getStageWiseStDev(overallStDevs, sampleSizes, overallSampleSizes, means, overallMeans, k)
            }
            return(result)
        },
        .createStageWiseData = function() {
            "Calculates stage-wise means and standard deviation if cunulative data is available"

            .data$sampleSize <<- rep(NA_real_, nrow(.data))
            .data$mean <<- rep(NA_real_, nrow(.data))
            .data$stDev <<- rep(NA_real_, nrow(.data))

            subsetLevels <- NA_character_
            if (.enrichmentEnabled) {
                subsetLevels <- levels(.data$subset)
            }

            for (s in subsetLevels) {
                for (g in levels(.data$group)) {
                    if (!is.na(s)) {
                        indices <- which(.data$subset == s & .data$group == g)
                    } else {
                        indices <- which(.data$group == g)
                    }

                    .assertValuesAreStrictlyIncreasing(.data$overallSampleSize[indices],
                        paste0("overallSampleSizes", g),
                        endingNasAllowed = TRUE
                    )

                    .data$sampleSize[indices] <<- .getStageWiseSampleSizes(.data$overallSampleSize[indices])
                    .data$mean[indices] <<- .getStageWiseMeans(
                        .data$sampleSize[indices],
                        .data$overallSampleSize[indices], .data$overallMean[indices]
                    )
                    .data$stDev[indices] <<- .getStageWiseStDevs(
                        .data$overallStDev[indices], .data$sampleSize[indices],
                        .data$overallSampleSize[indices], .data$mean[indices], .data$overallMean[indices]
                    )
                }
            }
            .setDataToVariables()
        },
        getRandomData = function() {
            return(.getRandomDataMeans(.self))
        }
    )
)

#' @examples
#'
#' datasetExample <- getDataset(
#'     means1 = c(112.3, 105.1, 121.3),
#'     means2 = c(98.1, 99.3, 100.1),
#'     means3 = c(98.1, 99.3, 100.1),
#'     stDevs1 = c(44.4, 42.9, 41.4),
#'     stDevs2 = c(46.7, 41.1, 39.5),
#'     stDevs3 = c(46.7, 41.1, 39.5),
#'     n1 = c(84, 81, 82),
#'     n2 = c(87, 83, 81),
#'     n3 = c(87, 82, 84)
#' )
#' .getRandomDataMeans(datasetExample,
#'     randomDataParamName = "outcome", numberOfVisits = 3,
#'     fixedCovariates = list(gender = c("f", "m"), bmi = c(17, 40))
#' )
#'
#' @noRd
#'
.getRandomDataMeans <- function(dataset, ...,
        treatmentName = "Treatment group",
        controlName = "Control group",
        randomDataParamName = "randomData",
        numberOfVisits = 1L,
        fixedCovariates = NULL,
        covariateEffects = NULL,
        seed = NA_real_) {
    if (!is.null(fixedCovariates)) {
        if (!is.list(fixedCovariates)) {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("fixedCovariates"), " must be a named list")
        }
    }
    if (!is.null(covariateEffects)) {
        if (!is.list(covariateEffects)) {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("covariateEffects"), " must be a named list")
        }
    }

    .assertIsSingleCharacter(treatmentName, "treatmentName")
    .assertIsSingleCharacter(controlName, "controlName")
    .assertIsSingleCharacter(randomDataParamName, "randomDataParamName")
    .assertIsSinglePositiveInteger(numberOfVisits, "numberOfVisits", validateType = FALSE)
    .assertIsSingleNumber(seed, "seed", naAllowed = TRUE)

    seed <- .setSeed(seed)

    numberOfGroups <- dataset$getNumberOfGroups()

    sampleSize <- 0
    for (stage in 1:dataset$getNumberOfStages()) {
        for (group in 1:numberOfGroups) {
            if (dataset$.enrichmentEnabled) {
                for (subset in levels(dataset$.data$subset)) {
                    n <- dataset$getSampleSize(stage = stage, group = group, subset = subset)
                    if (n > sampleSize) {
                        sampleSize <- n
                    }
                }
            } else {
                n <- dataset$getSampleSize(stage = stage, group = group)
                n <- round(n / numberOfVisits)
                if (n > sampleSize) {
                    sampleSize <- n
                }
            }
        }
    }

    idFactor <- 10^nchar(as.character(sampleSize))

    data <- NULL
    for (stage in 1:dataset$getNumberOfStages()) {
        for (group in 1:numberOfGroups) {
            for (visit in 1:numberOfVisits) {
                if (dataset$.enrichmentEnabled) {
                    for (subset in levels(dataset$.data$subset)) {
                        n <- dataset$getSampleSize(stage = stage, group = group, subset = subset)
                        randomData <- stats::rnorm(
                            n    = n,
                            mean = dataset$getMean(stage = stage, group = group, subset = subset),
                            sd   = dataset$getStDev(stage = stage, group = group, subset = subset)
                        )
                        row <- data.frame(
                            subject    = idFactor * group + c(1:n),
                            stage      = rep(stage, n),
                            group      = rep(group, n),
                            subset     = rep(subset, n),
                            randomData = randomData
                        )
                        if (is.null(data)) {
                            data <- row
                        } else {
                            data <- rbind(data, row)
                        }
                    }
                } else {
                    n <- dataset$getSampleSize(stage = stage, group = group)
                    n <- floor(n / numberOfVisits)
                    randomData <- stats::rnorm(
                        n    = sampleSize,
                        mean = dataset$getMean(stage = stage, group = group),
                        sd   = dataset$getStDev(stage = stage, group = group)
                    )

                    subjectIds <- (idFactor * 10 * stage) + (idFactor * group) + c(1:sampleSize)
                    indices <- 1:sampleSize
                    randomDataBefore <- NULL
                    numberOfDropOutsBefore <- 0
                    if (visit > 1 && !is.null(data)) {
                        randomDataBefore <- data$randomData[data$stage == visit - 1 & data$subject %in% subjectIds]
                        numberOfDropOutsBefore <- sum(is.na(randomDataBefore))
                        indices <- which(!is.na(randomDataBefore))
                    }
                    sampleSizeBefore <- sampleSize - numberOfDropOutsBefore
                    if (n < sampleSizeBefore) {
                        numberOfDropOuts <- sampleSizeBefore - n
                        dropOuts <- sample(c(rep(1, n - numberOfDropOuts), rep(0, numberOfDropOuts)))
                        randomData[indices[dropOuts == 0]] <- NA_real_
                        if (!is.null(randomDataBefore)) {
                            randomData[is.na(randomDataBefore)] <- NA_real_
                        }
                    }

                    row <- data.frame(
                        subject    = subjectIds,
                        stage      = rep(stage, sampleSize),
                        group      = rep(group, sampleSize),
                        visit      = rep(visit - 1, sampleSize),
                        randomData = randomData
                    )

                    if (is.null(data)) {
                        data <- row
                    } else {
                        data <- rbind(data, row)
                    }
                }
            }
        }
    }
    data$stage <- factor(data$stage)
    groupLevels <- paste(treatmentName, c(1:numberOfGroups))
    if (numberOfGroups > 1) {
        if (numberOfGroups == 2) {
            groupLevels[1] <- treatmentName
        }
        groupLevels[numberOfGroups] <- controlName
    }

    data$group <- factor(data$group, labels = groupLevels)
    if (dataset$.enrichmentEnabled) {
        data$subset <- factor(data$subset)
    }

    if (!is.null(randomDataParamName) && length(randomDataParamName) == 1 && !is.na(randomDataParamName)) {
        colNames <- colnames(data)
        colNames[colNames == "randomData"] <- randomDataParamName
        colnames(data) <- colNames
    }

    if (!is.null(fixedCovariates)) {
        fixedCovariateNames <- names(fixedCovariates)
        if (is.null(fixedCovariateNames) || any(nchar(trimws(fixedCovariateNames)) == 0)) {
            stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("fixedCovariates"), " must be a named list")
        }

        subjects <- sort(unique(data$subject))
        for (fixedCovariateName in fixedCovariateNames) {
            data[[fixedCovariateName]] <- rep(NA, nrow(data))
            values <- fixedCovariates[[fixedCovariateName]]
            if (is.null(values) || length(values) < 2 || any(is.na(values))) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(paste0("fixedCovariates$", fixedCovariateName)),
                    " (", .arrayToString(values), ") must be a valid numeric or character vector with a minimum of 2 values"
                )
            }

            if (is.character(values)) {
                if (length(unique(values)) < length(values)) {
                    stop(
                        C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(paste0("fixedCovariates$", fixedCovariateName)),
                        " (", .arrayToString(values, maxLength = 20), ") must be a unique vector"
                    )
                }

                fixedCovariateSample <- sample(values, length(subjects), replace = TRUE)
                for (i in 1:length(subjects)) {
                    data[[fixedCovariateName]][data$subject == subjects[i]] <- fixedCovariateSample[i]
                }
            } else if (is.numeric(values)) {
                if (length(values) == 2) {
                    minValue <- min(values)
                    maxValue <- max(values)
                    covMean <- runif(1, minValue, maxValue)
                    covSD <- covMean * 0.1
                    showMessage <- TRUE
                    for (i in 1:length(subjects)) {
                        groupName <- as.character(data$group[data$subject == subjects[i]])[1]
                        covEffect <- 1
                        if (groupName == controlName && !is.null(covariateEffects)) {
                            covEffect <- covariateEffects[[fixedCovariateName]]
                            if (is.null(covEffect)) {
                                covEffect <- 1
                            } else {
                                .assertIsNumericVector(covEffect, paste0("covariateEffects$", fixedCovariateName))
                                if (showMessage) {
                                    message(
                                        "Add effect ", covEffect, " to ",
                                        sQuote(fixedCovariateName), " of ", sQuote(groupName)
                                    )
                                    showMessage <- FALSE
                                }
                            }
                        }
                        continuesExample <- rnorm(sum(data$subject == subjects[i]), covMean * covEffect, covSD)
                        data[[fixedCovariateName]][data$subject == subjects[i]] <- continuesExample
                    }
                }
            }
        }
    }

    data$seed <- rep(seed, nrow(data))

    return(data)
}

#'
#' @title
#' Dataset Plotting
#'
#' @description
#' Plots a dataset.
#'
#' @param x The \code{\link{Dataset}} object to plot.
#' @param y Not available for this kind of plot (is only defined to be compatible
#'        to the generic plot function).
#' @param main The main title, default is \code{"Dataset"}.
#' @param xlab The x-axis label, default is \code{"Stage"}.
#' @param ylab The y-axis label.
#' @param legendTitle The legend title, default is \code{"Group"}.
#' @inheritParams param_palette
#' @inheritParams param_showSource
#' @inheritParams param_plotSettings
#' @inheritParams param_three_dots_plot
#'
#' @details
#' Generic function to plot all kinds of datasets.
#'
#' @template return_object_ggplot
#'
#' @examples
#' # Plot a dataset of means
#' dataExample <- getDataset(
#'     n1 = c(22, 11, 22, 11),
#'     n2 = c(22, 13, 22, 13),
#'     means1 = c(1, 1.1, 1, 1),
#'     means2 = c(1.4, 1.5, 3, 2.5),
#'     stDevs1 = c(1, 2, 2, 1.3),
#'     stDevs2 = c(1, 2, 2, 1.3)
#' )
#' \dontrun{
#' if (require(ggplot2)) plot(dataExample, main = "Comparison of Means")
#' }
#'
#' # Plot a dataset of rates
#' dataExample <- getDataset(
#'     n1 = c(8, 10, 9, 11),
#'     n2 = c(11, 13, 12, 13),
#'     events1 = c(3, 5, 5, 6),
#'     events2 = c(8, 10, 12, 12)
#' )
#' \dontrun{
#' if (require(ggplot2)) plot(dataExample, main = "Comparison of Rates")
#' }
#'
#' @export
#'
plot.Dataset <- function(x, y, ..., main = "Dataset", xlab = "Stage", ylab = NA_character_,
        legendTitle = "Group", palette = "Set1", showSource = FALSE, plotSettings = NULL) {
    if (x$.enrichmentEnabled) {
        stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "plot of enrichment data is not implemented yet")
    }

    .assertGgplotIsInstalled()

    if (x$isDatasetMeans()) {
        data <- x$getRandomData()
        if (is.na(ylab)) {
            ylab <- "Random data"
        }
    } else if (x$isDatasetRates()) {
        data <- x$.data
        if (is.na(ylab)) {
            ylab <- "Frequency (Events and Sample Size)"
        }
    } else if (x$isDatasetSurvival()) {
        # Open work: implement dataset plot of survival data
        stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "plot of survival data is not implemented yet")
    }

    if (!is.logical(showSource) || isTRUE(showSource)) {
        warning("'showSource' != FALSE is not implemented yet for class ", .getClassName(x))
    }

    if (is.null(plotSettings)) {
        plotSettings <- x$getPlotSettings()
    }

    if (x$getNumberOfGroups() == 1) {
        if (x$isDatasetMeans()) {
            p <- ggplot2::ggplot(
                data = data,
                ggplot2::aes(y = .data[["randomData"]], x = factor(.data[["stage"]]))
            )
            p <- p + ggplot2::geom_boxplot(ggplot2::aes(fill = .data[["stage"]]))
            p <- p + ggplot2::geom_point(
                colour = "#0e414e", shape = 20,
                position = ggplot2::position_jitter(width = .1),
                size = plotSettings$pointSize
            )
            p <- p + ggplot2::stat_summary(
                fun = "mean", geom = "point",
                shape = 21, position = ggplot2::position_dodge(.75), size = 4, fill = "white",
                colour = "black", show.legend = FALSE
            )
        } else if (x$isDatasetRates()) {
            p <- ggplot2::ggplot(show.legend = FALSE)

            # plot sample size
            p <- p + ggplot2::geom_bar(
                data = data,
                ggplot2::aes(
                    y = .data[["sampleSize"]],
                    x = factor(.data[["stage"]]), fill = factor(.data[["stage"]])
                ),
                position = "dodge", stat = "identity", alpha = 0.4
            )

            # plot events
            p <- p + ggplot2::geom_bar(
                data = data,
                ggplot2::aes(
                    y = .data[["event"]], x = factor(.data[["stage"]]),
                    fill = factor(.data[["stage"]])
                ),
                position = "dodge", stat = "identity"
            )
        } else if (x$isDatasetSurvival()) {
            # implement survival plot here
        }
    } else {
        data$stageGroup <- interaction(data$stage, data$group)

        if (x$isDatasetMeans()) {
            p <- ggplot2::ggplot(ggplot2::aes(
                y = .data[["randomData"]], x = factor(.data[["stage"]]),
                fill = factor(.data[["group"]])
            ), data = data)
            p <- p + ggplot2::geom_point(ggplot2::aes(colour = .data[["group"]]),
                shape = 20,
                position = ggplot2::position_dodge(.75),
                size = plotSettings$pointSize
            )
            p <- p + ggplot2::geom_boxplot()
            p <- p + ggplot2::stat_summary(ggplot2::aes(colour = .data[["group"]]),
                fun = "mean", geom = "point",
                shape = 21, position = ggplot2::position_dodge(.75), size = 4, fill = "white",
                show.legend = FALSE
            )
        } else if (x$isDatasetRates()) {
            p <- ggplot2::ggplot(show.legend = FALSE)

            # plot sample size
            p <- p + ggplot2::geom_bar(
                ggplot2::aes(
                    y = .data[["sampleSize"]],
                    x = factor(.data[["stage"]]), fill = factor(.data[["group"]])
                ),
                data = data, position = "dodge", stat = "identity", alpha = 0.4
            )

            # plot events
            p <- p + ggplot2::geom_bar(
                data = data,
                ggplot2::aes(
                    y = .data[["event"]], x = factor(.data[["stage"]]),
                    fill = factor(.data[["group"]])
                ),
                position = "dodge", stat = "identity"
            )
        } else if (x$isDatasetSurvival()) {
            # implement survival plot here
        }
    }

    # hide second legend
    if (x$getNumberOfGroups() == 1) {
        p <- p + ggplot2::guides(fill = FALSE, colour = FALSE)
    } else {
        p <- p + ggplot2::guides(colour = FALSE)
    }

    # set theme
    p <- plotSettings$setTheme(p)
    # p <- designSet$getPlotSettings()$hideGridLines(p)

    # set main title
    p <- plotSettings$setMainTitle(p, main)

    # set axes labels
    p <- plotSettings$setAxesLabels(p, xlab = xlab, ylab = ylab)

    # set legend
    if (x$getNumberOfGroups() > 1) {
        p <- plotSettings$setLegendPosition(p, legendPosition = C_POSITION_OUTSIDE_PLOT)
        p <- plotSettings$setLegendBorder(p)
        p <- plotSettings$setLegendTitle(p, legendTitle, mode = "fill")
        p <- plotSettings$setLegendLabelSize(p)
    }

    p <- plotSettings$setAxesAppearance(p)
    p <- plotSettings$setColorPalette(p, palette, mode = "all")
    p <- plotSettings$enlargeAxisTicks(p)

    companyAnnotationEnabled <- .getOptionalArgument("companyAnnotationEnabled", ...)
    if (is.null(companyAnnotationEnabled) || !is.logical(companyAnnotationEnabled)) {
        companyAnnotationEnabled <- FALSE
    }
    p <- plotSettings$addCompanyAnnotation(p, enabled = companyAnnotationEnabled)

    p
}

#'
#' @name DatasetRates
#'
#' @title
#' Dataset of Rates
#'
#' @description
#' Class for a dataset of rates.
#'
#' @template field_groups
#' @template field_stages
#' @template field_sampleSizes
#' @template field_overallSampleSizes
#' @template field_events
#' @template field_overallEvents
#'
#' @details
#' This object cannot be created directly; better use \code{\link{getDataset}}
#' with suitable arguments to create a dataset of rates.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include f_core_constants.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
DatasetRates <- setRefClass("DatasetRates",
    contains = "Dataset",
    fields = list(
        sampleSizes = "numeric",
        events = "numeric",
        overallSampleSizes = "numeric",
        overallEvents = "numeric"
    ),
    methods = list(
        getSampleSize = function(stage, group = 1, subset = NA_character_) {
            return(.data$sampleSize[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$sampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$sampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getEvent = function(stage, group = 1, subset = NA_character_) {
            return(.data$event[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$event[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getEventsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$event[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallSampleSize = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallSampleSize[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallSampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallSampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallEvent = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallEvent[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallEventsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        .initByDataFrame = function(dataFrame) {
            callSuper(dataFrame)

            # case: one rate - stage wise
            if (.paramExists(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)) {
                .inputType <<- "stagewise"

                sampleSizes <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(dataFrame, C_KEY_WORDS_SAMPLE_SIZES),
                    parameterName = "Sample sizes"
                )
                .validateValues(sampleSizes, "n")
                if (any(stats::na.omit(sampleSizes) <= 0)) {
                    stop(
                        C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                        "all sample sizes must be > 0, but 'n' = ",
                        .arrayToString(sampleSizes, vectorLookAndFeelEnabled = TRUE)
                    )
                }

                events <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS),
                    parameterName = "Events"
                )
                .validateValues(events, "events")
                if (any(stats::na.omit(events) < 0)) {
                    stop(
                        C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0, but 'events' = ",
                        .arrayToString(events, vectorLookAndFeelEnabled = TRUE)
                    )
                }

                kMax <- length(sampleSizes)
                stageNumber <- length(stats::na.omit(sampleSizes))
                dataInput <- data.frame(
                    sampleSizes = sampleSizes,
                    events = events
                )
                dataInput <- .getOverallData(dataInput, kMax, stage = stageNumber)
                overallSampleSizes <<- dataInput$overallSampleSizes
                overallEvents <<- dataInput$overallEvents

                .setParameterType("sampleSizes", C_PARAM_USER_DEFINED)
                .setParameterType("events", C_PARAM_USER_DEFINED)

                .setParameterType("overallSampleSizes", C_PARAM_GENERATED)
                .setParameterType("overallEvents", C_PARAM_GENERATED)
            }

            # case: one rate - overall
            else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)) {
                .inputType <<- "overall"
                overallSampleSizes <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(
                        dataFrame,
                        C_KEY_WORDS_OVERALL_SAMPLE_SIZES
                    ),
                    parameterName = "Cumulative sample sizes"
                )
                .validateValues(overallSampleSizes, "overallSampleSizes")
                .assertValuesAreStrictlyIncreasing(overallSampleSizes, "overallSampleSizes", endingNasAllowed = TRUE)

                overallEvents <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
                    parameterName = "Cumulative events"
                )
                .validateValues(overallEvents, "overallEvents")
                .assertValuesAreMonotoneIncreasing(overallEvents, "overallEvents", endingNasAllowed = TRUE)

                kMax <- length(overallSampleSizes)
                stageNumber <- length(stats::na.omit(overallSampleSizes))
                stageWiseData <- .getStageWiseData(data.frame(
                    overallSampleSizes = overallSampleSizes,
                    overallEvents = overallEvents
                ), kMax, stage = stageNumber)
                sampleSizes <<- stageWiseData$sampleSizes
                events <<- stageWiseData$events

                .setParameterType("sampleSizes", C_PARAM_GENERATED)
                .setParameterType("events", C_PARAM_GENERATED)

                .setParameterType("overallSampleSizes", C_PARAM_USER_DEFINED)
                .setParameterType("overallEvents", C_PARAM_USER_DEFINED)
            }

            # case: two or more rates - stage wise
            else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 1)) &&
                    .paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 2))) {
                .inputType <<- "stagewise"

                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)

                stages <<- rep(stages, numberOfTreatmentGroups)

                groups <<- integer(0)
                sampleSizes <<- numeric(0)
                events <<- numeric(0)
                overallSampleSizes <<- numeric(0)
                overallEvents <<- numeric(0)
                for (group in 1:numberOfTreatmentGroups) {
                    sampleSizesTemp <- .getValidatedFloatingPointNumbers(
                        .getValuesByParameterName(
                            dataFrame, C_KEY_WORDS_SAMPLE_SIZES,
                            suffix = group
                        ),
                        parameterName = "Sample sizes"
                    )
                    .validateValues(sampleSizesTemp, paste0("n", group))
                    if (any(stats::na.omit(sampleSizesTemp) <= 0)) {
                        stop(
                            C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                            "all sample sizes must be > 0, but 'n", group, "' = ",
                            .arrayToString(sampleSizesTemp, vectorLookAndFeelEnabled = TRUE)
                        )
                    }
                    sampleSizes <<- c(sampleSizes, sampleSizesTemp)

                    eventsTemp <- .getValidatedFloatingPointNumbers(
                        .getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS, suffix = group),
                        parameterName = "Events"
                    )
                    .validateValues(eventsTemp, paste0("events", group))
                    if (any(stats::na.omit(eventsTemp) < 0)) {
                        stop(
                            C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0, but 'events", group, "' = ",
                            .arrayToString(eventsTemp, vectorLookAndFeelEnabled = TRUE)
                        )
                    }
                    events <<- c(events, eventsTemp)

                    groups <<- c(groups, rep(as.integer(group), length(sampleSizesTemp)))

                    kMax <- length(sampleSizesTemp)
                    numberOfValidStages <- length(stats::na.omit(sampleSizesTemp))
                    overallData <- .getOverallData(data.frame(
                        sampleSizes = sampleSizesTemp,
                        events = eventsTemp
                    ), kMax, stage = numberOfValidStages)

                    overallSampleSizes <<- c(overallSampleSizes, overallData$overallSampleSizes)
                    overallEvents <<- c(overallEvents, overallData$overallEvents)
                }
                if (sum(stats::na.omit(sampleSizes) < 0) > 0) {
                    stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be >= 0")
                }

                .setParameterType("sampleSizes", C_PARAM_USER_DEFINED)
                .setParameterType("events", C_PARAM_USER_DEFINED)

                .setParameterType("overallSampleSizes", C_PARAM_GENERATED)
                .setParameterType("overallEvents", C_PARAM_GENERATED)
            }

            # case: two or more rates - overall
            else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 1)) &&
                    .paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 2))) {
                .inputType <<- "overall"

                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)

                stages <<- rep(stages, numberOfTreatmentGroups)

                groups <<- integer(0)
                sampleSizes <<- numeric(0)
                events <<- numeric(0)
                overallSampleSizes <<- numeric(0)
                overallEvents <<- numeric(0)
                for (group in 1:numberOfTreatmentGroups) {
                    overallSampleSizesTemp <- .getValidatedFloatingPointNumbers(
                        .getValuesByParameterName(
                            dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES,
                            suffix = group
                        ),
                        parameterName = "Cumulative sample sizes"
                    )
                    .validateValues(overallSampleSizesTemp, paste0("overallSampleSizes", group))
                    .assertValuesAreStrictlyIncreasing(overallSampleSizesTemp,
                        paste0("overallSampleSizes", group),
                        endingNasAllowed = TRUE
                    )
                    overallSampleSizes <<- c(overallSampleSizes, overallSampleSizesTemp)

                    overallEventsTemp <- .getValidatedFloatingPointNumbers(
                        .getValuesByParameterName(dataFrame,
                            C_KEY_WORDS_OVERALL_EVENTS,
                            suffix = group
                        ),
                        parameterName = "Cumulative events"
                    )
                    .validateValues(overallEventsTemp, paste0("overallEvents", group))
                    .assertValuesAreMonotoneIncreasing(overallEventsTemp,
                        paste0("overallEvents", group),
                        endingNasAllowed = TRUE
                    )
                    overallEvents <<- c(overallEvents, overallEventsTemp)

                    groups <<- c(groups, rep(as.integer(group), length(overallSampleSizesTemp)))

                    kMax <- length(overallSampleSizesTemp)
                    numberOfValidStages <- length(stats::na.omit(overallSampleSizesTemp))
                    stageWiseData <- .getStageWiseData(data.frame(
                        overallSampleSizes = overallSampleSizesTemp,
                        overallEvents = overallEventsTemp
                    ), kMax, stage = numberOfValidStages)

                    validatedSampleSizes <- stageWiseData$sampleSizes
                    .validateValues(validatedSampleSizes, paste0("n", group))
                    sampleSizes <<- c(sampleSizes, validatedSampleSizes)
                    events <<- c(events, stageWiseData$events)

                    if (sum(stats::na.omit(sampleSizes) < 0) > 0) {
                        stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be >= 0")
                    }
                }

                .setParameterType("sampleSizes", C_PARAM_GENERATED)
                .setParameterType("events", C_PARAM_GENERATED)

                .setParameterType("overallSampleSizes", C_PARAM_USER_DEFINED)
                .setParameterType("overallEvents", C_PARAM_USER_DEFINED)
            } else {
                stop(
                    C_EXCEPTION_TYPE_MISSING_ARGUMENT,
                    "sample sizes are missing or not correctly specified"
                )
            }

            if (sum(stats::na.omit(events) < 0) > 0) {
                stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0")
            }

            .recreateDataFrame()
            if (.enrichmentEnabled) {
                .createOverallDataEnrichment()
            }
        },
        .recreateDataFrame = function() {
            callSuper()
            .data <<- cbind(.data, data.frame(
                sampleSize = sampleSizes,
                event = events,
                overallSampleSize = overallSampleSizes,
                overallEvent = overallEvents
            ))
            .orderDataByStageAndGroup()
            .setDataToVariables()
        },
        .setDataToVariables = function() {
            callSuper()
            sampleSizes <<- .data$sampleSize
            events <<- .data$event
            overallSampleSizes <<- .data$overallSampleSize
            overallEvents <<- .data$overallEvent
        },
        .fillWithNAs = function(kMax) {
            callSuper(kMax)
            n <- .getNumberOfNAsToAdd(kMax)

            sampleSizes <<- c(sampleSizes, rep(NA_real_, n))
            events <<- c(events, rep(NA_real_, n))

            overallSampleSizes <<- c(overallSampleSizes, rep(NA_real_, n))
            overallEvents <<- c(overallEvents, rep(NA_real_, n))

            .recreateDataFrame()
        },
        .trim = function(kMax = NA_integer_) {
            indices <- callSuper(kMax)
            if (length(indices) == 0) {
                return(invisible(FALSE))
            }

            sampleSizes <<- sampleSizes[indices]
            events <<- events[indices]

            overallSampleSizes <<- overallSampleSizes[indices]
            overallEvents <<- overallEvents[indices]

            .recreateDataFrame()

            return(invisible(TRUE))
        },
        getRandomData = function() {
            data <- NULL
            for (stage in 1:getNumberOfStages()) {
                for (group in 1:getNumberOfGroups()) {
                    if (.enrichmentEnabled) {
                        for (subset in levels(.data$subset)) {
                            n <- getSampleSize(stage = stage, group = group, subset = subset)
                            numberOfEvents <- getEvent(stage = stage, group = group, subset = subset)
                            randomIndizes <- sample(x = c(1:n), size = numberOfEvents, replace = FALSE)
                            randomData <- rep(0, n)
                            randomData[randomIndizes] <- 1

                            row <- data.frame(
                                stage = stage,
                                group = group,
                                subset = subset,
                                randomData = randomData
                            )
                            if (is.null(data)) {
                                data <- row
                            } else {
                                data <- rbind(data, row)
                            }
                        }
                    } else {
                        n <- getSampleSize(stage = stage, group = group)
                        numberOfEvents <- getEvent(stage = stage, group = group)
                        randomIndizes <- sample(x = c(1:n), size = numberOfEvents, replace = FALSE)
                        randomData <- rep(0, n)
                        randomData[randomIndizes] <- 1

                        row <- data.frame(
                            stage = stage,
                            group = group,
                            randomData = randomData
                        )
                        if (is.null(data)) {
                            data <- row
                        } else {
                            data <- rbind(data, row)
                        }
                    }
                }
            }
            data$stage <- factor(data$stage)
            data$group <- factor(data$group, label = paste("Group", c(1:getNumberOfGroups())))
            return(data)
        },
        .createOverallDataEnrichment = function() {
            if (!.enrichmentEnabled) {
                return(invisible())
            }

            .data$overallSampleSize <<- rep(NA_real_, nrow(.data))
            .data$overallEvent <<- rep(NA_real_, nrow(.data))
            for (s in levels(.data$subset)) {
                for (g in levels(.data$group)) {
                    indices <- which(.data$subset == s & .data$group == g)
                    .data$overallSampleSize[indices] <<- cumsum(.data$sampleSize[indices])
                    .data$overallEvent[indices] <<- cumsum(.data$event[indices])
                }
            }

            .setDataToVariables()
        },
        .getOverallData = function(dataInput, kMax, stage) {
            "Calculates cumulative values if stage-wise data is available"
            if (is.null(dataInput[["sampleSizes"]])) {
                stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "data input must contain variable 'sampleSizes'")
            }
            if (is.null(dataInput[["events"]])) {
                stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "data input must contain variable 'events'")
            }

            dataInput$overallSampleSizes <- c(
                cumsum(dataInput$sampleSizes[1:stage]),
                rep(NA_real_, kMax - stage)
            )

            dataInput$overallEvents <- c(
                cumsum(dataInput$events[1:stage]),
                rep(NA_real_, kMax - stage)
            )

            return(dataInput)
        },
        .getStageWiseData = function(dataInput, kMax, stage) {
            "Calculates stage-wise values if cumulative data is available"
            if (is.null(dataInput[["overallSampleSizes"]])) {
                stop(
                    C_EXCEPTION_TYPE_MISSING_ARGUMENT,
                    "data input must contain variable 'overallSampleSizes'"
                )
            }
            if (is.null(dataInput[["overallEvents"]])) {
                stop(
                    C_EXCEPTION_TYPE_MISSING_ARGUMENT,
                    "data input must contain variable 'overallEvents'"
                )
            }

            dataInput$sampleSizes <- c(dataInput$overallSampleSizes[1:stage], rep(NA_real_, kMax - stage))
            if (stage > 1) {
                dataInput$sampleSizes[2:stage] <- dataInput$overallSampleSizes[2:stage] -
                    dataInput$overallSampleSizes[1:(stage - 1)]
            }

            dataInput$events <- c(dataInput$overallEvents[1:stage], rep(NA_real_, kMax - stage))
            if (stage > 1) {
                dataInput$events[2:stage] <- dataInput$overallEvents[2:stage] -
                    dataInput$overallEvents[1:(stage - 1)]
            }

            return(dataInput)
        }
    )
)

#'
#' @name DatasetSurvival
#'
#' @title
#' Dataset of Survival Data
#'
#' @description
#' Class for a dataset of survival data.
#'
#' @template field_groups
#' @template field_stages
#' @template field_events
#' @template field_overallEvents
#' @template field_allocationRatios
#' @template field_overallAllocationRatios
#' @template field_logRanks
#' @template field_overallLogRanks
#'
#'
#' @details
#' This object cannot be created directly; better use \code{\link{getDataset}}
#' with suitable arguments to create a dataset of survival data.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include f_core_constants.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
DatasetSurvival <- setRefClass("DatasetSurvival",
    contains = "Dataset",
    fields = list(
        overallEvents = "numeric",
        overallAllocationRatios = "numeric",
        overallLogRanks = "numeric",
        events = "numeric",
        allocationRatios = "numeric",
        logRanks = "numeric"
    ),
    methods = list(
        getEvent = function(stage, group = 1, subset = NA_character_) {
            return(.data$event[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$event[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getEventsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$event[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getAllocationRatio = function(stage, group = 1, subset = NA_character_) {
            return(.data$allocationRatio[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getAllocationRatios = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$allocationRatio[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getAllocationRatiosUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$allocationRatio[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getLogRank = function(stage, group = 1, subset = NA_character_) {
            return(.data$logRank[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getLogRanks = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$logRank[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getLogRanksUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$logRank[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallEvent = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallEvent[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallEventsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallAllocationRatio = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallAllocationRatio[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallAllocationRatios = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallAllocationRatio[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallAllocationRatiosUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallAllocationRatio[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallLogRank = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallLogRank[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallLogRanks = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallLogRank[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallLogRanksUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallLogRank[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        .getAllocationRatioDefaultValues = function(stages, events, logRanks) {
            allocationRatioDefaultValues <- rep(C_ALLOCATION_RATIO_DEFAULT, length(stages))
            indices <- which(is.na(events) | is.na(logRanks))
            allocationRatioDefaultValues[indices] <- NA_real_
            return(allocationRatioDefaultValues)
        },
        .initByDataFrame = function(dataFrame) {
            callSuper(dataFrame)

            if (inherits(.self, "DatasetEnrichmentSurvival")) {
                if (.paramExists(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS) ||
                        .paramExists(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)) {
                    .inputType <<- "stagewise"

                    events <<- .getValidatedFloatingPointNumbers(
                        .getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS),
                        parameterName = "Events"
                    )
                    .validateValues(events, "events")

                    allocationRatios <<- .getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_ALLOCATION_RATIOS,
                        defaultValues = .getAllocationRatioDefaultValues(stages, events, expectedEvents)
                    )
                    .validateValues(allocationRatios, "allocationRatios")
                } else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS) ||
                        .paramExists(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)) {
                    .inputType <<- "overall"

                    overallEvents <<- .getValidatedFloatingPointNumbers(
                        .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
                        parameterName = "Cumulative events"
                    )
                    .validateValues(overallEvents, "overallEvents")

                    overallAllocationRatios <<- .getValuesByParameterName(
                        dataFrame,
                        parameterNameVariants = C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
                        defaultValues = .getAllocationRatioDefaultValues(stages, overallEvents, overallExpectedEvents)
                    )
                    .validateValues(overallAllocationRatios, "overallAllocationRatios")
                }

                # stratified enrichment: do nothing more here
            }

            # case: survival, two groups - overall
            else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS)) {
                .inputType <<- "overall"
                overallEvents <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
                    parameterName = "Cumulative events"
                )
                .validateValues(overallEvents, "overallEvents")
                if (!.enrichmentEnabled) {
                    .assertValuesAreStrictlyIncreasing(overallEvents, "overallEvents", endingNasAllowed = TRUE)
                }

                overallLogRanks <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS)
                .validateValues(overallLogRanks, "overallLogRanks")

                overallAllocationRatios <<- .getValuesByParameterName(
                    dataFrame,
                    parameterNameVariants = C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
                    defaultValues = .getAllocationRatioDefaultValues(stages, overallEvents, overallLogRanks)
                )
                .validateValues(overallAllocationRatios, "overallAllocationRatios")

                .setParameterType("groups", C_PARAM_NOT_APPLICABLE)
            }

            # case: survival, two groups - stage wise
            else if (.paramExists(dataFrame, C_KEY_WORDS_LOG_RANKS)) {
                .inputType <<- "stagewise"
                events <<- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
                    dataFrame, C_KEY_WORDS_EVENTS
                ), parameterName = "Events")
                .validateValues(events, "events")
                if (any(stats::na.omit(events) < 0)) {
                    stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0")
                }

                logRanks <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_LOG_RANKS)
                .validateValues(logRanks, "logRanks")

                allocationRatios <<- .getValuesByParameterName(
                    dataFrame, C_KEY_WORDS_ALLOCATION_RATIOS,
                    defaultValues = .getAllocationRatioDefaultValues(stages, events, logRanks)
                )
                .validateValues(allocationRatios, "allocationRatios")

                .setParameterType("groups", C_PARAM_NOT_APPLICABLE)
            }

            # case: survival, three ore more groups - overall
            else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_LOG_RANKS, 1)) &&
                    .paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_LOG_RANKS, 2))) {
                .inputType <<- "overall"

                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS)

                stages <<- rep(stages, numberOfTreatmentGroups)

                groups <<- integer(0)
                overallEvents <<- numeric(0)
                overallAllocationRatios <<- numeric(0)
                overallLogRanks <<- numeric(0)
                for (group in 1:numberOfTreatmentGroups) {
                    overallEventsTemp <- .getValuesByParameterName(dataFrame,
                        C_KEY_WORDS_OVERALL_EVENTS,
                        suffix = group
                    )
                    .validateValues(overallEventsTemp, paste0("overallEvents", group))
                    if (is.null(dataFrame[["subset"]]) || length(unique(dataFrame[["subset"]])) <= 1) {
                        .assertValuesAreStrictlyIncreasing(overallEventsTemp,
                            paste0("overallEvents", group),
                            endingNasAllowed = TRUE
                        )
                    }
                    overallEvents <<- c(overallEvents, overallEventsTemp)

                    overallLogRanksTemp <- .getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS,
                        suffix = group
                    )
                    .validateValues(overallLogRanksTemp, paste0("overallLogRanks", group))
                    overallLogRanks <<- c(overallLogRanks, overallLogRanksTemp)

                    overallAllocationRatiosTemp <- .getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
                        suffix = group,
                        defaultValues = .getAllocationRatioDefaultValues(
                            overallEventsTemp,
                            overallEventsTemp, overallLogRanksTemp
                        )
                    )
                    .validateValues(overallAllocationRatiosTemp, paste0("overallAllocationRatios", group))
                    overallAllocationRatios <<- c(overallAllocationRatios, overallAllocationRatiosTemp)

                    groups <<- c(groups, rep(as.integer(group), length(overallLogRanksTemp)))
                }
            }

            # case: survival, three ore more groups - stage wise
            else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_LOG_RANKS, 1)) &&
                    .paramExists(dataFrame, paste0(C_KEY_WORDS_LOG_RANKS, 2))) {
                .inputType <<- "stagewise"
                numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_LOG_RANKS)

                stages <<- rep(stages, numberOfTreatmentGroups)

                groups <<- integer(0)
                events <<- numeric(0)
                allocationRatios <<- numeric(0)
                logRanks <<- numeric(0)
                for (group in 1:numberOfTreatmentGroups) {
                    eventsTemp <- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_EVENTS,
                        suffix = group
                    ), parameterName = "Events")
                    if (any(stats::na.omit(eventsTemp) < 0)) {
                        stop(
                            C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0, but 'events", group, "' = ",
                            .arrayToString(eventsTemp, vectorLookAndFeelEnabled = TRUE)
                        )
                    }
                    events <<- c(events, eventsTemp)

                    logRanksTemp <- .getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_LOG_RANKS,
                        suffix = group
                    )
                    .validateValues(logRanksTemp, paste0("n", group))
                    logRanks <<- c(logRanks, logRanksTemp)

                    allocationRatiosTemp <- .getValuesByParameterName(
                        dataFrame, C_KEY_WORDS_ALLOCATION_RATIOS,
                        suffix = group,
                        defaultValues = .getAllocationRatioDefaultValues(
                            eventsTemp,
                            eventsTemp, logRanksTemp
                        )
                    )
                    .validateValues(allocationRatiosTemp, paste0("allocationRatios", group))
                    allocationRatios <<- c(allocationRatios, allocationRatiosTemp)

                    groups <<- c(groups, rep(as.integer(group), length(eventsTemp)))
                }
            } else {
                stop(
                    C_EXCEPTION_TYPE_RUNTIME_ISSUE, "unable to identify case for ", .getClassName(.self), " and columns ",
                    .arrayToString(colnames(dataFrame))
                )
            }

            if (.inputType == "stagewise") {
                n <- length(events)
                overallEvents <<- rep(NA_real_, n)
                overallAllocationRatios <<- rep(NA_real_, n)
                overallLogRanks <<- rep(NA_real_, n)

                .setParameterType("events", C_PARAM_USER_DEFINED)
                .setParameterType("allocationRatios", C_PARAM_USER_DEFINED)
                if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                    .setParameterType("logRanks", C_PARAM_USER_DEFINED)
                }

                .setParameterType("overallEvents", C_PARAM_GENERATED)
                .setParameterType("overallAllocationRatios", C_PARAM_GENERATED)
                if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                    .setParameterType("overallLogRanks", C_PARAM_GENERATED)
                }

                if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                    .recreateDataFrame()
                    .createOverallData()
                }
            } else {
                n <- length(overallEvents)
                events <<- rep(NA_real_, n)
                allocationRatios <<- rep(NA_real_, n)
                logRanks <<- rep(NA_real_, n)

                .setParameterType("events", C_PARAM_GENERATED)
                .setParameterType("allocationRatios", C_PARAM_GENERATED)
                if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                    .setParameterType("logRanks", C_PARAM_GENERATED)
                }

                .setParameterType("overallEvents", C_PARAM_USER_DEFINED)
                .setParameterType("overallAllocationRatios", C_PARAM_USER_DEFINED)
                if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                    .setParameterType("overallLogRanks", C_PARAM_USER_DEFINED)
                }

                if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                    .recreateDataFrame()
                    .createStageWiseData()
                }
            }
        },
        .recreateDataFrame = function() {
            callSuper()

            if (inherits(.self, "DatasetEnrichmentSurvival")) {
                .data <<- cbind(.data, data.frame(
                    overallEvent = overallEvents,
                    overallExpectedEvent = overallExpectedEvents,
                    overallVarianceEvent = overallVarianceEvents,
                    overallAllocationRatio = overallAllocationRatios,
                    event = events,
                    expectedEvent = expectedEvents,
                    # varianceEvent = varianceEvents, # maybe implemented later
                    allocationRatio = allocationRatios
                ))
            } else {
                .data <<- cbind(.data, data.frame(
                    overallEvent = overallEvents,
                    overallAllocationRatio = overallAllocationRatios,
                    overallLogRank = overallLogRanks,
                    event = events,
                    allocationRatio = allocationRatios,
                    logRank = logRanks
                ))
            }
            .orderDataByStageAndGroup()
            .setDataToVariables()
        },
        .setDataToVariables = function() {
            callSuper()
            overallEvents <<- .data$overallEvent
            overallAllocationRatios <<- .data$overallAllocationRatio
            events <<- .data$event
            allocationRatios <<- .data$allocationRatio
            if (!inherits(.self, "DatasetEnrichmentSurvival")) {
                overallLogRanks <<- .data$overallLogRank
                logRanks <<- .data$logRank
            }
        },
        .fillWithNAs = function(kMax) {
            callSuper(kMax)
            n <- .getNumberOfNAsToAdd(kMax)

            overallEvents <<- c(overallEvents, rep(NA_real_, n))
            overallAllocationRatios <<- c(overallAllocationRatios, rep(NA_real_, n))
            overallLogRanks <<- c(overallLogRanks, rep(NA_real_, n))

            events <<- c(events, rep(NA_real_, n))
            allocationRatios <<- c(allocationRatios, rep(NA_real_, n))
            logRanks <<- c(logRanks, rep(NA_real_, n))

            .recreateDataFrame()
        },
        .trim = function(kMax = NA_integer_) {
            indices <- callSuper(kMax)
            if (length(indices) == 0) {
                return(invisible(FALSE))
            }

            events <<- events[indices]
            allocationRatios <<- allocationRatios[indices]
            logRanks <<- logRanks[indices]

            overallEvents <<- overallEvents[indices]
            overallAllocationRatios <<- overallAllocationRatios[indices]
            overallLogRanks <<- overallLogRanks[indices]

            .recreateDataFrame()

            return(invisible(TRUE))
        },
        getRandomData = function() {
            stop(
                C_EXCEPTION_TYPE_RUNTIME_ISSUE,
                "the function 'DatasetSurvival.getRandomData()' is not implemented yet"
            )
        },
        .getOverallLogRanks = function(logRanks, events, overallEvents,
                kMax = length(logRanks), stage = length(logRanks)) {
            result <- c(logRanks[1:stage], rep(NA_real_, kMax - stage))
            if (stage == 1) {
                return(result)
            }
            for (k in 2:stage) {
                result[k] <-
                    (sqrt(events[k]) * logRanks[k] +
                        sqrt(overallEvents[k - 1]) *
                            result[k - 1]) / sqrt(overallEvents[k])
            }
            return(result)
        },
        .getOverallAllocationRatios = function(allocationRatios, events, overallEvents,
                kMax = length(allocationRatios), stage = length(allocationRatios)) {
            result <- c(
                allocationRatios[1:stage],
                rep(NA_real_, kMax - stage)
            )
            if (stage == 1) {
                return(result)
            }
            for (k in 2:stage) {
                result[k] <- (events[k] *
                    allocationRatios[k] + overallEvents[k - 1] *
                        result[k - 1]) / overallEvents[k]
            }
            return(result)
        },
        .createOverallData = function() {
            .data$overallEvent <<- rep(NA_real_, nrow(.data))
            if (inherits(.self, "DatasetEnrichmentSurvival")) {
                .data$overallExpectedEvent <<- rep(NA_real_, nrow(.data))
                .data$overallVarianceEvent <<- rep(NA_real_, nrow(.data))
            } else {
                .data$overallLogRank <<- rep(NA_real_, nrow(.data))
            }
            .data$overallAllocationRatio <<- rep(NA_real_, nrow(.data))
            subsetLevels <- NA_character_
            if (.enrichmentEnabled) {
                subsetLevels <- levels(.data$subset)
            }
            for (s in subsetLevels) {
                for (g in levels(.data$group)) {
                    if (!is.na(s)) {
                        indices <- which(.data$subset == s & .data$group == g)
                    } else {
                        indices <- which(.data$group == g)
                    }
                    .data$overallEvent[indices] <<- cumsum(.data$event[indices])
                    .data$overallExpectedEvent[indices] <<- cumsum(.data$expectedEvent[indices])
                    # .data$overallVarianceEvent[indices] <<- # maybe implemented later
                    .data$overallLogRank[indices] <<- .getOverallLogRanks(
                        .data$logRank[indices], .data$event[indices], .data$overallEvent[indices]
                    )
                    .data$overallAllocationRatio[indices] <<- .getOverallAllocationRatios(
                        .data$allocationRatio[indices], .data$event[indices], .data$overallEvent[indices]
                    )
                }
            }
            .setDataToVariables()
        },
        .getStageWiseEvents = function(overallEvents) {
            result <- overallEvents
            if (length(result) == 1) {
                return(result)
            }

            kMax <- length(result)
            result[2:kMax] <- overallEvents[2:kMax] - overallEvents[1:(kMax - 1)]
            return(result)
        },
        .getStageWiseLogRanks = function(overallLogRanks, overallEvents) {
            result <- overallLogRanks
            if (length(result) == 1) {
                return(result)
            }

            kMax <- length(result)
            result[2:kMax] <- (sqrt(overallEvents[2:kMax]) *
                overallLogRanks[2:kMax] -
                sqrt(overallEvents[1:(kMax - 1)]) *
                    overallLogRanks[1:(kMax - 1)]) /
                sqrt(overallEvents[2:kMax] - overallEvents[1:(kMax - 1)])
            return(result)
        },
        .getStageWiseAllocationRatios = function(overallAllocationRatios, events, overallEvents) {
            result <- overallAllocationRatios
            if (length(result) == 1) {
                return(result)
            }

            kMax <- length(result)
            result[2:kMax] <- (
                overallAllocationRatios[2:kMax] -
                    overallAllocationRatios[1:(kMax - 1)] *
                        overallEvents[1:(kMax - 1)] / overallEvents[2:kMax]
            ) / (events[2:kMax] / overallEvents[2:kMax])
            if (any(stats::na.omit(result) <= 0)) {
                stop(
                    C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
                    "overall allocation ratios not correctly specified: ",
                    "one or more calculated stage-wise allocation ratios <= 0"
                )
            }
            return(result)
        },
        .createStageWiseData = function() {
            "Calculates stage-wise logrank statistics, events, and allocation ratios if cumulative data is available"

            .data$event <<- rep(NA_real_, nrow(.data))
            if (inherits(.self, "DatasetEnrichmentSurvival")) {
                .data$expectedEvent <<- rep(NA_real_, nrow(.data))
                .data$varianceEvent <<- rep(NA_real_, nrow(.data))
            } else {
                .data$logRank <<- rep(NA_real_, nrow(.data))
            }
            .data$allocationRatio <<- rep(NA_real_, nrow(.data))

            subsetLevels <- NA_character_
            if (.enrichmentEnabled) {
                subsetLevels <- levels(.data$subset)
            }

            for (s in subsetLevels) {
                for (g in levels(.data$group)) {
                    if (!is.na(s)) {
                        indices <- which(.data$subset == s & .data$group == g)
                    } else {
                        indices <- which(.data$group == g)
                    }

                    groupNumber <- ifelse(levels(.data$group) > 1, g, "")
                    if (.enrichmentEnabled) {
                        .assertValuesAreStrictlyIncreasing(.data$overallEvent[indices],
                            paste0("overallEvents", groupNumber, "[subset == \"", s, "\"]"),
                            endingNasAllowed = TRUE
                        )
                    } else {
                        .assertValuesAreStrictlyIncreasing(.data$overallEvent[indices],
                            paste0("overallEvents", groupNumber),
                            endingNasAllowed = TRUE
                        )
                    }

                    .data$event[indices] <<- .getStageWiseEvents(.data$overallEvent[indices])
                    if (inherits(.self, "DatasetEnrichmentSurvival")) {
                        .data$expectedEvent[indices] <<- .getStageWiseEvents(.data$overallExpectedEvent[indices])
                        # .data$varianceEvent[indices] <<- # maybe implemented later
                    } else {
                        .data$logRank[indices] <<- .getStageWiseLogRanks(
                            .data$overallLogRank[indices], .data$overallEvent[indices]
                        )
                    }
                    .data$allocationRatio[indices] <<- .getStageWiseAllocationRatios(
                        .data$overallAllocationRatio[indices],
                        .data$event[indices], .data$overallEvent[indices]
                    )
                }
            }
            .setDataToVariables()
        }
    )
)

#'
#' @rdname DatasetSurvival
#'
#' @keywords internal
#'
DatasetEnrichmentSurvival <- setRefClass("DatasetEnrichmentSurvival",
    contains = "DatasetSurvival",
    fields = list(
        expectedEvents = "numeric",
        varianceEvents = "numeric",
        overallExpectedEvents = "numeric",
        overallVarianceEvents = "numeric"
    ),
    methods = list(
        .initByDataFrame = function(dataFrame) {
            callSuper(dataFrame)

            if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS) ||
                    .paramExists(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)) {
                if (!.paramExists(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS)) {
                    stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'overallExpectedEvents' is missing")
                }
                if (!.paramExists(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)) {
                    stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'overallVarianceEvents' is missing")
                }

                .inputType <<- "overall"

                overallEvents <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
                    parameterName = "Cumulative events"
                )
                .validateValues(overallEvents, "overallEvents")

                overallExpectedEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS)
                .validateValues(overallExpectedEvents, "overallExpectedEvents")

                overallVarianceEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)
                .validateValues(overallVarianceEvents, "overallVarianceEvents")

                overallAllocationRatios <<- .getValuesByParameterName(
                    dataFrame,
                    parameterNameVariants = C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
                    defaultValues = .getAllocationRatioDefaultValues(stages, overallEvents, overallExpectedEvents)
                )
                .validateValues(overallAllocationRatios, "overallAllocationRatios")
            } else if (.paramExists(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS) ||
                    .paramExists(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)) {
                if (!.paramExists(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS)) {
                    stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'expectedEvents' is missing")
                }
                if (!.paramExists(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)) {
                    stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'varianceEvents' is missing")
                }

                .inputType <<- "stagewise"

                events <<- .getValidatedFloatingPointNumbers(
                    .getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS),
                    parameterName = "Events"
                )
                .validateValues(events, "events")

                expectedEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS)
                .validateValues(expectedEvents, "expectedEvents")

                varianceEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)
                .validateValues(varianceEvents, "varianceEvents")

                allocationRatios <<- .getValuesByParameterName(
                    dataFrame,
                    parameterNameVariants = C_KEY_WORDS_ALLOCATION_RATIOS,
                    defaultValues = .getAllocationRatioDefaultValues(stages, events, expectedEvents)
                )
                .validateValues(allocationRatios, "allocationRatios")
            }

            .setParameterType("groups", C_PARAM_NOT_APPLICABLE)

            if (.inputType == "stagewise") {
                n <- length(events)
                overallExpectedEvents <<- rep(NA_real_, n)
                overallVarianceEvents <<- rep(NA_real_, n)

                .setParameterType("events", C_PARAM_USER_DEFINED)
                .setParameterType("allocationRatios", C_PARAM_USER_DEFINED)
                .setParameterType("expectedEvents", C_PARAM_USER_DEFINED)
                .setParameterType("varianceEvents", C_PARAM_USER_DEFINED)

                .setParameterType("overallEvents", C_PARAM_GENERATED)
                .setParameterType("overallAllocationRatios", C_PARAM_GENERATED)
                .setParameterType("overallExpectedEvents", C_PARAM_GENERATED)
                .setParameterType("overallVarianceEvents", C_PARAM_GENERATED)

                .recreateDataFrame()
                .createOverallData()
            } else {
                n <- length(overallEvents)
                expectedEvents <<- rep(NA_real_, n)
                varianceEvents <<- rep(NA_real_, n)

                .setParameterType("events", C_PARAM_GENERATED)
                .setParameterType("allocationRatios", C_PARAM_GENERATED)
                .setParameterType("expectedEvents", C_PARAM_GENERATED)
                .setParameterType("varianceEvents", C_PARAM_GENERATED)

                .setParameterType("overallEvents", C_PARAM_USER_DEFINED)
                .setParameterType("overallAllocationRatios", C_PARAM_USER_DEFINED)
                .setParameterType("overallExpectedEvents", C_PARAM_USER_DEFINED)
                .setParameterType("overallVarianceEvents", C_PARAM_USER_DEFINED)

                .recreateDataFrame()
                .createStageWiseData()
            }
        },
        .getVisibleFieldNames = function() {
            visibleFieldNames <- callSuper()
            visibleFieldNames <- visibleFieldNames[!(visibleFieldNames %in% c("logRanks", "overallLogRanks"))]
            return(visibleFieldNames)
        },
        .setDataToVariables = function() {
            callSuper()
            overallExpectedEvents <<- .data$overallExpectedEvent
            overallVarianceEvents <<- .data$overallVarianceEvent
            expectedEvents <<- .data$expectedEvent
        },
        getOverallExpectedEvent = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallExpectedEvent[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallExpectedEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallExpectedEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallExpectedEventsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallExpectedEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
        },
        getOverallVarianceEvent = function(stage, group = 1, subset = NA_character_) {
            return(.data$overallVarianceEvent[.getIndices(stage = stage, group = group, subset = subset)])
        },
        getOverallVarianceEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
            return(.data$overallVarianceEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
        },
        getOverallVarianceEventsUpTo = function(to, group = 1, subset = NA_character_) {
            return(.data$overallVarianceEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
        }
    )
)

.isFloatingPointSampleSize <- function(object, param) {
    values <- object[[param]]
    if (is.null(values)) {
        return(FALSE)
    }

    values <- na.omit(values)
    if (length(values) == 0) {
        return(FALSE)
    }

    if (any(floor(values) != values)) {
        return(TRUE)
    }

    return(FALSE)
}

.getMaxDigits <- function(values) {
    values <- na.omit(values)
    if (length(values) == 0) {
        return(0)
    }

    values <- trimws(format(values, scientific = FALSE, digits = 15))
    values <- gsub("^\\d*\\.", "", values)
    values <- gsub("\\D", "", values)
    max(nchar(values))
}


#'
#' @title
#' Dataset Summary
#'
#' @description
#' Displays a summary of \code{\link{Dataset}} object.
#'
#' @param object A \code{\link{Dataset}} object.
#' @inheritParams param_digits
#' @inheritParams param_three_dots
#'
#' @details
#' Summarizes the parameters and results of a dataset.
#'
#' @template details_summary
#'
#' @template return_object_summary
#' @template how_to_get_help_for_generics
#'
#' @export
#'
#' @keywords internal
#'
summary.Dataset <- function(object, ..., type = 1, digits = NA_integer_) {
    .warnInCaseOfUnknownArguments(functionName = "summary", ...)

    if (type == 1 && inherits(object, "SummaryFactory")) {
        return(object)
    }

    if (type != 1) {
        return(summary.ParameterSet(object, type = type, digits = digits, ...))
    }

    intervalFormat <- getOption("rpact.summary.intervalFormat", "[%s; %s]")
    .assertIsValidSummaryIntervalFormat(intervalFormat)

    summaryFactory <- SummaryFactory(object = object, intervalFormat = intervalFormat)

    s <- object$.toString()

    kMax <- object$getNumberOfStages()
    summaryFactory$title <- .firstCharacterToUpperCase(s)

    numberOfGroups <- object$getNumberOfGroups()

    if (numberOfGroups == 1) {
        groups <- "one sample"
    } else if (numberOfGroups == 2) {
        groups <- c("one treatment", "one control group")
        if (object$isDatasetSurvival()) {
            groups <- paste0(groups, c(" (1)", " (2)"))
        }
    } else {
        groups <- c(paste0(
            .integerToWrittenNumber(numberOfGroups - 1),
            " treatment groups"
        ), "one control group")
        if (object$isDatasetSurvival()) {
            groups <- paste0(groups, c(
                paste0(" (", .arrayToString(1:(numberOfGroups - 1)), ")"),
                paste0(" (", numberOfGroups, ")")
            ))
        }
    }

    prefix <- ""
    if (object$isDatasetMeans()) {
        prefix <- "the sample sizes, means, and standard deviations of "
    } else if (object$isDatasetRates()) {
        prefix <- "the sample sizes and events of "
    } else if (object$isDatasetSurvival()) {
        prefix <- "the events and log rank statistics of the comparison of "
    }
    if (numberOfGroups > 1) {
        prefix <- paste0(prefix, "\n")
    }
    header <- paste0(
        "The dataset contains ", prefix,
        paste0(groups, collapse = ifelse(object$isDatasetSurvival(), " with ", " and "))
    )
    if (object$.enrichmentEnabled) {
        header <- paste0(header, ". The data will be analyzed ", ifelse(object$isStratified(), "", "non-"), "stratified")
    }
    if (kMax > 1) {
        header <- paste0(
            header, ".\nThe total number of looks is ", .integerToWrittenNumber(kMax),
            "; stage-wise and cumulative data are included"
        )
    }
    header <- paste0(header, ".")
    summaryFactory$header <- header

    digitSettings <- .getSummaryDigits(digits)
    digits <- digitSettings$digits
    digitsSampleSize <- 0
    digitsGeneral <- digitSettings$digitsGeneral
    digitsProbabilities <- digitSettings$digitsProbabilities

    paramsToCheck <- character(0)
    if (object$isDatasetMeans() || object$isDatasetRates()) {
        paramsToCheck <- c(paramsToCheck, "sampleSizes")
        if (kMax > 1) {
            paramsToCheck <- c(paramsToCheck, "overallSampleSizes")
        }
    } else if (object$isDatasetRates() || object$isDatasetSurvival()) {
        paramsToCheck <- c(paramsToCheck, "events")
        if (kMax > 1) {
            paramsToCheck <- c(paramsToCheck, "overallEvents")
        }
    }
    if (length(paramsToCheck) > 0) {
        for (param in paramsToCheck) {
            if (.isFloatingPointSampleSize(object, param)) {
                digitsSampleSize <- max(digitsSampleSize, .getMaxDigits(object[[param]]))
            }
        }
        digitsSampleSize <- min(digitsSampleSize, digits)
    }

    summaryFactory$addItem("Stage", object$stages)

    if (numberOfGroups > 1) {
        groupNumbers <- object$groups
        if (object$isDatasetSurvival()) {
            groupNumbers <- paste0(object$groups, " vs ", numberOfGroups)
            summaryFactory$addItem("Comparison", groupNumbers)
        } else {
            summaryFactory$addItem("Group", groupNumbers)
        }
    }

    if (object$.enrichmentEnabled) {
        summaryFactory$addItem("Subset", object$subsets)
    }

    parameterCaptionPrefix <- ifelse(kMax == 1, "", "Stage-wise ")

    if (object$isDatasetMeans() || object$isDatasetRates()) {
        summaryFactory$addParameter(object,
            parameterName = "sampleSizes",
            parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "sample size"),
            roundDigits = digitsSampleSize
        )
        if (kMax > 1) {
            summaryFactory$addParameter(object,
                parameterName = "overallSampleSizes",
                parameterCaption = "Cumulative sample size", roundDigits = digitsSampleSize
            )
        }
    }

    if (object$isDatasetMeans()) {
        summaryFactory$addParameter(object,
            parameterName = "means",
            parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "mean"),
            roundDigits = digitsGeneral
        )
        if (kMax > 1) {
            summaryFactory$addParameter(object,
                parameterName = "overallMeans",
                parameterCaption = "Cumulative mean", roundDigits = digitsGeneral
            )
        }
        summaryFactory$addParameter(object,
            parameterName = "stDevs",
            parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "standard deviation"),
            roundDigits = digitsGeneral
        )
        if (kMax > 1) {
            summaryFactory$addParameter(object,
                parameterName = "overallStDevs",
                parameterCaption = "Cumulative standard deviation", roundDigits = digitsGeneral
            )
        }
    } else if (object$isDatasetRates()) {
        summaryFactory$addParameter(object,
            parameterName = "events",
            parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "number of events"),
            roundDigits = digitsSampleSize
        )
        if (kMax > 1) {
            summaryFactory$addParameter(object,
                parameterName = "overallEvents",
                parameterCaption = "Cumulative number of events", roundDigits = digitsSampleSize
            )
        }
    } else if (object$isDatasetSurvival()) {
        summaryFactory$addParameter(object,
            parameterName = "events",
            parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "number of events"),
            roundDigits = digitsSampleSize
        )
        if (kMax > 1) {
            summaryFactory$addParameter(object,
                parameterName = "overallEvents",
                parameterCaption = "Cumulative number of events", roundDigits = digitsSampleSize
            )
        }
        summaryFactory$addParameter(object,
            parameterName = "logRanks",
            parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "log rank statistic"),
            roundDigits = digitsGeneral
        )
        if (kMax > 1) {
            summaryFactory$addParameter(object,
                parameterName = "overallLogRanks",
                parameterCaption = "Cumulative log rank statistic", roundDigits = digitsGeneral
            )
        }
        if (!any(is.na(object$allocationRatios)) && any(object$allocationRatios != 1)) {
            summaryFactory$addParameter(object,
                parameterName = "allocationRatios",
                parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "allocation ratio"),
                roundDigits = digitsGeneral
            )
            if (kMax > 1) {
                summaryFactory$addParameter(object,
                    parameterName = "overallAllocationRatios",
                    parameterCaption = "Cumulative allocation ratio", roundDigits = digitsGeneral
                )
            }
        }
    }

    return(summaryFactory)
}

.getDatasetArgumentsRCodeLines <- function(x, complete = FALSE, digits = 4) {
    m <- getWideFormat(x)
    lines <- character(0)
    paramNames <- colnames(m)
    if (!complete) {
        if (x$.inputType == "stagewise") {
            paramNames <- paramNames[!grepl("^overall", paramNames)]
        } else {
            paramNames <- paramNames[grepl("^(stage|group|subset|overall)", paramNames)]
        }
    }

    for (paramName in paramNames) {
        encapsulate <- grepl("^subset", paramName)
        if (!encapsulate || isTRUE(x$.enrichmentEnabled)) {
            values <- m[[paramName]]
            if (!encapsulate && is.numeric(values) && !is.null(digits) && length(digits) == 1 && !is.na(digits)) {
                values <- round(values, digits = digits)
            }
            lines <- c(lines, paste0(paramName, " = ", .arrayToString(values,
                vectorLookAndFeelEnabled = TRUE, encapsulate = encapsulate, digits = NA_integer_
            )))
        }
    }

    return(lines)
}

#'
#' @title
#' Print Dataset Values
#'
#' @description
#' \code{print} prints its \code{\link{Dataset}} argument and returns it invisibly (via \code{invisible(x)}).
#'
#' @param x A \code{\link{Dataset}} object.
#' @param markdown If \code{TRUE}, the output will be created in Markdown.
#' @param output A character defining the output type, default is "list".
#' @inheritParams param_three_dots
#'
#' @details
#' Prints the dataset.
#'
#' @export
#'
#' @keywords internal
#'
print.Dataset <- function(x, ..., markdown = FALSE, output = c("list", "long", "wide", "r", "rComplete")) {
    fCall <- match.call(expand.dots = FALSE)
    datasetName <- deparse(fCall$x)

    output <- match.arg(output)

    if (markdown) {
        if (output != "list") {
            warning("'output' (\"", output, "\") will be ignored ",
                "because only \"list\" is supported yet if markdown is enabled",
                call. = FALSE
            )
        }

        x$.catMarkdownText()
        return(invisible(x))
    }

    if (output == "long") {
        m <- getLongFormat(x)
        m <- prmatrix(m, rowlab = rep("", nrow(m)))
        print(m, quote = FALSE, right = FALSE)
        return(invisible(x))
    } else if (output == "wide") {
        m <- getWideFormat(x)
        m <- prmatrix(m, rowlab = rep("", nrow(m)))
        print(m, quote = FALSE, right = FALSE)
        return(invisible(x))
    } else if (output %in% c("r", "rComplete")) {
        lines <- .getDatasetArgumentsRCodeLines(x, complete = (output == "rComplete"))
        lines <- paste0("\t", lines)

        if (is.null(datasetName) || length(datasetName) != 1 || is.na(datasetName)) {
            datasetName <- "dataInput"
        }

        cat(datasetName, " <- getDataset(\n", sep = "")
        cat(paste0(lines, collapse = ",\n"), "\n")
        cat(")\n")
        return(invisible(x))
    }

    x$show()
    return(invisible(x))
}

Try the rpact package in your browser

Any scripts or data that you put into this service are public.

rpact documentation built on July 9, 2023, 6:30 p.m.