Nothing
## |
## | *Dataset classes*
## |
## | This file is part of the R package rpact:
## | Confirmatory Adaptive Clinical Trial Design and Analysis
## |
## | Author: Gernot Wassmer, PhD, and Friedrich Pahlke, PhD
## | Licensed under "GNU Lesser General Public License" version 3
## | License text can be found here: https://www.r-project.org/Licenses/LGPL-3
## |
## | RPACT company website: https://www.rpact.com
## | rpact package website: https://www.rpact.org
## |
## | Contact us for information about our services: info@rpact.com
## |
## | File version: $Revision: 7139 $
## | Last changed: $Date: 2023-06-28 08:15:31 +0200 (Mi, 28 Jun 2023) $
## | Last changed by: $Author: pahlke $
## |
#' @include f_analysis_utilities.R
#' @include f_core_utilities.R
#' @include f_object_r_code.R
NULL
C_KEY_WORDS_GROUPS <- c("group", "groups")
C_KEY_WORDS_STAGES <- c("stage", "stages")
C_KEY_WORDS_SUBSETS <- c("subset", "subsets")
C_KEY_WORDS_SAMPLE_SIZES <- .getAllParameterNameVariants(c("n", "N", "sampleSizes", "sampleSize"))
C_KEY_WORDS_MEANS <- c("means", "mean")
C_KEY_WORDS_ST_DEVS <- .getAllParameterNameVariants(c("stDevs", "stDev", "stds", "sd"))
C_KEY_WORDS_EVENTS <- c("event", "events")
C_KEY_WORDS_OVERALL_EVENTS <- .getAllParameterNameVariants(c("overallEvents", "overallEvent"))
C_KEY_WORDS_EXPECTED_EVENTS <- .getAllParameterNameVariants(c("expectedEvents", "expectedEvent"))
C_KEY_WORDS_VARIANCE_EVENTS <- .getAllParameterNameVariants(c("varianceEvents", "varianceEvent"))
C_KEY_WORDS_OVERALL_EXPECTED_EVENTS <- .getAllParameterNameVariants(c("overallExpectedEvents", "overallExpectedEvent"))
C_KEY_WORDS_OVERALL_VARIANCE_EVENTS <- .getAllParameterNameVariants(c("overallVarianceEvents", "overallVarianceEvent"))
C_KEY_WORDS_OVERALL_SAMPLE_SIZES <- .getAllParameterNameVariants(c(
"overallN", "overallSampleSizes", "overallSampleSize"
))
C_KEY_WORDS_OVERALL_MEANS <- .getAllParameterNameVariants(c("overallMeans", "overallMean"))
C_KEY_WORDS_OVERALL_ST_DEVS <- .getAllParameterNameVariants(c(
"overallStDevs", "overallStDev", "overall.sd", "overall_sd"
))
C_KEY_WORDS_ALLOCATION_RATIOS <- .getAllParameterNameVariants(c("ar", "allocationRatios", "allocationRatio"))
C_KEY_WORDS_LOG_RANKS <- .getAllParameterNameVariants(c("logRanks", "logRank", "lr"))
C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS <- .getAllParameterNameVariants(c(
"oar", "car", "overallAllocationRatios", "overallAllocationRatio"
))
C_KEY_WORDS_OVERALL_LOG_RANKS <- .getAllParameterNameVariants(c("olr", "clr", "overallLogRanks", "overallLogRank"))
C_KEY_WORDS <- c(
C_KEY_WORDS_GROUPS,
C_KEY_WORDS_STAGES,
C_KEY_WORDS_SUBSETS,
C_KEY_WORDS_SAMPLE_SIZES,
C_KEY_WORDS_MEANS,
C_KEY_WORDS_ST_DEVS,
C_KEY_WORDS_EVENTS,
C_KEY_WORDS_OVERALL_EVENTS,
C_KEY_WORDS_OVERALL_SAMPLE_SIZES,
C_KEY_WORDS_OVERALL_MEANS,
C_KEY_WORDS_OVERALL_ST_DEVS,
C_KEY_WORDS_ALLOCATION_RATIOS,
C_KEY_WORDS_LOG_RANKS,
C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
C_KEY_WORDS_OVERALL_LOG_RANKS
)
#' @title
#' Read Dataset
#'
#' @description
#' Reads a data file and returns it as dataset object.
#'
#' @param file A CSV file (see \code{\link[utils]{read.table}}).
#' @param header A logical value indicating whether the file contains the names of
#' the variables as its first line.
#' @param sep The field separator character. Values on each line of the file are separated
#' by this character. If sep = "," (the default for \code{readDataset}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#' quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#' considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param fill logical. If \code{TRUE} then in case the rows have unequal length, blank fields
#' are implicitly added.
#' @param comment.char character: a character vector of length one containing a single character
#' or an empty string. Use "" to turn off the interpretation of comments altogether.
#' @param fileEncoding character string: if non-empty declares the encoding used on a file
#' (not a connection) so the character data can be re-encoded.
#' See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to code{\link[utils]{read.table}}.
#'
#' @details
#' \code{readDataset} is a wrapper function that uses \code{\link[utils]{read.table}} to read the
#' CSV file into a data frame, transfers it from long to wide format with \code{\link[stats]{reshape}}
#' and puts the data to \code{\link[=getDataset]{getDataset()}}.
#'
#' @template return_object_dataset
#'
#' @seealso
#' \itemize{
#' \item \code{\link[=readDatasets]{readDatasets()}} for reading multiple datasets,
#' \item \code{\link[=writeDataset]{writeDataset()}} for writing a single dataset,
#' \item \code{\link[=writeDatasets]{writeDatasets()}} for writing multiple datasets.
#' }
#'
#' @examples
#' \dontrun{
#' dataFileRates <- system.file("extdata",
#' "dataset_rates.csv",
#' package = "rpact"
#' )
#' if (dataFileRates != "") {
#' datasetRates <- readDataset(dataFileRates)
#' datasetRates
#' }
#'
#' dataFileMeansMultiArm <- system.file("extdata",
#' "dataset_means_multi-arm.csv",
#' package = "rpact"
#' )
#' if (dataFileMeansMultiArm != "") {
#' datasetMeansMultiArm <- readDataset(dataFileMeansMultiArm)
#' datasetMeansMultiArm
#' }
#'
#' dataFileRatesMultiArm <- system.file("extdata",
#' "dataset_rates_multi-arm.csv",
#' package = "rpact"
#' )
#' if (dataFileRatesMultiArm != "") {
#' datasetRatesMultiArm <- readDataset(dataFileRatesMultiArm)
#' datasetRatesMultiArm
#' }
#'
#' dataFileSurvivalMultiArm <- system.file("extdata",
#' "dataset_survival_multi-arm.csv",
#' package = "rpact"
#' )
#' if (dataFileSurvivalMultiArm != "") {
#' datasetSurvivalMultiArm <- readDataset(dataFileSurvivalMultiArm)
#' datasetSurvivalMultiArm
#' }
#' }
#'
#' @export
#'
readDataset <- function(file, ..., header = TRUE, sep = ",", quote = "\"",
dec = ".", fill = TRUE, comment.char = "", fileEncoding = "UTF-8") {
if (!file.exists(file)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "the file '", file, "' does not exist")
}
data <- utils::read.table(
file = file, header = header, sep = sep,
quote = quote, dec = dec, fill = fill, fileEncoding = fileEncoding, ...
)
dataWide <- stats::reshape(data = data, direction = "wide", idvar = "stages", timevar = "groups")
colnames(dataWide) <- gsub("\\.", "", colnames(dataWide))
return(getDataset(dataWide))
}
#' @title
#' Write Dataset
#'
#' @description
#' Writes a dataset to a CSV file.
#'
#' @param dataset A dataset.
#' @param file The target CSV file.
#' @param append Logical. Only relevant if file is a character string.
#' If \code{TRUE}, the output is appended to the file. If \code{FALSE}, any existing file of the name is destroyed.
#' @param sep The field separator character. Values on each line of the file are separated
#' by this character. If sep = "," (the default for \code{writeDataset}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#' quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#' considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param eol The character(s) to print at the end of each line (row).
#' @param na The string to use for missing values in the data.
#' @param row.names Either a logical value indicating whether the row names of \code{dataset} are
#' to be written along with \code{dataset}, or a character vector of row names to be written.
#' @param col.names Either a logical value indicating whether the column names of \code{dataset} are
#' to be written along with \code{dataset}, or a character vector of column names to be written.
#' See the section on 'CSV files' for the meaning of \code{col.names = NA}.
#' @param qmethod A character string specifying how to deal with embedded double quote characters
#' when quoting strings. Must be one of "double" (default in \code{writeDataset}) or "escape".
#' @param fileEncoding Character string: if non-empty declares the encoding used on a file
#' (not a connection) so the character data can be re-encoded.
#' See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to \code{\link[utils]{write.table}}.
#'
#' @details
#' \code{\link[=writeDataset]{writeDataset()}} is a wrapper function that coerces the dataset to a data frame and uses \cr
#' \code{\link[utils]{write.table}} to write it to a CSV file.
#'
#' @seealso
#' \itemize{
#' \item \code{\link[=writeDatasets]{writeDatasets()}} for writing multiple datasets,
#' \item \code{\link[=readDataset]{readDataset()}} for reading a single dataset,
#' \item \code{\link[=readDatasets]{readDatasets()}} for reading multiple datasets.
#' }
#'
#' @examples
#' \dontrun{
#' datasetOfRates <- getDataset(
#' n1 = c(11, 13, 12, 13),
#' n2 = c(8, 10, 9, 11),
#' events1 = c(10, 10, 12, 12),
#' events2 = c(3, 5, 5, 6)
#' )
#' writeDataset(datasetOfRates, "dataset_rates.csv")
#' }
#'
#' @export
#'
writeDataset <- function(dataset, file, ..., append = FALSE, quote = TRUE, sep = ",",
eol = "\n", na = "NA", dec = ".", row.names = TRUE,
col.names = NA, qmethod = "double",
fileEncoding = "UTF-8") {
.assertIsDataset(dataset)
x <- as.data.frame(dataset, niceColumnNamesEnabled = FALSE)
utils::write.table(
x = x, file = file, append = append, quote = quote, sep = sep,
eol = eol, na = na, dec = dec, row.names = FALSE,
col.names = TRUE, qmethod = qmethod,
fileEncoding = fileEncoding
)
}
#' @title
#' Read Multiple Datasets
#'
#' @description
#' Reads a data file and returns it as a list of dataset objects.
#'
#' @param file A CSV file (see \code{\link[utils]{read.table}}).
#' @param header A logical value indicating whether the file contains the names of
#' the variables as its first line.
#' @param sep The field separator character. Values on each line of the file are separated
#' by this character. If sep = "," (the default for \code{readDatasets}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#' quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#' considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param fill logical. If \code{TRUE} then in case the rows have unequal length, blank fields
#' are implicitly added.
#' @param comment.char character: a character vector of length one containing a single character
#' or an empty string. Use "" to turn off the interpretation of comments altogether.
#' @param fileEncoding character string: if non-empty declares the encoding used on a file
#' (not a connection) so the character data can be re-encoded.
#' See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to \code{\link[utils]{read.table}}.
#'
#' @details
#' Reads a file that was written by \code{\link[=writeDatasets]{writeDatasets()}} before.
#'
#' @return Returns a \code{\link[base]{list}} of \code{\link{Dataset}} objects.
#'
#' @seealso
#' \itemize{
#' \item \code{\link[=readDataset]{readDataset()}} for reading a single dataset,
#' \item \code{\link[=writeDatasets]{writeDatasets()}} for writing multiple datasets,
#' \item \code{\link[=writeDataset]{writeDataset()}} for writing a single dataset.
#' }
#'
#' @examples
#' dataFile <- system.file("extdata", "datasets_rates.csv", package = "rpact")
#' if (dataFile != "") {
#' datasets <- readDatasets(dataFile)
#' datasets
#' }
#' @export
#'
readDatasets <- function(file, ..., header = TRUE, sep = ",", quote = "\"",
dec = ".", fill = TRUE, comment.char = "", fileEncoding = "UTF-8") {
if (!file.exists(file)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "the file '", file, "' does not exist")
}
data <- utils::read.table(
file = file, header = header, sep = sep,
quote = quote, dec = dec, fill = fill, fileEncoding = fileEncoding, ...
)
if (is.null(data[["datasetId"]])) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "data file must contain the column 'datasetId'")
}
datasets <- list()
for (datasetId in unique(data$datasetId)) {
subData <- data[data$datasetId == datasetId, ]
dataFrame <- subset(subData, select = -datasetId)
description <- NA_character_
if (!is.null(dataFrame[["description"]])) {
description <- as.character(dataFrame$description[1])
dataFrame <- subset(dataFrame, select = -description)
}
if (length(unique(subData$groups)) == 2) {
dataWide <- stats::reshape(dataFrame, direction = "wide", idvar = "stages", timevar = "groups")
colnames(dataWide) <- gsub("\\.", "", colnames(dataWide))
dataset <- getDataset(dataWide)
} else {
dataset <- getDataset(dataFrame)
}
dataset$setDescription(description)
datasets <- c(datasets, dataset)
}
return(datasets)
}
#' @title
#' Write Multiple Datasets
#'
#' @description
#' Writes a list of datasets to a CSV file.
#'
#' @param datasets A list of datasets.
#' @param file The target CSV file.
#' @param append Logical. Only relevant if file is a character string.
#' If \code{TRUE}, the output is appended to the file. If FALSE, any existing file of the name is destroyed.
#' @param sep The field separator character. Values on each line of the file are separated
#' by this character. If sep = "," (the default for \code{writeDatasets}) the separator is a comma.
#' @param quote The set of quoting characters. To disable quoting altogether, use
#' quote = "". See scan for the behavior on quotes embedded in quotes. Quoting is only
#' considered for columns read as character, which is all of them unless \code{colClasses} is specified.
#' @param dec The character used in the file for decimal points.
#' @param eol The character(s) to print at the end of each line (row).
#' @param na The string to use for missing values in the data.
#' @param row.names Either a logical value indicating whether the row names of \code{dataset} are
#' to be written along with \code{dataset}, or a character vector of row names to be written.
#' @param col.names Either a logical value indicating whether the column names of \code{dataset} are
#' to be written along with \code{dataset}, or a character vector of column names to be written.
#' See the section on 'CSV files' for the meaning of \code{col.names = NA}.
#' @param qmethod A character string specifying how to deal with embedded double quote characters
#' when quoting strings. Must be one of "double" (default in \code{writeDatasets}) or "escape".
#' @param fileEncoding Character string: if non-empty declares the encoding used on a file
#' (not a connection) so the character data can be re-encoded.
#' See the 'Encoding' section of the help for file, the 'R Data Import/Export Manual' and 'Note'.
#' @param ... Further arguments to be passed to \code{\link[utils]{write.table}}.
#'
#' @details
#' The format of the CSV file is optimized for usage of \code{\link[=readDatasets]{readDatasets()}}.
#'
#' @seealso
#' \itemize{
#' \item \code{\link[=writeDataset]{writeDataset()}} for writing a single dataset,
#' \item \code{\link[=readDatasets]{readDatasets()}} for reading multiple datasets,
#' \item \code{\link[=readDataset]{readDataset()}} for reading a single dataset.
#' }
#'
#' @examples
#' \dontrun{
#' d1 <- getDataset(
#' n1 = c(11, 13, 12, 13),
#' n2 = c(8, 10, 9, 11),
#' events1 = c(10, 10, 12, 12),
#' events2 = c(3, 5, 5, 6)
#' )
#' d2 <- getDataset(
#' n1 = c(9, 13, 12, 13),
#' n2 = c(6, 10, 9, 11),
#' events1 = c(10, 10, 12, 12),
#' events2 = c(4, 5, 5, 6)
#' )
#' datasets <- list(d1, d2)
#' writeDatasets(datasets, "datasets_rates.csv")
#' }
#'
#' @export
#'
writeDatasets <- function(datasets, file, ..., append = FALSE, quote = TRUE, sep = ",",
eol = "\n", na = "NA", dec = ".", row.names = TRUE,
col.names = NA, qmethod = "double",
fileEncoding = "UTF-8") {
if (!is.list(datasets)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'datasets' must be a list of datasets")
}
if (length(datasets) == 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'datasets' is empty")
}
datasetType <- NA_character_
dataFrames <- NULL
for (i in 1:length(datasets)) {
dataset <- datasets[[i]]
.assertIsDataset(dataset)
if (is.na(datasetType)) {
datasetType <- .getClassName(dataset)
} else if (.getClassName(dataset) != datasetType) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all datasets must have the same type")
}
data <- as.data.frame(dataset, niceColumnNamesEnabled = FALSE)
datasetId <- ifelse(!is.null(dataset$getId()) && !is.na(dataset$getId()), dataset$getId(), i)
data <- cbind(rep(datasetId, nrow(data)), data)
colnames(data)[1] <- "datasetId"
if (!is.null(dataset$getDescription()) && !is.na(dataset$getDescription())) {
data <- cbind(data, rep(dataset$getDescription(), nrow(data)))
colnames(data)[ncol(data)] <- "description"
}
if (is.null(dataFrames)) {
dataFrames <- data
} else {
dataFrames <- rbind(dataFrames, data)
}
}
if (is.null(dataFrames)) {
stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "failed to bind datasets")
}
utils::write.table(
x = dataFrames, file = file, append = append, quote = quote, sep = sep,
eol = eol, na = na, dec = dec, row.names = FALSE,
col.names = TRUE, qmethod = qmethod,
fileEncoding = fileEncoding
)
}
.getDataset <- function(..., floatingPointNumbersEnabled = FALSE) {
args <- list(...)
if (length(args) == 0) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "data.frame, data vectors, or datasets expected")
}
if (.optionalArgsContainsDatasets(...)) {
if (length(args) == 1) {
return(args[[1]])
}
design <- .getDesignFromArgs(...)
if (length(args) == 2 && !is.null(design)) {
dataset <- .getDatasetFromArgs(...)
if (!is.null(dataset)) {
dataset <- dataset$copy(shallow = FALSE)
dataset$.design <- design
return(dataset)
}
}
return(.getEnrichmentDatasetFromArgs(...))
}
exampleType <- args[["example"]]
if (!is.null(exampleType) && exampleType %in% c("means", "rates", "survival")) {
return(.getDatasetExample(exampleType = exampleType))
}
if (length(args) == 1 && !is.null(args[[1]]) && is.list(args[[1]]) && !is.data.frame(args[[1]])) {
return(.getDatasetMeansFromModelsByStage(emmeansResults = args[[1]]))
}
emmeansResults <- .getDatasetMeansModelObjectsList(args)
if (!is.null(emmeansResults) && length(emmeansResults) > 0) {
return(.getDatasetMeansFromModelsByStage(emmeansResults = emmeansResults))
}
dataFrame <- .getDataFrameFromArgs(...)
design <- .getDesignFromArgs(...)
if (is.null(dataFrame)) {
args <- .removeDesignFromArgs(args)
paramNames <- names(args)
paramNames <- paramNames[paramNames != ""]
numberOfParameters <- length(args)
if (numberOfParameters > 0 && names(args)[1] == "" && .isTrialDesign(args[[1]])) {
numberOfParameters <- numberOfParameters - 1
}
if (length(paramNames) != numberOfParameters) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all parameters must be named")
}
if (length(paramNames) != length(unique(paramNames))) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "the parameter names must be unique")
}
dataFrame <- .createDataFrame(...)
}
enrichmentEnabled <- .isDataObjectEnrichment(...)
if (.isDataObjectMeans(...)) {
return(DatasetMeans(
dataFrame = dataFrame,
floatingPointNumbersEnabled = floatingPointNumbersEnabled,
enrichmentEnabled = enrichmentEnabled,
.design = design
))
}
if (.isDataObjectRates(...)) {
return(DatasetRates(
dataFrame = dataFrame,
floatingPointNumbersEnabled = floatingPointNumbersEnabled,
enrichmentEnabled = enrichmentEnabled,
.design = design
))
}
if (.isDataObjectNonStratifiedEnrichmentSurvival(...)) {
return(DatasetEnrichmentSurvival(
dataFrame = dataFrame,
floatingPointNumbersEnabled = floatingPointNumbersEnabled,
enrichmentEnabled = enrichmentEnabled,
.design = design
))
}
if (.isDataObjectSurvival(...)) {
return(DatasetSurvival(
dataFrame = dataFrame,
floatingPointNumbersEnabled = floatingPointNumbersEnabled,
enrichmentEnabled = enrichmentEnabled,
.design = design
))
}
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "failed to identify dataset type")
}
#' @title
#' Get Dataset
#'
#' @description
#' Creates a dataset object and returns it.
#'
#' @param ... A \code{data.frame} or some data vectors defining the dataset.
#' @param floatingPointNumbersEnabled If \code{TRUE},
#' sample sizes and event numbers can be specified as floating-point numbers
#' (this make sense, e.g., for theoretical comparisons); \cr
#' by default \code{floatingPointNumbersEnabled = FALSE}, i.e.,
#' samples sizes and event numbers defined as floating-point numbers will be truncated.
#'
#' @details
#' The different dataset types \code{DatasetMeans}, of \code{DatasetRates}, or
#' \code{DatasetSurvival} can be created as follows:
#' \itemize{
#' \item An element of \code{\link{DatasetMeans}} for one sample is created by \cr
#' \code{getDataset(sampleSizes =, means =, stDevs =)} where \cr
#' \code{sampleSizes}, \code{means}, \code{stDevs} are vectors with stage-wise sample sizes,
#' means and standard deviations of length given by the number of available stages.
#' \item An element of \code{\link{DatasetMeans}} for two samples is created by \cr
#' \code{getDataset(sampleSizes1 =, sampleSizes2 =, means1 =, means2 =, } \cr
#' \code{stDevs1 =, stDevs2 =)} where
#' \code{sampleSizes1}, \code{sampleSizes2}, \code{means1}, \code{means2},
#' \code{stDevs1}, \code{stDevs2} are vectors with
#' stage-wise sample sizes, means and standard deviations for the two treatment groups
#' of length given by the number of available stages.
#' \item An element of \code{\link{DatasetRates}} for one sample is created by \cr
#' \code{getDataset(sampleSizes =, events =)} where \code{sampleSizes}, \code{events} are vectors
#' with stage-wise sample sizes and events of length given by the number of available stages.
#' \item An element of \code{\link{DatasetRates}} for two samples is created by \cr
#' \code{getDataset(sampleSizes1 =, sampleSizes2 =, events1 =, events2 =)} where
#' \code{sampleSizes1}, \code{sampleSizes2}, \code{events1}, \code{events2}
#' are vectors with stage-wise sample sizes
#' and events for the two treatment groups of length given by the number of available stages.
#' \item An element of \code{\link{DatasetSurvival}} is created by \cr
#' \code{getDataset(events =, logRanks =, allocationRatios =)} where
#' \code{events}, \code{logRanks}, and \code{allocation ratios} are the stage-wise events,
#' (one-sided) logrank statistics, and allocation ratios.
#' \item An element of \code{\link{DatasetMeans}}, \code{\link{DatasetRates}}, and \code{\link{DatasetSurvival}}
#' for more than one comparison is created by adding subsequent digits to the variable names.
#' The system can analyze these data in a multi-arm many-to-one comparison setting where the
#' group with the highest index represents the control group.
#' }
#' Prefix \code{overall[Capital case of first letter of variable name]...} for the variable
#' names enables entering the overall (cumulative) results and calculates stage-wise statistics.
#' Since rpact version 3.2, the prefix \code{cumulative[Capital case of first letter of variable name]...} or
#' \code{cum[Capital case of first letter of variable name]...} can alternatively be used for this.
#'
#' \code{n} can be used in place of \code{samplesizes}.
#'
#' Note that in survival design usually the overall (cumulative) events and logrank test statistics are provided
#' in the output, so \cr
#' \code{getDataset(cumulativeEvents=, cumulativeLogRanks =, cumulativeAllocationRatios =)} \cr
#' is the usual command for entering survival data. Note also that for \code{cumulativeLogranks} also the
#' z scores from a Cox regression can be used.
#'
#' For multi-arm designs, the index refers to the considered comparison. For example,\cr
#' \code{
#' getDataset(events1=c(13, 33), logRanks1 = c(1.23, 1.55), events2 = c(16, NA), logRanks2 = c(1.55, NA))
#' } \cr
#' refers to the case where one active arm (1) is considered at both stages whereas active arm 2
#' was dropped at interim. Number of events and logrank statistics are entered for the corresponding
#' comparison to control (see Examples).
#'
#' For enrichment designs, the comparison of two samples is provided for an unstratified
#' (sub-population wise) or stratified data input.\cr
#' For unstratified (sub-population wise) data input the data sets are defined for the sub-populations
#' S1, S2, ..., F, where F refers to the full populations. Use of \code{getDataset(S1 = , S2, ..., F = )}
#' defines the data set to be used in \code{\link[=getAnalysisResults]{getAnalysisResults()}} (see examples)\cr
#' For stratified data input the data sets are defined for the strata S1, S12, S2, ..., R, where R
#' refers to the remainder of the strata such that the union of all sets is the full population.
#' Use of \code{getDataset(S1 = , S12 = , S2, ..., R = )} defines the data set to be used in
#' \code{\link[=getAnalysisResults]{getAnalysisResults()}} (see examples)\cr
#' For survival data, for enrichment designs the log-rank statistics should be entered as stratified
#' log-rank statistics in order to provide strong control of Type I error rate. For stratified data input,
#' the variables to be specified in \code{getDataset()} are \code{events}, \code{expectedEvents},
#' \code{varianceEvents}, and \code{allocationRatios} or \code{overallEvents}, \code{overallExpectedEvents},
#' \code{overallVarianceEvents}, and \code{overallAllocationRatios}. From this, (stratified) log-rank tests are
#' calculated.
#'
#' @template return_object_dataset
#'
#' @template examples_get_dataset
#'
#' @include f_analysis_base.R
#' @include f_analysis_utilities.R
#'
#' @export
#'
getDataset <- function(..., floatingPointNumbersEnabled = FALSE) {
dataset <- .getDataset(floatingPointNumbersEnabled = floatingPointNumbersEnabled, ...)
if (dataset$.enrichmentEnabled && dataset$getNumberOfGroups() != 2) {
warning("Only population enrichment data with 2 groups can be analyzed but ",
dataset$getNumberOfGroups(), " group",
ifelse(dataset$getNumberOfGroups() == 1, " is", "s are"), " defined",
call. = FALSE
)
}
return(dataset)
}
#' @rdname getDataset
#' @export
getDataSet <- function(..., floatingPointNumbersEnabled = FALSE) {
return(getDataset(floatingPointNumbersEnabled = floatingPointNumbersEnabled, ...))
}
.getDatasetMeansModelObjectsList <- function(args) {
if (is.null(args) || length(args) == 0 || !is.list(args)) {
return(NULL)
}
emmeansResults <- list()
for (arg in args) {
if (inherits(arg, "emmGrid")) {
emmeansResults[[length(emmeansResults) + 1]] <- arg
}
}
if (length(emmeansResults) == 0) {
return(NULL)
}
argNames <- names(args)
for (i in 1:length(args)) {
arg <- args[[i]]
if (!inherits(arg, "emmGrid")) {
argName <- argNames[i]
argInfo <- ""
if (length(argName) == 1 && argName != "") {
argInfo <- paste0(sQuote(argName), " ")
}
argInfo <- paste0(argInfo, "(", .arrayToString(arg), ")")
warning("Argument ", argInfo, " will be ignored because only 'emmGrid' objects will be respected")
}
}
return(emmeansResults)
}
.getStandardDeviationFromStandardError <- function(sampleSize, standardError, ...,
dfValue = NA_real_, alpha = 0.05, lmEnabled = TRUE, stDevCalcMode = "auto") {
qtCalcEnablbled <- length(stDevCalcMode) == 1 && !is.na(stDevCalcMode) && stDevCalcMode == "t"
if ((qtCalcEnablbled || !lmEnabled) && !is.na(dfValue) && !is.infinite(dfValue) && dfValue > 0) {
qValue <- stats::qt(1 - alpha / 2, df = dfValue)
stDev <- standardError * 2 / qValue * sqrt(sampleSize)
} else {
stDev <- standardError * sqrt(sampleSize)
}
return(stDev)
}
.getDatasetMeansFromModelsByStage <- function(emmeansResults, correctGroupOrder = TRUE) {
if (is.null(emmeansResults)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(emmeansResults), " must be a non-empty list")
}
if (!is.list(emmeansResults)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(emmeansResults), " must be a list")
}
if (length(emmeansResults) == 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(emmeansResults), " must be not empty")
}
for (stage in 1:length(emmeansResults)) {
if (!inherits(emmeansResults[[stage]], "emmGrid")) {
stop(sprintf(
paste0(
"%s%s must contain %s objects created by emmeans(x), ",
"where x is a linear model result (one object per stage; class is %s at stage %s)"
),
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("emmeansResults"), sQuote("emmGrid"),
.getClassName(emmeansResults[[stage]]), stage
))
}
}
stages <- integer(0)
groups <- integer(0)
means <- numeric(0)
stDevs <- numeric(0)
sampleSizes <- numeric(0)
lmEnabled <- TRUE
tryCatch(
{
modelCall <- emmeansResults[[1]]@model.info$call
modelFunction <- as.character(modelCall)[1]
lmEnabled <- modelFunction == "lm"
if (!grepl(paste0("::", modelFunction), modelFunction)) {
packageName <- .getPackageName(modelFunction)
if (!is.na(packageName)) {
modelFunction <- paste0(packageName, "::", modelFunction)
}
}
if (lmEnabled) {
warning("When using ", modelFunction, "() ",
"the estimated marginal means and standard deviations can be inaccurate ",
"and analysis results based on this values may be imprecise",
call. = FALSE
)
} else {
warning("Using ", modelFunction, " emmeans result objects as ",
"arguments of getDataset() is experminental in this rpact version and not fully validated",
call. = FALSE
)
}
},
error = function(e) {
warning("Using emmeans result objects as ",
"arguments of getDataset() is experminental in this rpact version and not fully validated",
call. = FALSE
)
}
)
stDevCalcMode <- getOption("rpact.dataset.stdev.calc.mode", "auto") # auto, sigma, norm, t
for (stage in 1:length(emmeansResults)) {
emmeansResult <- emmeansResults[[stage]]
emmeansResultsSummary <- summary(emmeansResult)
emmeansResultsList <- as.list(emmeansResult)
if (is.null(emmeansResultsSummary[["emmean"]])) {
stop(
C_EXCEPTION_TYPE_RUNTIME_ISSUE,
"the objects in summary(emmeansResults) must contain the field 'emmean'"
)
}
for (expectedField in c("sigma", "extras")) {
if (is.null(emmeansResultsList[[expectedField]])) {
stop(
C_EXCEPTION_TYPE_RUNTIME_ISSUE,
"the objects in as.list(emmeansResults) must contain the field ", sQuote(expectedField)
)
}
}
numberOfGroups <- length(emmeansResultsSummary$emmean)
rpactGroupNumbers <- 1:numberOfGroups
if (correctGroupOrder) {
rpactGroupNumbers <- 1
if (numberOfGroups > 1) {
rpactGroupNumbers <- c(2:numberOfGroups, rpactGroupNumbers)
}
}
for (group in 1:length(emmeansResultsSummary$emmean)) {
stages <- c(stages, stage)
groups <- c(groups, group)
rpactGroupNumber <- rpactGroupNumbers[group]
standardError <- emmeansResultsSummary$SE[rpactGroupNumber]
sampleSize <- emmeansResultsList$extras[rpactGroupNumber, ]
meanValue <- emmeansResultsSummary$emmean[rpactGroupNumber]
dfValue <- emmeansResultsSummary$df[rpactGroupNumber]
if (length(stDevCalcMode) == 1 && !is.na(stDevCalcMode) && stDevCalcMode == "sigma") {
# pooled standard deviation from emmeans
stDev <- emmeansResultsList$sigma
} else {
stDev <- .getStandardDeviationFromStandardError(sampleSize, standardError,
dfValue = dfValue, lmEnabled = lmEnabled, stDevCalcMode = stDevCalcMode
)
}
means <- c(means, meanValue)
stDevs <- c(stDevs, stDev)
sampleSizes <- c(sampleSizes, sampleSize)
}
}
data <- data.frame(
stages = stages,
groups = groups,
means = means,
stDevs = stDevs,
sampleSizes = sampleSizes
)
data <- data[order(data$stages, data$groups), ]
dataWide <- stats::reshape(data = data, direction = "wide", idvar = "stages", timevar = "groups")
colnames(dataWide) <- gsub("\\.", "", colnames(dataWide))
return(getDataset(dataWide))
}
.optionalArgsContainsDatasets <- function(...) {
args <- list(...)
if (length(args) == 0) {
return(FALSE)
}
for (arg in args) {
if (inherits(arg, "Dataset")) {
return(TRUE)
}
}
return(FALSE)
}
.getSubsetsFromArgs <- function(...) {
args <- list(...)
if (length(args) == 0) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "one or more subset datasets expected")
}
subsetNames <- names(args)
if (is.null(subsetNames)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all subsets must be named")
}
if (!("R" %in% subsetNames) && !("F" %in% subsetNames)) {
stop(
C_EXCEPTION_TYPE_MISSING_ARGUMENT,
'"R" (stratified analysis)" or "F" (non-stratified analysis) must be defined as subset'
)
}
subsetNumbers <- gsub("\\D", "", subsetNames)
subsetNumbers <- subsetNumbers[subsetNumbers != ""] # & nchar(subsetNumbers) == 1
if (length(subsetNumbers) == 0) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all subset names (",
.arrayToString(subsetNames), ") must be \"S[n]\", \"R\", or \"F\", ",
"where [n] is a number with increasing digits (starting with 1)"
)
}
stratifiedInput <- "R" %in% subsetNames
subsetNumbers <- paste0(subsetNumbers, collapse = "")
subsetNumbers <- strsplit(subsetNumbers, "")[[1]]
subsetNumbers <- as.integer(subsetNumbers)
gMax <- max(subsetNumbers) + 1
validSubsetNames <- .createSubsetsByGMax(gMax, stratifiedInput = stratifiedInput, all = FALSE)
for (subsetName in subsetNames) {
if (subsetName == "") {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all subsets must be named")
}
if (!(subsetName %in% validSubsetNames)) {
suffix <- ifelse(stratifiedInput, " (stratified analysis)", " (non-stratified analysis)")
if (length(validSubsetNames) < 10) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "invalid subset name (", subsetName, "); ",
"valid names are ", .arrayToString(validSubsetNames), suffix
)
} else {
restFull <- ifelse(stratifiedInput, '"R"', '"F"')
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "invalid subset name (", subsetName, "): ",
"all subset names must be \"S[n]\" or ", restFull, ", ",
"where [n] is a number with increasing digits", suffix
)
}
}
}
subsets <- NULL
subsetType <- NA_character_
emptySubsetNames <- validSubsetNames[!(validSubsetNames %in% subsetNames)]
for (subsetName in subsetNames) {
subset <- args[[subsetName]]
if (is.null(subset) || (!isS4(subset) && is.na(subset))) {
emptySubsetNames <- c(emptySubsetNames, subsetName)
} else {
if (!.isDataset(subset)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"subset ", subsetName, " is not a dataset (is ", .getClassName(subset), ")"
)
}
if (!is.na(subsetType) && subsetType != .getClassName(subset)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"all subsets must have the same type (found ", subsetType, " and ", .getClassName(subset), ")"
)
}
subsetType <- .getClassName(subset)
if (is.null(subset[[".data"]])) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"subset ", subsetName, " does not contain field '.data'"
)
}
subset <- subset$.data
subset$subset <- rep(subsetName, nrow(subset))
if (is.null(subsets)) {
subsets <- subset
} else {
subsets <- rbind(subsets, subset)
}
}
}
if (length(emptySubsetNames) > 0) {
emptySubsetNames <- unique(emptySubsetNames)
template <- subsets[subsets$subset == ifelse(stratifiedInput, "R", "F"), ]
colNames <- colnames(template)
colNames <- colNames[!(colNames %in% c("stage", "group", "subset"))]
for (colName in colNames) {
template[[colName]] <- rep(NA_real_, nrow(template))
}
for (subsetName in emptySubsetNames) {
template$subset <- rep(subsetName, nrow(template))
subsets <- rbind(subsets, template)
}
if (length(emptySubsetNames) == 1) {
warning("The undefined subset ", emptySubsetNames,
" was defined as empty subset",
call. = FALSE
)
} else {
warning(gettextf(
"The %s undefined subsets %s were defined as empty subsets",
length(emptySubsetNames), .arrayToString(emptySubsetNames)
), call. = FALSE)
}
}
return(subsets)
}
.validateEnrichmentDataFrameAtFirstStage <- function(dataFrame, params) {
dataFrameStage1 <- dataFrame[dataFrame$stage == 1, ]
for (param in params) {
paramValue <- dataFrameStage1[[param]]
if (any(is.null(paramValue) || any(is.infinite(paramValue)))) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
gettextf(
"all %s values (%s) at first stage must be valid",
sQuote(param), .arrayToString(paramValue, maxLength = 10)
)
)
}
if (any(is.na(paramValue))) {
subsets <- unique(dataFrame$subset)
for (s in subsets) {
subData <- dataFrame[dataFrame$subset == s, ]
subsetParamValues <- subData[[param]]
if (!all(is.na(subsetParamValues)) && any(is.na(subsetParamValues[subData$stage == 1]))) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
gettextf(
"all %s values (%s) at first stage must be valid (NA is not allowed)",
sQuote(param), .arrayToString(paramValue, maxLength = 10)
)
)
}
}
}
}
}
.getEndpointSpecificDataFrameParameterNames <- function(dataFrame) {
paramNames <- colnames(dataFrame)
paramNames <- paramNames[!(paramNames %in% c("stage", "group", "subset"))]
return(paramNames)
}
.validateEnrichmentDataFrameDeselection <- function(dataFrame) {
paramNames <- .getEndpointSpecificDataFrameParameterNames(dataFrame)
for (i in 1:nrow(dataFrame)) {
row <- dataFrame[i, paramNames]
if (any(is.na(row)) && !all(is.na(row))) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(
paste0(
"inconsistent deselection in group %s at stage %s (",
"%s: all or none must be NA)"
),
dataFrame$group[i], dataFrame$stage[i], .arrayToString(paramNames, maxCharacters = 40)
)
)
}
}
subsets <- unique(dataFrame$subset)
for (s in subsets) {
deselectedStage <- 0
for (stage in unique(dataFrame$stage)) {
subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage, paramNames]
if (deselectedStage > 0 && !all(is.na(subData))) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(paste0(
"%s was deselected at stage %s ",
"and therefore must be also deselected in the following stages, ",
"but is no longer deselected in stage %s"
), s, deselectedStage, stage)
)
}
if (any(is.na(subData))) {
deselectedStage <- stage
}
}
}
}
.validateEnrichmentDataFrameMeans <- function(dataFrame) {
if (any(na.omit(dataFrame$stDev) <= 0) || any(na.omit(dataFrame$overallStDev) <= 0)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all standard deviations must be > 0")
}
if (any(na.omit(dataFrame$sampleSize) <= 0) || any(na.omit(dataFrame$overallSampleSize) <= 0)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be > 0")
}
.validateEnrichmentDataFrameAtFirstStage(dataFrame,
params = c("sampleSize", "overallSampleSize", "mean", "overallMean", "stDev", "overallStDev")
)
.validateEnrichmentDataFrameDeselection(dataFrame)
subsets <- unique(dataFrame$subset)
if ("F" %in% subsets) {
subsets <- subsets[subsets != "F"]
fullData <- dataFrame[dataFrame$subset == "F", ]
for (s in subsets) {
for (stage in unique(dataFrame$stage)) {
for (group in unique(dataFrame$group)) {
subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]
stDevFull <- na.omit(fullData$stDev[fullData$stage == stage & fullData$group == group])
stDevSubset <- na.omit(subData$stDev)
if (length(stDevFull) > 0 && length(stDevSubset) > 0 && any(stDevFull <= stDevSubset)) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(
"'stDev' F (%s) must be > 'stDev' %s (%s) in group %s at stage %s",
.arrayToString(stDevFull), s,
.arrayToString(stDevSubset), group, stage
)
)
}
sampleSizeFull <- na.omit(fullData$sampleSize[fullData$stage == stage & fullData$group == group])
sampleSizeSubset <- na.omit(subData$sampleSize)
if (length(sampleSizeFull) > 0 && length(sampleSizeSubset) > 0 && any(sampleSizeFull < sampleSizeSubset)) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(
"'sampleSize' F (%s) must be >= 'sampleSize' %s (%s) in group %s at stage %s",
.arrayToString(sampleSizeFull), s,
.arrayToString(sampleSizeSubset), group, stage
)
)
}
}
}
}
}
}
.validateEnrichmentDataFrameSurvival <- function(dataFrame) {
if (any(na.omit(dataFrame$event) < 0) || any(na.omit(dataFrame$overallEvent) < 0)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0")
}
.validateEnrichmentDataFrameAtFirstStage(dataFrame,
params = c("event", "overallEvent")
)
.validateEnrichmentDataFrameDeselection(dataFrame)
subsets <- unique(dataFrame$subset)
if ("F" %in% subsets) {
subsets <- subsets[subsets != "F"]
fullData <- dataFrame[dataFrame$subset == "F", ]
for (s in subsets) {
for (stage in unique(dataFrame$stage)) {
for (group in unique(dataFrame$group)) {
subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]
eventFull <- na.omit(fullData$event[fullData$stage == stage & fullData$group == group])
eventSubset <- na.omit(subData$event)
if (length(eventFull) > 0 && length(eventSubset) > 0 && any(eventFull < eventSubset)) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(
"'event' F (%s) must be >= 'event' %s (%s) in group %s at stage %s",
.arrayToString(eventFull), s,
.arrayToString(eventSubset), group, stage
)
)
}
}
}
}
}
}
.validateEnrichmentDataFrameRates <- function(dataFrame) {
if (any(na.omit(dataFrame$sampleSize) <= 0) || any(na.omit(dataFrame$overallSampleSize) <= 0)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be > 0")
}
.validateEnrichmentDataFrameAtFirstStage(dataFrame,
params = c("sampleSize", "overallSampleSize")
)
.validateEnrichmentDataFrameDeselection(dataFrame)
subsets <- unique(dataFrame$subset)
if ("F" %in% subsets) {
subsets <- subsets[subsets != "F"]
fullData <- dataFrame[dataFrame$subset == "F", ]
for (s in subsets) {
for (stage in unique(dataFrame$stage)) {
for (group in unique(dataFrame$group)) {
subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]
sampleSizeFull <- na.omit(fullData$sampleSize[fullData$stage == stage & fullData$group == group])
sampleSizeSubset <- na.omit(subData$sampleSize)
if (length(sampleSizeFull) > 0 && length(sampleSizeSubset) > 0 && any(sampleSizeFull < sampleSizeSubset)) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(
"'sampleSize' F (%s) must be >= 'sampleSize' %s (%s) in group %s at stage %s",
.arrayToString(sampleSizeFull), s,
.arrayToString(sampleSizeSubset), group, stage
)
)
}
}
}
}
}
.validateEnrichmentDataFrameSurvival(dataFrame)
}
.validateEnrichmentDataFrameHasConsistentNumberOfStages <- function(dataFrame) {
subsets <- unique(dataFrame$subset)
kMaxList <- list()
for (s in subsets) {
subsetStages <- as.integer(sort(unique(na.omit(as.character(dataFrame$stage[dataFrame$subset == s])))))
kMax <- max(subsetStages)
if (!identical(1:kMax, subsetStages)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
gettextf("subset %s has incomplete stages (%s)", s, .arrayToString(subsetStages))
)
}
kMaxList[[s]] <- kMax
}
kMax <- unique(unlist(kMaxList))
if (length(kMax) > 1) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
"all subsets must have the identical number of stages defined (kMax: ", .listToString(kMaxList), ")"
)
}
}
.validateEnrichmentDataFrame <- function(dataFrame) {
paramNames <- colnames(dataFrame)
if (any(grepl("(S|s)tDev", paramNames))) {
.validateEnrichmentDataFrameMeans(dataFrame)
} else if (any(grepl("(S|s)ampleSize", paramNames)) && any(grepl("(E|e)vent", paramNames))) {
.validateEnrichmentDataFrameRates(dataFrame)
} else if (any(grepl("(L|l)ogRank", paramNames)) || any(grepl("(E|e)xpectedEvent", paramNames))) {
.validateEnrichmentDataFrameSurvival(dataFrame)
} else {
print(paramNames)
stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "could not identify the endpoint of the specified dataset")
}
subsets <- unique(dataFrame$subset)
if ("R" %in% subsets) {
paramNames <- .getEndpointSpecificDataFrameParameterNames(dataFrame)
paramName <- paramNames[1]
subsets <- subsets[subsets != "R"]
subsets <- subsets[grepl("^S\\d$", subsets)]
if (length(subsets) > 0) {
restData <- dataFrame[dataFrame$subset == "R", ]
for (s in subsets) {
stages <- unique(dataFrame$stage)
stages <- stages[stages != 1]
if (length(stages) > 0) {
for (stage in stages) {
for (group in unique(dataFrame$group)) {
subData <- dataFrame[dataFrame$subset == s & dataFrame$stage == stage & dataFrame$group == group, ]
paramValueRest <- restData[[paramName]][restData$stage == stage & restData$group == group]
paramValueSubset <- subData[[paramName]]
if (length(paramValueRest) > 0 && length(paramValueSubset) > 0 &&
any(is.na(paramValueSubset)) && !all(is.na(paramValueRest))) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
gettextf(
paste0(
"if %s is deselected (NA) then R also must be deselected (NA) but, e.g., ",
"%s R is %s in group %s at stage %s"
),
s, sQuote(paramName), .arrayToString(paramValueRest, vectorLookAndFeelEnabled = TRUE),
group, stage
)
)
}
}
}
}
}
}
}
.validateEnrichmentDataFrameHasConsistentNumberOfStages(dataFrame)
}
.getEnrichmentDataFrameFromArgs <- function(...) {
dataFrame <- .getSubsetsFromArgs(...)
validColumns <- c()
for (j in 1:ncol(dataFrame)) {
if (!all(is.na(dataFrame[, j]))) {
validColumns <- c(validColumns, j)
}
}
if (length(validColumns) > 0) {
dataFrame <- dataFrame[, validColumns]
}
return(dataFrame)
}
.getEnrichmentDatasetFromArgs <- function(...) {
dataFrame <- .getEnrichmentDataFrameFromArgs(...)
.validateEnrichmentDataFrame(dataFrame)
dataFrame <- .getWideFormat(dataFrame)
return(.getDataset(dataFrame = dataFrame))
}
.getDatasetExample <- function(exampleType) {
if (exampleType == "means") {
return(getDataset(
n1 = c(13, 25),
n2 = c(15, NA),
n3 = c(14, 27),
n4 = c(12, 29),
means1 = c(24.2, 22.2),
means2 = c(18.8, NA),
means3 = c(26.7, 27.7),
means4 = c(9.2, 12.2),
stDevs1 = c(24.4, 22.1),
stDevs2 = c(21.2, NA),
stDevs3 = c(25.6, 23.2),
stDevs4 = c(21.5, 22.7)
))
} else if (exampleType == "rates") {
return(getDataset(
n1 = c(23, 25),
n2 = c(25, NA),
n3 = c(24, 27),
n4 = c(22, 29),
events1 = c(15, 12),
events2 = c(19, NA),
events3 = c(18, 22),
events4 = c(12, 13)
))
} else if (exampleType == "survival") {
return(getDataset(
events1 = c(25, 32),
events2 = c(18, NA),
events3 = c(22, 36),
logRanks1 = c(2.2, 1.8),
logRanks2 = c(1.99, NA),
logRanks3 = c(2.32, 2.11)
))
}
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'exampleType' (", exampleType, ") is not allowed")
}
#'
#' @name Dataset
#'
#' @title
#' Dataset
#'
#' @description
#' Basic class for datasets.
#'
#' @template field_stages
#' @template field_groups
#'
#' @details
#' \code{Dataset} is the basic class for
#' \itemize{
#' \item \code{\link{DatasetMeans}},
#' \item \code{\link{DatasetRates}},
#' \item \code{\link{DatasetSurvival}}, and
#' \item \code{\link{DatasetEnrichmentSurvival}}.
#' }
#' This basic class contains the fields \code{stages} and \code{groups} and several commonly used
#' functions.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include class_design.R
#' @include f_core_constants.R
#' @include f_core_assertions.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
Dataset <- setRefClass("Dataset",
contains = "ParameterSet",
fields = list(
.data = "data.frame",
.plotSettings = "PlotSettings",
.id = "integer",
.description = "character",
.floatingPointNumbersEnabled = "logical",
.kMax = "integer",
.enrichmentEnabled = "logical",
.inputType = "character",
.design = "ANY",
stages = "integer",
groups = "integer",
subsets = "character"
),
methods = list(
initialize = function(dataFrame, ..., floatingPointNumbersEnabled = FALSE, enrichmentEnabled = FALSE) {
callSuper(
.floatingPointNumbersEnabled = floatingPointNumbersEnabled,
.enrichmentEnabled = enrichmentEnabled, ...
)
.plotSettings <<- PlotSettings()
.parameterNames <<- .getParameterNames(dataset = .self)
.parameterFormatFunctions <<- C_PARAMETER_FORMAT_FUNCTIONS
.id <<- NA_integer_
.description <<- NA_character_
.inputType <<- NA_character_
if (!missing(dataFrame)) {
.initByDataFrame(dataFrame)
.kMax <<- getNumberOfStages()
if (!.enrichmentEnabled) {
.validateDataset()
}
}
},
getPlotSettings = function() {
return(.plotSettings)
},
show = function(showType = 1, digits = NA_integer_) {
"Method for automatically printing dataset objects"
.show(showType = showType, digits = digits, consoleOutputEnabled = TRUE)
},
.show = function(showType = 1, digits = NA_integer_, consoleOutputEnabled = TRUE) {
.resetCat()
if (!is.null(showType) && length(showType) == 1 && !is.na(showType) &&
is.character(showType) && showType == "rcmd") {
s <- strsplit(getObjectRCode(.self, stringWrapParagraphWidth = NULL), "), *")[[1]]
s[2:length(s)] <- paste0("\t", s[2:length(s)])
s <- paste0(s, collapse = "),\n")
cat(s, "\n")
} else if (showType == 2) {
callSuper(showType = showType, digits = digits, consoleOutputEnabled = consoleOutputEnabled)
} else {
.showParametersOfOneGroup(.getUserDefinedParameters(),
title = .toString(startWithUpperCase = TRUE), orderByParameterName = FALSE,
consoleOutputEnabled = consoleOutputEnabled
)
.showParametersOfOneGroup(.getGeneratedParameters(),
title = "Calculated data", orderByParameterName = FALSE,
consoleOutputEnabled = consoleOutputEnabled
)
.showUnknownParameters(consoleOutputEnabled = consoleOutputEnabled)
if (!is.na(.description) && nchar(.description) > 0) {
.cat("Description: ", .description, "\n\n",
consoleOutputEnabled = consoleOutputEnabled
)
}
}
},
.initByDataFrame = function(dataFrame) {
if (!is.data.frame(dataFrame)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"'dataFrame' must be a data.frame (is an instance of class ", .getClassName(dataFrame), ")"
)
}
if (!.paramExists(dataFrame, "stage") && !.paramExists(dataFrame, "stages")) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"'dataFrame' must contain parameter 'stages' or 'stage'"
)
}
stages <<- as.integer(.getValuesByParameterName(dataFrame, c("stages", "stage")))
if (!.enrichmentEnabled && length(unique(stages)) < length(stages)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'stages' (", .arrayToString(stages),
") must be a unique vector of stage numbers"
)
}
groups <<- rep(1L, length(stages))
.setParameterType("groups", C_PARAM_USER_DEFINED)
.setParameterType("stages", C_PARAM_USER_DEFINED)
if (any(grepl("^subsets?\\d*$", colnames(dataFrame)))) {
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, c(C_KEY_WORDS_SAMPLE_SIZES, C_KEY_WORDS_LOG_RANKS))
subsets <<- character(0)
for (group in 1:numberOfTreatmentGroups) {
suffix <- ifelse(any(grepl("^subsets?\\d+$", colnames(dataFrame))), group, "")
subsets <<- c(subsets, .getValuesByParameterName(dataFrame, C_KEY_WORDS_SUBSETS, suffix = suffix))
}
.setParameterType("subsets", C_PARAM_USER_DEFINED)
} else {
subsets <<- rep(NA_character_, length(stages))
}
},
.validateDataset = function() {
.assertIsValidKMax(kMax = getNumberOfStages())
for (var in names(.self)) {
values <- .self[[var]]
if (any(is.nan(values)) || any(is.infinite(values))) {
stop(
C_EXCEPTION_TYPE_RUNTIME_ISSUE, "'", var, "' (", .arrayToString(values),
") contains illegal values, i.e., something went wrong"
)
}
}
},
.validateValues = function(values, name) {
if (.enrichmentEnabled) {
return(invisible())
}
l1 <- length(unique(stages))
l2 <- length(values)
if (l1 != l2) {
stop(
C_EXCEPTION_TYPE_CONFLICTING_ARGUMENTS,
"there ", ifelse(l1 == 1, paste("is", l1, "stage"),
paste("are", l1, "stages")
), " defined",
" (", .arrayToString(unique(stages)), ") and '", name, "' has length ", l2
)
}
},
.recreateDataFrame = function() {
.data <<- data.frame(
stage = factor(stages),
group = factor(groups),
subset = factor(subsets)
)
},
.setDataToVariables = function() {
stages <<- as.integer(.data$stage)
groups <<- as.integer(.data$group)
subsets <<- as.character(.data$subset)
},
.fillWithNAs = function(kMax) {
numberOfStages <- getNumberOfStages()
.kMax <<- numberOfStages
if (numberOfStages >= kMax) {
return(invisible())
}
numberOfGroups <- getNumberOfGroups(survivalCorrectionEnabled = FALSE)
if (.enrichmentEnabled) {
for (stage in (numberOfStages + 1):kMax) {
for (group in 1:numberOfGroups) {
for (subset in levels(.data$subset)) {
stages <<- c(stages, stage)
groups <<- c(groups, group)
subsets <<- c(subsets, subset)
}
}
}
} else {
for (stage in (numberOfStages + 1):kMax) {
for (group in 1:numberOfGroups) {
stages <<- c(stages, stage)
groups <<- c(groups, group)
subsets <<- c(subsets, NA_character_)
}
}
}
},
.trim = function(kMax) {
if (is.na(kMax)) {
kMax <- .kMax
}
numberOfStages <- getNumberOfStages(FALSE)
if (numberOfStages <= kMax) {
return(invisible(numeric(0)))
}
indices <- which(stages <= kMax)
stages <<- stages[indices]
groups <<- groups[indices]
subsets <<- subsets[indices]
return(indices)
},
.orderDataByStageAndGroup = function() {
if (.enrichmentEnabled) {
dat <- .data
dat$char <- gsub("\\d", "", as.character(.data$subset))
dat$char[dat$char == "R"] <- "Z"
dat$char[dat$char == "F"] <- "Z"
dat$num <- as.integer(gsub("\\D", "", as.character(.data$subset)))
.data <<- .data[order(.data$stage, .data$group, dat$char, dat$num), ]
} else {
.data <<- .data[order(.data$stage, .data$group), ]
}
},
.getNumberOfNAsToAdd = function(kMax) {
n <- kMax - getNumberOfStages()
if (n <= 0) {
return(0)
}
n <- n * getNumberOfGroups(survivalCorrectionEnabled = FALSE)
if (.enrichmentEnabled) {
n <- n * getNumberOfSubsets()
}
return(n)
},
.paramExists = function(dataFrame, parameterName) {
for (p in parameterName) {
value <- dataFrame[[p]]
if (!is.null(value)) {
return(TRUE)
}
}
return(FALSE)
},
.getValuesByParameterName = function(dataFrame, parameterNameVariants, ...,
defaultValues = NULL, suffix = "") {
for (parameterName in parameterNameVariants) {
key <- paste0(parameterName, suffix)
if (.paramExists(dataFrame, key)) {
return(dataFrame[[key]])
}
}
if (!is.null(defaultValues)) {
return(defaultValues)
}
stop(
C_EXCEPTION_TYPE_MISSING_ARGUMENT, "parameter '",
paste0(parameterNameVariants[1], suffix), "' is missing or not correctly specified"
)
},
.getValueLevels = function(values) {
if (is.factor(values)) {
return(levels(values))
}
return(sort(unique(na.omit(values))))
},
.getValues = function(paramName, paramValues) {
values <- .data[[paramName]]
valueLevels <- .getValueLevels(values)
if (!all(paramValues %in% valueLevels)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'", paramName, "' (", .arrayToString(paramValues),
") out of range [", .arrayToString(valueLevels), "]"
)
}
return(values)
},
.getIndexValues = function(paramName, paramValues, subset = NA_character_) {
values <- .getValues(paramName, paramValues)
if (all(is.na(subset))) {
return(which(values %in% paramValues))
}
.assertIsValidSubset(subset)
return(which(values %in% paramValues & .data$subset %in% subset))
},
.assertIsValidSubset = function(subset) {
for (s in subset) {
if (!(s %in% levels(.data$subset))) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "'subset' (", s,
") is not a defined value [", .arrayToString(levels(.data$subset)), "]"
)
}
}
},
.getIndices = function(..., stage, group, subset = NA_character_) {
if (is.null(.data)) {
stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "'.data' must be defined")
}
if (!is.null(stage) && !any(is.na(stage)) && all(stage < 0)) {
index <- 1:getNumberOfStages()
stage <- index[!(index %in% abs(stage))]
}
if (!is.null(group) && !any(is.na(group)) && all(group < 0)) {
index <- 1:getNumberOfGroups(survivalCorrectionEnabled = FALSE)
group <- index[!(index %in% abs(group))]
}
# stage only and optional subset
if (!is.null(group) && length(group) == 1 && is.na(group)) {
return(.getIndexValues("stage", stage, subset))
}
# group only and optional subset
if (!is.null(stage) && length(stage) == 1 && is.na(stage)) {
return(.getIndexValues("group", group, subset))
}
# stage and group and optional subset
stageValues <- .getValues("stage", stage)
groupValues <- .getValues("group", group)
if (all(is.na(subset))) {
return(which(stageValues %in% stage & groupValues %in% group))
}
.assertIsValidSubset(subset)
return(which(stageValues %in% stage & groupValues %in% group & .data$subset %in% subset))
},
.getValidatedFloatingPointNumbers = function(x, parameterName = "Sample sizes") {
if (.floatingPointNumbersEnabled) {
return(x)
}
nToCheck <- stats::na.omit(x)
if (any(nToCheck != as.integer(nToCheck))) {
warning(parameterName, " specified as floating-point numbers were truncated", call. = FALSE)
}
x[!is.na(x)] <- as.integer(x[!is.na(x)])
return(x)
},
.keyWordExists = function(dataFrame, keyWords, suffix = "") {
for (key in keyWords) {
if (.paramExists(dataFrame, paste0(key, suffix))) {
return(TRUE)
}
}
return(FALSE)
},
.getNumberOfGroups = function(dataFrame, keyWords) {
for (group in 2:1000) {
if (!.keyWordExists(dataFrame, keyWords, group)) {
return(group - 1)
}
}
return(1)
},
.getValidatedStage = function(stage = NA_integer_) {
if (all(is.na(stage))) {
stage <- c(1:getNumberOfStages())
}
return(stage)
},
getNumberOfGroups = function(survivalCorrectionEnabled = TRUE) {
data <- stats::na.omit(.data)
if (!survivalCorrectionEnabled) {
return(length(levels(data$group)))
}
return(length(levels(data$group)) + ifelse(inherits(.self, "DatasetSurvival"), 1, 0))
},
getNumberOfStages = function(naOmitEnabled = TRUE) {
if (naOmitEnabled) {
colNames <- colnames(.data)
validColNames <- character(0)
for (colName in colNames) {
colValues <- .data[, colName]
if (length(colValues) > 0 && !all(is.na(colValues))) {
validColNames <- c(validColNames, colName)
}
}
subData <- stats::na.omit(.data[, validColNames])
numberOfStages <- length(unique(as.character(subData$stage)))
if (numberOfStages == 0) {
print(.data[, validColNames])
stop(
C_EXCEPTION_TYPE_RUNTIME_ISSUE,
".data seems to contain an invalid column"
)
}
return(numberOfStages)
}
return(length(levels(.data$stage)))
},
getNumberOfSubsets = function() {
return(length(levels(.data$subset)))
},
isDatasetMeans = function() {
return(inherits(.self, "DatasetMeans"))
},
isDatasetRates = function() {
return(inherits(.self, "DatasetRates"))
},
isDatasetSurvival = function() {
return(inherits(.self, "DatasetSurvival"))
},
isStratified = function() {
return(.enrichmentEnabled && "R" %in% levels(.data$subset))
},
setId = function(id) {
.id <<- as.integer(id)
},
getId = function() {
return(.id)
},
setDescription = function(description) {
.description <<- description
},
getDescription = function() {
return(.description)
},
.toString = function(startWithUpperCase = FALSE) {
s <- "dataset of "
if (.enrichmentEnabled) {
s <- paste0(s, "enrichment ")
} else if (.self$getNumberOfGroups() > 2) {
s <- paste0(s, "multi-arm ")
}
if (isDatasetMeans()) {
s <- paste0(s, "means")
} else if (isDatasetRates()) {
s <- paste0(s, "rates")
} else if (isDatasetSurvival()) {
s <- paste0(s, "survival data")
} else {
s <- paste0(s, "unknown endpoint")
}
return(ifelse(startWithUpperCase, .firstCharacterToUpperCase(s), s))
}
)
)
#'
#' @name DatasetMeans
#'
#' @title
#' Dataset of Means
#'
#' @description
#' Class for a dataset of means.
#'
#' @template field_groups
#' @template field_stages
#' @template field_sampleSizes
#' @template field_means
#' @template field_stDevs
#' @template field_overallSampleSizes
#' @template field_overallMeans
#' @template field_overallStDevs
#'
#' @details
#' This object cannot be created directly; better use \code{\link{getDataset}}
#' with suitable arguments to create a dataset of means.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include f_core_constants.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
DatasetMeans <- setRefClass("DatasetMeans",
contains = "Dataset",
fields = list(
sampleSizes = "numeric",
means = "numeric",
stDevs = "numeric",
overallSampleSizes = "numeric",
overallMeans = "numeric",
overallStDevs = "numeric"
),
methods = list(
getSampleSize = function(stage, group = 1, subset = NA_character_) {
return(.data$sampleSize[.getIndices(stage = stage, group = group, subset = subset)])
},
getMean = function(stage, group = 1, subset = NA_character_) {
return(.data$mean[.getIndices(stage = stage, group = group, subset = subset)])
},
getStDev = function(stage, group = 1, subset = NA_character_) {
return(.data$stDev[.getIndices(stage = stage, group = group, subset = subset)])
},
getSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$sampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getMeans = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$mean[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getStDevs = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$stDev[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$sampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getMeansUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$mean[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getStDevsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$stDev[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallSampleSize = function(stage, group = 1, subset = NA_character_) {
return(.data$overallSampleSize[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallMean = function(stage, group = 1, subset = NA_character_) {
return(.data$overallMean[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallStDev = function(stage, group = 1, subset = NA_character_) {
return(.data$overallStDev[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallSampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallMeans = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallMean[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallStDevs = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallStDev[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallSampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallMeansUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallMean[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallStDevsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallStDev[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
.initByDataFrame = function(dataFrame) {
callSuper(dataFrame)
# case: one mean - stage wise
if (.paramExists(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)) {
.inputType <<- "stagewise"
sampleSizes <<- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
dataFrame,
C_KEY_WORDS_SAMPLE_SIZES
), parameterName = "Sample sizes")
.validateValues(sampleSizes, "n")
if (any(stats::na.omit(sampleSizes) <= 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"all sample sizes must be > 0, but 'n' = ",
.arrayToString(sampleSizes, vectorLookAndFeelEnabled = TRUE)
)
}
means <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_MEANS)
.validateValues(means, "means")
stDevs <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_ST_DEVS)
.validateValues(stDevs, "stDevs")
}
# case: one mean - overall
else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)) {
.inputType <<- "overall"
overallSampleSizes <<- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
dataFrame,
C_KEY_WORDS_OVERALL_SAMPLE_SIZES
), parameterName = "Cumulative sample sizes ")
.validateValues(overallSampleSizes, "overallSampleSizes")
overallMeans <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_MEANS)
.validateValues(overallMeans, "overallMeans")
overallStDevs <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_ST_DEVS)
.validateValues(overallStDevs, "overallStDevs")
}
# case: two or more means - stage wise
else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 1)) &&
.paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 2))) {
.inputType <<- "stagewise"
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)
stages <<- rep(stages, numberOfTreatmentGroups)
groups <<- integer(0)
sampleSizes <<- numeric(0)
means <<- numeric(0)
stDevs <<- numeric(0)
for (group in 1:numberOfTreatmentGroups) {
sampleSizesTemp <- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
dataFrame, C_KEY_WORDS_SAMPLE_SIZES,
suffix = group
), parameterName = "Sample sizes")
.validateValues(sampleSizesTemp, paste0("n", group))
if (any(stats::na.omit(sampleSizesTemp) <= 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"all sample sizes must be > 0, but 'n", group, "' = ",
.arrayToString(sampleSizesTemp, vectorLookAndFeelEnabled = TRUE)
)
}
sampleSizes <<- c(sampleSizes, sampleSizesTemp)
meansTemp <- .getValuesByParameterName(dataFrame, C_KEY_WORDS_MEANS, suffix = group)
.validateValues(meansTemp, paste0("means", group))
means <<- c(means, meansTemp)
stDevsTemp <- .getValuesByParameterName(dataFrame, C_KEY_WORDS_ST_DEVS, suffix = group)
.validateValues(stDevsTemp, paste0("stDevs", group))
stDevs <<- c(stDevs, stDevsTemp)
groups <<- c(groups, rep(as.integer(group), length(sampleSizesTemp)))
}
}
# case: two or more means - overall
else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 1)) &&
.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 2))) {
.inputType <<- "overall"
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)
stages <<- rep(stages, numberOfTreatmentGroups)
groups <<- integer(0)
sampleSizes <<- numeric(0)
means <<- numeric(0)
stDevs <<- numeric(0)
overallSampleSizes <<- numeric(0)
overallMeans <<- numeric(0)
overallStDevs <<- numeric(0)
for (group in 1:numberOfTreatmentGroups) {
overallSampleSizesTemp <- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES,
suffix = group
), parameterName = "Cumulative sample sizes")
.validateValues(overallSampleSizesTemp, paste0("overallSampleSizes", group))
overallSampleSizes <<- c(overallSampleSizes, overallSampleSizesTemp)
overallMeansTemp <- .getValuesByParameterName(dataFrame,
C_KEY_WORDS_OVERALL_MEANS,
suffix = group
)
.validateValues(overallMeansTemp, paste0("overallMeans", group))
overallMeans <<- c(overallMeans, overallMeansTemp)
overallStDevsTemp <- .getValuesByParameterName(dataFrame,
C_KEY_WORDS_OVERALL_ST_DEVS,
suffix = group
)
.validateValues(overallStDevsTemp, paste0("overallStDevs", group))
overallStDevs <<- c(overallStDevs, overallStDevsTemp)
groups <<- c(groups, rep(as.integer(group), length(overallSampleSizesTemp)))
}
} else {
stop(
C_EXCEPTION_TYPE_MISSING_ARGUMENT,
"sample sizes are missing or not correctly specified"
)
}
if (.inputType == "stagewise") {
n <- length(sampleSizes)
overallSampleSizes <<- rep(NA_real_, n)
overallMeans <<- rep(NA_real_, n)
overallStDevs <<- rep(NA_real_, n)
.setParameterType("sampleSizes", C_PARAM_USER_DEFINED)
.setParameterType("means", C_PARAM_USER_DEFINED)
.setParameterType("stDevs", C_PARAM_USER_DEFINED)
.setParameterType("overallSampleSizes", C_PARAM_GENERATED)
.setParameterType("overallMeans", C_PARAM_GENERATED)
.setParameterType("overallStDevs", C_PARAM_GENERATED)
.recreateDataFrame()
.createOverallData()
} else {
n <- length(overallSampleSizes)
sampleSizes <<- rep(NA_real_, n)
means <<- rep(NA_real_, n)
stDevs <<- rep(NA_real_, n)
.setParameterType("sampleSizes", C_PARAM_GENERATED)
.setParameterType("means", C_PARAM_GENERATED)
.setParameterType("stDevs", C_PARAM_GENERATED)
.setParameterType("overallSampleSizes", C_PARAM_USER_DEFINED)
.setParameterType("overallMeans", C_PARAM_USER_DEFINED)
.setParameterType("overallStDevs", C_PARAM_USER_DEFINED)
.recreateDataFrame()
.createStageWiseData()
}
if (sum(stats::na.omit(sampleSizes) < 0) > 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be >= 0")
}
if (sum(stats::na.omit(stDevs) < 0) > 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all standard deviations must be >= 0")
}
},
.recreateDataFrame = function() {
callSuper()
.data <<- cbind(.data, data.frame(
sampleSize = sampleSizes,
mean = means,
stDev = stDevs,
overallSampleSize = overallSampleSizes,
overallMean = overallMeans,
overallStDev = overallStDevs
))
.orderDataByStageAndGroup()
.setDataToVariables()
},
.setDataToVariables = function() {
callSuper()
sampleSizes <<- .data$sampleSize
means <<- .data$mean
stDevs <<- .data$stDev
overallSampleSizes <<- .data$overallSampleSize
overallMeans <<- .data$overallMean
overallStDevs <<- .data$overallStDev
},
.fillWithNAs = function(kMax) {
callSuper(kMax)
n <- .getNumberOfNAsToAdd(kMax)
naRealsToAdd <- rep(NA_real_, n)
sampleSizes <<- c(sampleSizes, naRealsToAdd)
means <<- c(means, naRealsToAdd)
stDevs <<- c(stDevs, naRealsToAdd)
overallSampleSizes <<- c(overallSampleSizes, naRealsToAdd)
overallMeans <<- c(overallMeans, naRealsToAdd)
overallStDevs <<- c(overallStDevs, naRealsToAdd)
.recreateDataFrame()
},
.trim = function(kMax = NA_integer_) {
indices <- callSuper(kMax)
if (length(indices) == 0) {
return(invisible(FALSE))
}
sampleSizes <<- sampleSizes[indices]
means <<- means[indices]
stDevs <<- stDevs[indices]
overallSampleSizes <<- overallSampleSizes[indices]
overallMeans <<- overallMeans[indices]
overallStDevs <<- overallStDevs[indices]
.recreateDataFrame()
return(invisible(TRUE))
},
.getOverallMeans = function(sampleSizes, means) {
return(cumsum(sampleSizes * means) / cumsum(sampleSizes))
},
.getOverallStDevs = function(sampleSizes, means, stDevs, overallMeans) {
kMax <- length(sampleSizes)
overallStDev <- rep(NA_real_, kMax)
for (k in 1:kMax) {
overallStDev[k] <- sqrt((sum((sampleSizes[1:k] - 1) * stDevs[1:k]^2) +
sum(sampleSizes[1:k] * (means[1:k] - overallMeans[k])^2)) /
(sum(sampleSizes[1:k]) - 1))
}
return(overallStDev)
},
.createOverallData = function() {
.data$overallSampleSize <<- rep(NA_real_, nrow(.data))
.data$overallMean <<- rep(NA_real_, nrow(.data))
.data$overallStDev <<- rep(NA_real_, nrow(.data))
subsetLevels <- NA_character_
if (.enrichmentEnabled) {
subsetLevels <- levels(.data$subset)
}
for (s in subsetLevels) {
for (g in levels(.data$group)) {
if (!is.na(s)) {
indices <- which(.data$subset == s & .data$group == g)
} else {
indices <- which(.data$group == g)
}
.data$overallSampleSize[indices] <<- cumsum(.data$sampleSize[indices])
.data$overallMean[indices] <<- .getOverallMeans(
.data$sampleSize[indices], .data$mean[indices]
)
.data$overallStDev[indices] <<- .getOverallStDevs(
.data$sampleSize[indices],
.data$mean[indices], .data$stDev[indices], .data$overallMean[indices]
)
}
}
.setDataToVariables()
},
.getStageWiseSampleSizes = function(overallSampleSizes) {
result <- overallSampleSizes
if (length(overallSampleSizes) == 1) {
return(result)
}
kMax <- length(overallSampleSizes)
result[2:kMax] <- overallSampleSizes[2:kMax] - overallSampleSizes[1:(kMax - 1)]
return(result)
},
.getStageWiseMeans = function(sampleSizes, overallSampleSizes, overallMeans) {
result <- overallMeans
if (length(overallMeans) == 1) {
return(result)
}
for (k in 2:length(overallMeans)) {
result[k] <- (overallSampleSizes[k] * overallMeans[k] -
overallSampleSizes[k - 1] * overallMeans[k - 1]) / sampleSizes[k]
}
return(result)
},
.getStageWiseStDev = function(overallStDevs, sampleSizes, overallSampleSizes, means, overallMeans, k) {
numBeforeK <- (overallSampleSizes[k - 1] - 1) * overallStDevs[k - 1]^2
numK <- (overallSampleSizes[k] - 1) * overallStDevs[k]^2
numSumBeforeK <- sum(sampleSizes[1:(k - 1)] * (means[1:(k - 1)] - overallMeans[k - 1])^2)
numSumK <- sum(sampleSizes[1:k] * (means[1:k] - overallMeans[k])^2)
denom <- (sampleSizes[k] - 1)
value <- (numK - numBeforeK + numSumBeforeK - numSumK) / denom
if (is.null(value) || length(value) != 1 || is.na(value) || value < 0) {
warning("No calculation of stage-wise standard deviation from ",
"overall standard deviations possible at stage ", k,
call. = FALSE
)
return(NA_real_)
}
return(sqrt(value))
},
.getStageWiseStDevs = function(overallStDevs, sampleSizes, overallSampleSizes, means, overallMeans) {
result <- overallStDevs
if (length(overallStDevs) == 1) {
return(result)
}
for (k in 2:length(overallStDevs)) {
result[k] <- .getStageWiseStDev(overallStDevs, sampleSizes, overallSampleSizes, means, overallMeans, k)
}
return(result)
},
.createStageWiseData = function() {
"Calculates stage-wise means and standard deviation if cunulative data is available"
.data$sampleSize <<- rep(NA_real_, nrow(.data))
.data$mean <<- rep(NA_real_, nrow(.data))
.data$stDev <<- rep(NA_real_, nrow(.data))
subsetLevels <- NA_character_
if (.enrichmentEnabled) {
subsetLevels <- levels(.data$subset)
}
for (s in subsetLevels) {
for (g in levels(.data$group)) {
if (!is.na(s)) {
indices <- which(.data$subset == s & .data$group == g)
} else {
indices <- which(.data$group == g)
}
.assertValuesAreStrictlyIncreasing(.data$overallSampleSize[indices],
paste0("overallSampleSizes", g),
endingNasAllowed = TRUE
)
.data$sampleSize[indices] <<- .getStageWiseSampleSizes(.data$overallSampleSize[indices])
.data$mean[indices] <<- .getStageWiseMeans(
.data$sampleSize[indices],
.data$overallSampleSize[indices], .data$overallMean[indices]
)
.data$stDev[indices] <<- .getStageWiseStDevs(
.data$overallStDev[indices], .data$sampleSize[indices],
.data$overallSampleSize[indices], .data$mean[indices], .data$overallMean[indices]
)
}
}
.setDataToVariables()
},
getRandomData = function() {
return(.getRandomDataMeans(.self))
}
)
)
#' @examples
#'
#' datasetExample <- getDataset(
#' means1 = c(112.3, 105.1, 121.3),
#' means2 = c(98.1, 99.3, 100.1),
#' means3 = c(98.1, 99.3, 100.1),
#' stDevs1 = c(44.4, 42.9, 41.4),
#' stDevs2 = c(46.7, 41.1, 39.5),
#' stDevs3 = c(46.7, 41.1, 39.5),
#' n1 = c(84, 81, 82),
#' n2 = c(87, 83, 81),
#' n3 = c(87, 82, 84)
#' )
#' .getRandomDataMeans(datasetExample,
#' randomDataParamName = "outcome", numberOfVisits = 3,
#' fixedCovariates = list(gender = c("f", "m"), bmi = c(17, 40))
#' )
#'
#' @noRd
#'
.getRandomDataMeans <- function(dataset, ...,
treatmentName = "Treatment group",
controlName = "Control group",
randomDataParamName = "randomData",
numberOfVisits = 1L,
fixedCovariates = NULL,
covariateEffects = NULL,
seed = NA_real_) {
if (!is.null(fixedCovariates)) {
if (!is.list(fixedCovariates)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("fixedCovariates"), " must be a named list")
}
}
if (!is.null(covariateEffects)) {
if (!is.list(covariateEffects)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("covariateEffects"), " must be a named list")
}
}
.assertIsSingleCharacter(treatmentName, "treatmentName")
.assertIsSingleCharacter(controlName, "controlName")
.assertIsSingleCharacter(randomDataParamName, "randomDataParamName")
.assertIsSinglePositiveInteger(numberOfVisits, "numberOfVisits", validateType = FALSE)
.assertIsSingleNumber(seed, "seed", naAllowed = TRUE)
seed <- .setSeed(seed)
numberOfGroups <- dataset$getNumberOfGroups()
sampleSize <- 0
for (stage in 1:dataset$getNumberOfStages()) {
for (group in 1:numberOfGroups) {
if (dataset$.enrichmentEnabled) {
for (subset in levels(dataset$.data$subset)) {
n <- dataset$getSampleSize(stage = stage, group = group, subset = subset)
if (n > sampleSize) {
sampleSize <- n
}
}
} else {
n <- dataset$getSampleSize(stage = stage, group = group)
n <- round(n / numberOfVisits)
if (n > sampleSize) {
sampleSize <- n
}
}
}
}
idFactor <- 10^nchar(as.character(sampleSize))
data <- NULL
for (stage in 1:dataset$getNumberOfStages()) {
for (group in 1:numberOfGroups) {
for (visit in 1:numberOfVisits) {
if (dataset$.enrichmentEnabled) {
for (subset in levels(dataset$.data$subset)) {
n <- dataset$getSampleSize(stage = stage, group = group, subset = subset)
randomData <- stats::rnorm(
n = n,
mean = dataset$getMean(stage = stage, group = group, subset = subset),
sd = dataset$getStDev(stage = stage, group = group, subset = subset)
)
row <- data.frame(
subject = idFactor * group + c(1:n),
stage = rep(stage, n),
group = rep(group, n),
subset = rep(subset, n),
randomData = randomData
)
if (is.null(data)) {
data <- row
} else {
data <- rbind(data, row)
}
}
} else {
n <- dataset$getSampleSize(stage = stage, group = group)
n <- floor(n / numberOfVisits)
randomData <- stats::rnorm(
n = sampleSize,
mean = dataset$getMean(stage = stage, group = group),
sd = dataset$getStDev(stage = stage, group = group)
)
subjectIds <- (idFactor * 10 * stage) + (idFactor * group) + c(1:sampleSize)
indices <- 1:sampleSize
randomDataBefore <- NULL
numberOfDropOutsBefore <- 0
if (visit > 1 && !is.null(data)) {
randomDataBefore <- data$randomData[data$stage == visit - 1 & data$subject %in% subjectIds]
numberOfDropOutsBefore <- sum(is.na(randomDataBefore))
indices <- which(!is.na(randomDataBefore))
}
sampleSizeBefore <- sampleSize - numberOfDropOutsBefore
if (n < sampleSizeBefore) {
numberOfDropOuts <- sampleSizeBefore - n
dropOuts <- sample(c(rep(1, n - numberOfDropOuts), rep(0, numberOfDropOuts)))
randomData[indices[dropOuts == 0]] <- NA_real_
if (!is.null(randomDataBefore)) {
randomData[is.na(randomDataBefore)] <- NA_real_
}
}
row <- data.frame(
subject = subjectIds,
stage = rep(stage, sampleSize),
group = rep(group, sampleSize),
visit = rep(visit - 1, sampleSize),
randomData = randomData
)
if (is.null(data)) {
data <- row
} else {
data <- rbind(data, row)
}
}
}
}
}
data$stage <- factor(data$stage)
groupLevels <- paste(treatmentName, c(1:numberOfGroups))
if (numberOfGroups > 1) {
if (numberOfGroups == 2) {
groupLevels[1] <- treatmentName
}
groupLevels[numberOfGroups] <- controlName
}
data$group <- factor(data$group, labels = groupLevels)
if (dataset$.enrichmentEnabled) {
data$subset <- factor(data$subset)
}
if (!is.null(randomDataParamName) && length(randomDataParamName) == 1 && !is.na(randomDataParamName)) {
colNames <- colnames(data)
colNames[colNames == "randomData"] <- randomDataParamName
colnames(data) <- colNames
}
if (!is.null(fixedCovariates)) {
fixedCovariateNames <- names(fixedCovariates)
if (is.null(fixedCovariateNames) || any(nchar(trimws(fixedCovariateNames)) == 0)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote("fixedCovariates"), " must be a named list")
}
subjects <- sort(unique(data$subject))
for (fixedCovariateName in fixedCovariateNames) {
data[[fixedCovariateName]] <- rep(NA, nrow(data))
values <- fixedCovariates[[fixedCovariateName]]
if (is.null(values) || length(values) < 2 || any(is.na(values))) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(paste0("fixedCovariates$", fixedCovariateName)),
" (", .arrayToString(values), ") must be a valid numeric or character vector with a minimum of 2 values"
)
}
if (is.character(values)) {
if (length(unique(values)) < length(values)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, sQuote(paste0("fixedCovariates$", fixedCovariateName)),
" (", .arrayToString(values, maxLength = 20), ") must be a unique vector"
)
}
fixedCovariateSample <- sample(values, length(subjects), replace = TRUE)
for (i in 1:length(subjects)) {
data[[fixedCovariateName]][data$subject == subjects[i]] <- fixedCovariateSample[i]
}
} else if (is.numeric(values)) {
if (length(values) == 2) {
minValue <- min(values)
maxValue <- max(values)
covMean <- runif(1, minValue, maxValue)
covSD <- covMean * 0.1
showMessage <- TRUE
for (i in 1:length(subjects)) {
groupName <- as.character(data$group[data$subject == subjects[i]])[1]
covEffect <- 1
if (groupName == controlName && !is.null(covariateEffects)) {
covEffect <- covariateEffects[[fixedCovariateName]]
if (is.null(covEffect)) {
covEffect <- 1
} else {
.assertIsNumericVector(covEffect, paste0("covariateEffects$", fixedCovariateName))
if (showMessage) {
message(
"Add effect ", covEffect, " to ",
sQuote(fixedCovariateName), " of ", sQuote(groupName)
)
showMessage <- FALSE
}
}
}
continuesExample <- rnorm(sum(data$subject == subjects[i]), covMean * covEffect, covSD)
data[[fixedCovariateName]][data$subject == subjects[i]] <- continuesExample
}
}
}
}
}
data$seed <- rep(seed, nrow(data))
return(data)
}
#'
#' @title
#' Dataset Plotting
#'
#' @description
#' Plots a dataset.
#'
#' @param x The \code{\link{Dataset}} object to plot.
#' @param y Not available for this kind of plot (is only defined to be compatible
#' to the generic plot function).
#' @param main The main title, default is \code{"Dataset"}.
#' @param xlab The x-axis label, default is \code{"Stage"}.
#' @param ylab The y-axis label.
#' @param legendTitle The legend title, default is \code{"Group"}.
#' @inheritParams param_palette
#' @inheritParams param_showSource
#' @inheritParams param_plotSettings
#' @inheritParams param_three_dots_plot
#'
#' @details
#' Generic function to plot all kinds of datasets.
#'
#' @template return_object_ggplot
#'
#' @examples
#' # Plot a dataset of means
#' dataExample <- getDataset(
#' n1 = c(22, 11, 22, 11),
#' n2 = c(22, 13, 22, 13),
#' means1 = c(1, 1.1, 1, 1),
#' means2 = c(1.4, 1.5, 3, 2.5),
#' stDevs1 = c(1, 2, 2, 1.3),
#' stDevs2 = c(1, 2, 2, 1.3)
#' )
#' \dontrun{
#' if (require(ggplot2)) plot(dataExample, main = "Comparison of Means")
#' }
#'
#' # Plot a dataset of rates
#' dataExample <- getDataset(
#' n1 = c(8, 10, 9, 11),
#' n2 = c(11, 13, 12, 13),
#' events1 = c(3, 5, 5, 6),
#' events2 = c(8, 10, 12, 12)
#' )
#' \dontrun{
#' if (require(ggplot2)) plot(dataExample, main = "Comparison of Rates")
#' }
#'
#' @export
#'
plot.Dataset <- function(x, y, ..., main = "Dataset", xlab = "Stage", ylab = NA_character_,
legendTitle = "Group", palette = "Set1", showSource = FALSE, plotSettings = NULL) {
if (x$.enrichmentEnabled) {
stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "plot of enrichment data is not implemented yet")
}
.assertGgplotIsInstalled()
if (x$isDatasetMeans()) {
data <- x$getRandomData()
if (is.na(ylab)) {
ylab <- "Random data"
}
} else if (x$isDatasetRates()) {
data <- x$.data
if (is.na(ylab)) {
ylab <- "Frequency (Events and Sample Size)"
}
} else if (x$isDatasetSurvival()) {
# Open work: implement dataset plot of survival data
stop(C_EXCEPTION_TYPE_RUNTIME_ISSUE, "plot of survival data is not implemented yet")
}
if (!is.logical(showSource) || isTRUE(showSource)) {
warning("'showSource' != FALSE is not implemented yet for class ", .getClassName(x))
}
if (is.null(plotSettings)) {
plotSettings <- x$getPlotSettings()
}
if (x$getNumberOfGroups() == 1) {
if (x$isDatasetMeans()) {
p <- ggplot2::ggplot(
data = data,
ggplot2::aes(y = .data[["randomData"]], x = factor(.data[["stage"]]))
)
p <- p + ggplot2::geom_boxplot(ggplot2::aes(fill = .data[["stage"]]))
p <- p + ggplot2::geom_point(
colour = "#0e414e", shape = 20,
position = ggplot2::position_jitter(width = .1),
size = plotSettings$pointSize
)
p <- p + ggplot2::stat_summary(
fun = "mean", geom = "point",
shape = 21, position = ggplot2::position_dodge(.75), size = 4, fill = "white",
colour = "black", show.legend = FALSE
)
} else if (x$isDatasetRates()) {
p <- ggplot2::ggplot(show.legend = FALSE)
# plot sample size
p <- p + ggplot2::geom_bar(
data = data,
ggplot2::aes(
y = .data[["sampleSize"]],
x = factor(.data[["stage"]]), fill = factor(.data[["stage"]])
),
position = "dodge", stat = "identity", alpha = 0.4
)
# plot events
p <- p + ggplot2::geom_bar(
data = data,
ggplot2::aes(
y = .data[["event"]], x = factor(.data[["stage"]]),
fill = factor(.data[["stage"]])
),
position = "dodge", stat = "identity"
)
} else if (x$isDatasetSurvival()) {
# implement survival plot here
}
} else {
data$stageGroup <- interaction(data$stage, data$group)
if (x$isDatasetMeans()) {
p <- ggplot2::ggplot(ggplot2::aes(
y = .data[["randomData"]], x = factor(.data[["stage"]]),
fill = factor(.data[["group"]])
), data = data)
p <- p + ggplot2::geom_point(ggplot2::aes(colour = .data[["group"]]),
shape = 20,
position = ggplot2::position_dodge(.75),
size = plotSettings$pointSize
)
p <- p + ggplot2::geom_boxplot()
p <- p + ggplot2::stat_summary(ggplot2::aes(colour = .data[["group"]]),
fun = "mean", geom = "point",
shape = 21, position = ggplot2::position_dodge(.75), size = 4, fill = "white",
show.legend = FALSE
)
} else if (x$isDatasetRates()) {
p <- ggplot2::ggplot(show.legend = FALSE)
# plot sample size
p <- p + ggplot2::geom_bar(
ggplot2::aes(
y = .data[["sampleSize"]],
x = factor(.data[["stage"]]), fill = factor(.data[["group"]])
),
data = data, position = "dodge", stat = "identity", alpha = 0.4
)
# plot events
p <- p + ggplot2::geom_bar(
data = data,
ggplot2::aes(
y = .data[["event"]], x = factor(.data[["stage"]]),
fill = factor(.data[["group"]])
),
position = "dodge", stat = "identity"
)
} else if (x$isDatasetSurvival()) {
# implement survival plot here
}
}
# hide second legend
if (x$getNumberOfGroups() == 1) {
p <- p + ggplot2::guides(fill = FALSE, colour = FALSE)
} else {
p <- p + ggplot2::guides(colour = FALSE)
}
# set theme
p <- plotSettings$setTheme(p)
# p <- designSet$getPlotSettings()$hideGridLines(p)
# set main title
p <- plotSettings$setMainTitle(p, main)
# set axes labels
p <- plotSettings$setAxesLabels(p, xlab = xlab, ylab = ylab)
# set legend
if (x$getNumberOfGroups() > 1) {
p <- plotSettings$setLegendPosition(p, legendPosition = C_POSITION_OUTSIDE_PLOT)
p <- plotSettings$setLegendBorder(p)
p <- plotSettings$setLegendTitle(p, legendTitle, mode = "fill")
p <- plotSettings$setLegendLabelSize(p)
}
p <- plotSettings$setAxesAppearance(p)
p <- plotSettings$setColorPalette(p, palette, mode = "all")
p <- plotSettings$enlargeAxisTicks(p)
companyAnnotationEnabled <- .getOptionalArgument("companyAnnotationEnabled", ...)
if (is.null(companyAnnotationEnabled) || !is.logical(companyAnnotationEnabled)) {
companyAnnotationEnabled <- FALSE
}
p <- plotSettings$addCompanyAnnotation(p, enabled = companyAnnotationEnabled)
p
}
#'
#' @name DatasetRates
#'
#' @title
#' Dataset of Rates
#'
#' @description
#' Class for a dataset of rates.
#'
#' @template field_groups
#' @template field_stages
#' @template field_sampleSizes
#' @template field_overallSampleSizes
#' @template field_events
#' @template field_overallEvents
#'
#' @details
#' This object cannot be created directly; better use \code{\link{getDataset}}
#' with suitable arguments to create a dataset of rates.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include f_core_constants.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
DatasetRates <- setRefClass("DatasetRates",
contains = "Dataset",
fields = list(
sampleSizes = "numeric",
events = "numeric",
overallSampleSizes = "numeric",
overallEvents = "numeric"
),
methods = list(
getSampleSize = function(stage, group = 1, subset = NA_character_) {
return(.data$sampleSize[.getIndices(stage = stage, group = group, subset = subset)])
},
getSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$sampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$sampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getEvent = function(stage, group = 1, subset = NA_character_) {
return(.data$event[.getIndices(stage = stage, group = group, subset = subset)])
},
getEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$event[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getEventsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$event[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallSampleSize = function(stage, group = 1, subset = NA_character_) {
return(.data$overallSampleSize[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallSampleSizes = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallSampleSize[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallSampleSizesUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallSampleSize[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallEvent = function(stage, group = 1, subset = NA_character_) {
return(.data$overallEvent[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallEventsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
.initByDataFrame = function(dataFrame) {
callSuper(dataFrame)
# case: one rate - stage wise
if (.paramExists(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)) {
.inputType <<- "stagewise"
sampleSizes <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_SAMPLE_SIZES),
parameterName = "Sample sizes"
)
.validateValues(sampleSizes, "n")
if (any(stats::na.omit(sampleSizes) <= 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"all sample sizes must be > 0, but 'n' = ",
.arrayToString(sampleSizes, vectorLookAndFeelEnabled = TRUE)
)
}
events <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS),
parameterName = "Events"
)
.validateValues(events, "events")
if (any(stats::na.omit(events) < 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0, but 'events' = ",
.arrayToString(events, vectorLookAndFeelEnabled = TRUE)
)
}
kMax <- length(sampleSizes)
stageNumber <- length(stats::na.omit(sampleSizes))
dataInput <- data.frame(
sampleSizes = sampleSizes,
events = events
)
dataInput <- .getOverallData(dataInput, kMax, stage = stageNumber)
overallSampleSizes <<- dataInput$overallSampleSizes
overallEvents <<- dataInput$overallEvents
.setParameterType("sampleSizes", C_PARAM_USER_DEFINED)
.setParameterType("events", C_PARAM_USER_DEFINED)
.setParameterType("overallSampleSizes", C_PARAM_GENERATED)
.setParameterType("overallEvents", C_PARAM_GENERATED)
}
# case: one rate - overall
else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)) {
.inputType <<- "overall"
overallSampleSizes <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(
dataFrame,
C_KEY_WORDS_OVERALL_SAMPLE_SIZES
),
parameterName = "Cumulative sample sizes"
)
.validateValues(overallSampleSizes, "overallSampleSizes")
.assertValuesAreStrictlyIncreasing(overallSampleSizes, "overallSampleSizes", endingNasAllowed = TRUE)
overallEvents <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
parameterName = "Cumulative events"
)
.validateValues(overallEvents, "overallEvents")
.assertValuesAreMonotoneIncreasing(overallEvents, "overallEvents", endingNasAllowed = TRUE)
kMax <- length(overallSampleSizes)
stageNumber <- length(stats::na.omit(overallSampleSizes))
stageWiseData <- .getStageWiseData(data.frame(
overallSampleSizes = overallSampleSizes,
overallEvents = overallEvents
), kMax, stage = stageNumber)
sampleSizes <<- stageWiseData$sampleSizes
events <<- stageWiseData$events
.setParameterType("sampleSizes", C_PARAM_GENERATED)
.setParameterType("events", C_PARAM_GENERATED)
.setParameterType("overallSampleSizes", C_PARAM_USER_DEFINED)
.setParameterType("overallEvents", C_PARAM_USER_DEFINED)
}
# case: two or more rates - stage wise
else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 1)) &&
.paramExists(dataFrame, paste0(C_KEY_WORDS_SAMPLE_SIZES, 2))) {
.inputType <<- "stagewise"
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_SAMPLE_SIZES)
stages <<- rep(stages, numberOfTreatmentGroups)
groups <<- integer(0)
sampleSizes <<- numeric(0)
events <<- numeric(0)
overallSampleSizes <<- numeric(0)
overallEvents <<- numeric(0)
for (group in 1:numberOfTreatmentGroups) {
sampleSizesTemp <- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(
dataFrame, C_KEY_WORDS_SAMPLE_SIZES,
suffix = group
),
parameterName = "Sample sizes"
)
.validateValues(sampleSizesTemp, paste0("n", group))
if (any(stats::na.omit(sampleSizesTemp) <= 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"all sample sizes must be > 0, but 'n", group, "' = ",
.arrayToString(sampleSizesTemp, vectorLookAndFeelEnabled = TRUE)
)
}
sampleSizes <<- c(sampleSizes, sampleSizesTemp)
eventsTemp <- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS, suffix = group),
parameterName = "Events"
)
.validateValues(eventsTemp, paste0("events", group))
if (any(stats::na.omit(eventsTemp) < 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0, but 'events", group, "' = ",
.arrayToString(eventsTemp, vectorLookAndFeelEnabled = TRUE)
)
}
events <<- c(events, eventsTemp)
groups <<- c(groups, rep(as.integer(group), length(sampleSizesTemp)))
kMax <- length(sampleSizesTemp)
numberOfValidStages <- length(stats::na.omit(sampleSizesTemp))
overallData <- .getOverallData(data.frame(
sampleSizes = sampleSizesTemp,
events = eventsTemp
), kMax, stage = numberOfValidStages)
overallSampleSizes <<- c(overallSampleSizes, overallData$overallSampleSizes)
overallEvents <<- c(overallEvents, overallData$overallEvents)
}
if (sum(stats::na.omit(sampleSizes) < 0) > 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be >= 0")
}
.setParameterType("sampleSizes", C_PARAM_USER_DEFINED)
.setParameterType("events", C_PARAM_USER_DEFINED)
.setParameterType("overallSampleSizes", C_PARAM_GENERATED)
.setParameterType("overallEvents", C_PARAM_GENERATED)
}
# case: two or more rates - overall
else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 1)) &&
.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_SAMPLE_SIZES, 2))) {
.inputType <<- "overall"
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES)
stages <<- rep(stages, numberOfTreatmentGroups)
groups <<- integer(0)
sampleSizes <<- numeric(0)
events <<- numeric(0)
overallSampleSizes <<- numeric(0)
overallEvents <<- numeric(0)
for (group in 1:numberOfTreatmentGroups) {
overallSampleSizesTemp <- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(
dataFrame, C_KEY_WORDS_OVERALL_SAMPLE_SIZES,
suffix = group
),
parameterName = "Cumulative sample sizes"
)
.validateValues(overallSampleSizesTemp, paste0("overallSampleSizes", group))
.assertValuesAreStrictlyIncreasing(overallSampleSizesTemp,
paste0("overallSampleSizes", group),
endingNasAllowed = TRUE
)
overallSampleSizes <<- c(overallSampleSizes, overallSampleSizesTemp)
overallEventsTemp <- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame,
C_KEY_WORDS_OVERALL_EVENTS,
suffix = group
),
parameterName = "Cumulative events"
)
.validateValues(overallEventsTemp, paste0("overallEvents", group))
.assertValuesAreMonotoneIncreasing(overallEventsTemp,
paste0("overallEvents", group),
endingNasAllowed = TRUE
)
overallEvents <<- c(overallEvents, overallEventsTemp)
groups <<- c(groups, rep(as.integer(group), length(overallSampleSizesTemp)))
kMax <- length(overallSampleSizesTemp)
numberOfValidStages <- length(stats::na.omit(overallSampleSizesTemp))
stageWiseData <- .getStageWiseData(data.frame(
overallSampleSizes = overallSampleSizesTemp,
overallEvents = overallEventsTemp
), kMax, stage = numberOfValidStages)
validatedSampleSizes <- stageWiseData$sampleSizes
.validateValues(validatedSampleSizes, paste0("n", group))
sampleSizes <<- c(sampleSizes, validatedSampleSizes)
events <<- c(events, stageWiseData$events)
if (sum(stats::na.omit(sampleSizes) < 0) > 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all sample sizes must be >= 0")
}
}
.setParameterType("sampleSizes", C_PARAM_GENERATED)
.setParameterType("events", C_PARAM_GENERATED)
.setParameterType("overallSampleSizes", C_PARAM_USER_DEFINED)
.setParameterType("overallEvents", C_PARAM_USER_DEFINED)
} else {
stop(
C_EXCEPTION_TYPE_MISSING_ARGUMENT,
"sample sizes are missing or not correctly specified"
)
}
if (sum(stats::na.omit(events) < 0) > 0) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0")
}
.recreateDataFrame()
if (.enrichmentEnabled) {
.createOverallDataEnrichment()
}
},
.recreateDataFrame = function() {
callSuper()
.data <<- cbind(.data, data.frame(
sampleSize = sampleSizes,
event = events,
overallSampleSize = overallSampleSizes,
overallEvent = overallEvents
))
.orderDataByStageAndGroup()
.setDataToVariables()
},
.setDataToVariables = function() {
callSuper()
sampleSizes <<- .data$sampleSize
events <<- .data$event
overallSampleSizes <<- .data$overallSampleSize
overallEvents <<- .data$overallEvent
},
.fillWithNAs = function(kMax) {
callSuper(kMax)
n <- .getNumberOfNAsToAdd(kMax)
sampleSizes <<- c(sampleSizes, rep(NA_real_, n))
events <<- c(events, rep(NA_real_, n))
overallSampleSizes <<- c(overallSampleSizes, rep(NA_real_, n))
overallEvents <<- c(overallEvents, rep(NA_real_, n))
.recreateDataFrame()
},
.trim = function(kMax = NA_integer_) {
indices <- callSuper(kMax)
if (length(indices) == 0) {
return(invisible(FALSE))
}
sampleSizes <<- sampleSizes[indices]
events <<- events[indices]
overallSampleSizes <<- overallSampleSizes[indices]
overallEvents <<- overallEvents[indices]
.recreateDataFrame()
return(invisible(TRUE))
},
getRandomData = function() {
data <- NULL
for (stage in 1:getNumberOfStages()) {
for (group in 1:getNumberOfGroups()) {
if (.enrichmentEnabled) {
for (subset in levels(.data$subset)) {
n <- getSampleSize(stage = stage, group = group, subset = subset)
numberOfEvents <- getEvent(stage = stage, group = group, subset = subset)
randomIndizes <- sample(x = c(1:n), size = numberOfEvents, replace = FALSE)
randomData <- rep(0, n)
randomData[randomIndizes] <- 1
row <- data.frame(
stage = stage,
group = group,
subset = subset,
randomData = randomData
)
if (is.null(data)) {
data <- row
} else {
data <- rbind(data, row)
}
}
} else {
n <- getSampleSize(stage = stage, group = group)
numberOfEvents <- getEvent(stage = stage, group = group)
randomIndizes <- sample(x = c(1:n), size = numberOfEvents, replace = FALSE)
randomData <- rep(0, n)
randomData[randomIndizes] <- 1
row <- data.frame(
stage = stage,
group = group,
randomData = randomData
)
if (is.null(data)) {
data <- row
} else {
data <- rbind(data, row)
}
}
}
}
data$stage <- factor(data$stage)
data$group <- factor(data$group, label = paste("Group", c(1:getNumberOfGroups())))
return(data)
},
.createOverallDataEnrichment = function() {
if (!.enrichmentEnabled) {
return(invisible())
}
.data$overallSampleSize <<- rep(NA_real_, nrow(.data))
.data$overallEvent <<- rep(NA_real_, nrow(.data))
for (s in levels(.data$subset)) {
for (g in levels(.data$group)) {
indices <- which(.data$subset == s & .data$group == g)
.data$overallSampleSize[indices] <<- cumsum(.data$sampleSize[indices])
.data$overallEvent[indices] <<- cumsum(.data$event[indices])
}
}
.setDataToVariables()
},
.getOverallData = function(dataInput, kMax, stage) {
"Calculates cumulative values if stage-wise data is available"
if (is.null(dataInput[["sampleSizes"]])) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "data input must contain variable 'sampleSizes'")
}
if (is.null(dataInput[["events"]])) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "data input must contain variable 'events'")
}
dataInput$overallSampleSizes <- c(
cumsum(dataInput$sampleSizes[1:stage]),
rep(NA_real_, kMax - stage)
)
dataInput$overallEvents <- c(
cumsum(dataInput$events[1:stage]),
rep(NA_real_, kMax - stage)
)
return(dataInput)
},
.getStageWiseData = function(dataInput, kMax, stage) {
"Calculates stage-wise values if cumulative data is available"
if (is.null(dataInput[["overallSampleSizes"]])) {
stop(
C_EXCEPTION_TYPE_MISSING_ARGUMENT,
"data input must contain variable 'overallSampleSizes'"
)
}
if (is.null(dataInput[["overallEvents"]])) {
stop(
C_EXCEPTION_TYPE_MISSING_ARGUMENT,
"data input must contain variable 'overallEvents'"
)
}
dataInput$sampleSizes <- c(dataInput$overallSampleSizes[1:stage], rep(NA_real_, kMax - stage))
if (stage > 1) {
dataInput$sampleSizes[2:stage] <- dataInput$overallSampleSizes[2:stage] -
dataInput$overallSampleSizes[1:(stage - 1)]
}
dataInput$events <- c(dataInput$overallEvents[1:stage], rep(NA_real_, kMax - stage))
if (stage > 1) {
dataInput$events[2:stage] <- dataInput$overallEvents[2:stage] -
dataInput$overallEvents[1:(stage - 1)]
}
return(dataInput)
}
)
)
#'
#' @name DatasetSurvival
#'
#' @title
#' Dataset of Survival Data
#'
#' @description
#' Class for a dataset of survival data.
#'
#' @template field_groups
#' @template field_stages
#' @template field_events
#' @template field_overallEvents
#' @template field_allocationRatios
#' @template field_overallAllocationRatios
#' @template field_logRanks
#' @template field_overallLogRanks
#'
#'
#' @details
#' This object cannot be created directly; better use \code{\link{getDataset}}
#' with suitable arguments to create a dataset of survival data.
#'
#' @include class_core_parameter_set.R
#' @include class_core_plot_settings.R
#' @include f_core_constants.R
#'
#' @keywords internal
#'
#' @importFrom methods new
#'
DatasetSurvival <- setRefClass("DatasetSurvival",
contains = "Dataset",
fields = list(
overallEvents = "numeric",
overallAllocationRatios = "numeric",
overallLogRanks = "numeric",
events = "numeric",
allocationRatios = "numeric",
logRanks = "numeric"
),
methods = list(
getEvent = function(stage, group = 1, subset = NA_character_) {
return(.data$event[.getIndices(stage = stage, group = group, subset = subset)])
},
getEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$event[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getEventsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$event[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getAllocationRatio = function(stage, group = 1, subset = NA_character_) {
return(.data$allocationRatio[.getIndices(stage = stage, group = group, subset = subset)])
},
getAllocationRatios = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$allocationRatio[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getAllocationRatiosUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$allocationRatio[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getLogRank = function(stage, group = 1, subset = NA_character_) {
return(.data$logRank[.getIndices(stage = stage, group = group, subset = subset)])
},
getLogRanks = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$logRank[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getLogRanksUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$logRank[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallEvent = function(stage, group = 1, subset = NA_character_) {
return(.data$overallEvent[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallEventsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallAllocationRatio = function(stage, group = 1, subset = NA_character_) {
return(.data$overallAllocationRatio[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallAllocationRatios = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallAllocationRatio[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallAllocationRatiosUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallAllocationRatio[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallLogRank = function(stage, group = 1, subset = NA_character_) {
return(.data$overallLogRank[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallLogRanks = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallLogRank[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallLogRanksUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallLogRank[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
.getAllocationRatioDefaultValues = function(stages, events, logRanks) {
allocationRatioDefaultValues <- rep(C_ALLOCATION_RATIO_DEFAULT, length(stages))
indices <- which(is.na(events) | is.na(logRanks))
allocationRatioDefaultValues[indices] <- NA_real_
return(allocationRatioDefaultValues)
},
.initByDataFrame = function(dataFrame) {
callSuper(dataFrame)
if (inherits(.self, "DatasetEnrichmentSurvival")) {
if (.paramExists(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS) ||
.paramExists(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)) {
.inputType <<- "stagewise"
events <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS),
parameterName = "Events"
)
.validateValues(events, "events")
allocationRatios <<- .getValuesByParameterName(
dataFrame, C_KEY_WORDS_ALLOCATION_RATIOS,
defaultValues = .getAllocationRatioDefaultValues(stages, events, expectedEvents)
)
.validateValues(allocationRatios, "allocationRatios")
} else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS) ||
.paramExists(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)) {
.inputType <<- "overall"
overallEvents <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
parameterName = "Cumulative events"
)
.validateValues(overallEvents, "overallEvents")
overallAllocationRatios <<- .getValuesByParameterName(
dataFrame,
parameterNameVariants = C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
defaultValues = .getAllocationRatioDefaultValues(stages, overallEvents, overallExpectedEvents)
)
.validateValues(overallAllocationRatios, "overallAllocationRatios")
}
# stratified enrichment: do nothing more here
}
# case: survival, two groups - overall
else if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS)) {
.inputType <<- "overall"
overallEvents <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
parameterName = "Cumulative events"
)
.validateValues(overallEvents, "overallEvents")
if (!.enrichmentEnabled) {
.assertValuesAreStrictlyIncreasing(overallEvents, "overallEvents", endingNasAllowed = TRUE)
}
overallLogRanks <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS)
.validateValues(overallLogRanks, "overallLogRanks")
overallAllocationRatios <<- .getValuesByParameterName(
dataFrame,
parameterNameVariants = C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
defaultValues = .getAllocationRatioDefaultValues(stages, overallEvents, overallLogRanks)
)
.validateValues(overallAllocationRatios, "overallAllocationRatios")
.setParameterType("groups", C_PARAM_NOT_APPLICABLE)
}
# case: survival, two groups - stage wise
else if (.paramExists(dataFrame, C_KEY_WORDS_LOG_RANKS)) {
.inputType <<- "stagewise"
events <<- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
dataFrame, C_KEY_WORDS_EVENTS
), parameterName = "Events")
.validateValues(events, "events")
if (any(stats::na.omit(events) < 0)) {
stop(C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0")
}
logRanks <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_LOG_RANKS)
.validateValues(logRanks, "logRanks")
allocationRatios <<- .getValuesByParameterName(
dataFrame, C_KEY_WORDS_ALLOCATION_RATIOS,
defaultValues = .getAllocationRatioDefaultValues(stages, events, logRanks)
)
.validateValues(allocationRatios, "allocationRatios")
.setParameterType("groups", C_PARAM_NOT_APPLICABLE)
}
# case: survival, three ore more groups - overall
else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_LOG_RANKS, 1)) &&
.paramExists(dataFrame, paste0(C_KEY_WORDS_OVERALL_LOG_RANKS, 2))) {
.inputType <<- "overall"
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS)
stages <<- rep(stages, numberOfTreatmentGroups)
groups <<- integer(0)
overallEvents <<- numeric(0)
overallAllocationRatios <<- numeric(0)
overallLogRanks <<- numeric(0)
for (group in 1:numberOfTreatmentGroups) {
overallEventsTemp <- .getValuesByParameterName(dataFrame,
C_KEY_WORDS_OVERALL_EVENTS,
suffix = group
)
.validateValues(overallEventsTemp, paste0("overallEvents", group))
if (is.null(dataFrame[["subset"]]) || length(unique(dataFrame[["subset"]])) <= 1) {
.assertValuesAreStrictlyIncreasing(overallEventsTemp,
paste0("overallEvents", group),
endingNasAllowed = TRUE
)
}
overallEvents <<- c(overallEvents, overallEventsTemp)
overallLogRanksTemp <- .getValuesByParameterName(
dataFrame, C_KEY_WORDS_OVERALL_LOG_RANKS,
suffix = group
)
.validateValues(overallLogRanksTemp, paste0("overallLogRanks", group))
overallLogRanks <<- c(overallLogRanks, overallLogRanksTemp)
overallAllocationRatiosTemp <- .getValuesByParameterName(
dataFrame, C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
suffix = group,
defaultValues = .getAllocationRatioDefaultValues(
overallEventsTemp,
overallEventsTemp, overallLogRanksTemp
)
)
.validateValues(overallAllocationRatiosTemp, paste0("overallAllocationRatios", group))
overallAllocationRatios <<- c(overallAllocationRatios, overallAllocationRatiosTemp)
groups <<- c(groups, rep(as.integer(group), length(overallLogRanksTemp)))
}
}
# case: survival, three ore more groups - stage wise
else if (.paramExists(dataFrame, paste0(C_KEY_WORDS_LOG_RANKS, 1)) &&
.paramExists(dataFrame, paste0(C_KEY_WORDS_LOG_RANKS, 2))) {
.inputType <<- "stagewise"
numberOfTreatmentGroups <- .getNumberOfGroups(dataFrame, C_KEY_WORDS_LOG_RANKS)
stages <<- rep(stages, numberOfTreatmentGroups)
groups <<- integer(0)
events <<- numeric(0)
allocationRatios <<- numeric(0)
logRanks <<- numeric(0)
for (group in 1:numberOfTreatmentGroups) {
eventsTemp <- .getValidatedFloatingPointNumbers(.getValuesByParameterName(
dataFrame, C_KEY_WORDS_EVENTS,
suffix = group
), parameterName = "Events")
if (any(stats::na.omit(eventsTemp) < 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT, "all events must be >= 0, but 'events", group, "' = ",
.arrayToString(eventsTemp, vectorLookAndFeelEnabled = TRUE)
)
}
events <<- c(events, eventsTemp)
logRanksTemp <- .getValuesByParameterName(
dataFrame, C_KEY_WORDS_LOG_RANKS,
suffix = group
)
.validateValues(logRanksTemp, paste0("n", group))
logRanks <<- c(logRanks, logRanksTemp)
allocationRatiosTemp <- .getValuesByParameterName(
dataFrame, C_KEY_WORDS_ALLOCATION_RATIOS,
suffix = group,
defaultValues = .getAllocationRatioDefaultValues(
eventsTemp,
eventsTemp, logRanksTemp
)
)
.validateValues(allocationRatiosTemp, paste0("allocationRatios", group))
allocationRatios <<- c(allocationRatios, allocationRatiosTemp)
groups <<- c(groups, rep(as.integer(group), length(eventsTemp)))
}
} else {
stop(
C_EXCEPTION_TYPE_RUNTIME_ISSUE, "unable to identify case for ", .getClassName(.self), " and columns ",
.arrayToString(colnames(dataFrame))
)
}
if (.inputType == "stagewise") {
n <- length(events)
overallEvents <<- rep(NA_real_, n)
overallAllocationRatios <<- rep(NA_real_, n)
overallLogRanks <<- rep(NA_real_, n)
.setParameterType("events", C_PARAM_USER_DEFINED)
.setParameterType("allocationRatios", C_PARAM_USER_DEFINED)
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
.setParameterType("logRanks", C_PARAM_USER_DEFINED)
}
.setParameterType("overallEvents", C_PARAM_GENERATED)
.setParameterType("overallAllocationRatios", C_PARAM_GENERATED)
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
.setParameterType("overallLogRanks", C_PARAM_GENERATED)
}
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
.recreateDataFrame()
.createOverallData()
}
} else {
n <- length(overallEvents)
events <<- rep(NA_real_, n)
allocationRatios <<- rep(NA_real_, n)
logRanks <<- rep(NA_real_, n)
.setParameterType("events", C_PARAM_GENERATED)
.setParameterType("allocationRatios", C_PARAM_GENERATED)
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
.setParameterType("logRanks", C_PARAM_GENERATED)
}
.setParameterType("overallEvents", C_PARAM_USER_DEFINED)
.setParameterType("overallAllocationRatios", C_PARAM_USER_DEFINED)
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
.setParameterType("overallLogRanks", C_PARAM_USER_DEFINED)
}
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
.recreateDataFrame()
.createStageWiseData()
}
}
},
.recreateDataFrame = function() {
callSuper()
if (inherits(.self, "DatasetEnrichmentSurvival")) {
.data <<- cbind(.data, data.frame(
overallEvent = overallEvents,
overallExpectedEvent = overallExpectedEvents,
overallVarianceEvent = overallVarianceEvents,
overallAllocationRatio = overallAllocationRatios,
event = events,
expectedEvent = expectedEvents,
# varianceEvent = varianceEvents, # maybe implemented later
allocationRatio = allocationRatios
))
} else {
.data <<- cbind(.data, data.frame(
overallEvent = overallEvents,
overallAllocationRatio = overallAllocationRatios,
overallLogRank = overallLogRanks,
event = events,
allocationRatio = allocationRatios,
logRank = logRanks
))
}
.orderDataByStageAndGroup()
.setDataToVariables()
},
.setDataToVariables = function() {
callSuper()
overallEvents <<- .data$overallEvent
overallAllocationRatios <<- .data$overallAllocationRatio
events <<- .data$event
allocationRatios <<- .data$allocationRatio
if (!inherits(.self, "DatasetEnrichmentSurvival")) {
overallLogRanks <<- .data$overallLogRank
logRanks <<- .data$logRank
}
},
.fillWithNAs = function(kMax) {
callSuper(kMax)
n <- .getNumberOfNAsToAdd(kMax)
overallEvents <<- c(overallEvents, rep(NA_real_, n))
overallAllocationRatios <<- c(overallAllocationRatios, rep(NA_real_, n))
overallLogRanks <<- c(overallLogRanks, rep(NA_real_, n))
events <<- c(events, rep(NA_real_, n))
allocationRatios <<- c(allocationRatios, rep(NA_real_, n))
logRanks <<- c(logRanks, rep(NA_real_, n))
.recreateDataFrame()
},
.trim = function(kMax = NA_integer_) {
indices <- callSuper(kMax)
if (length(indices) == 0) {
return(invisible(FALSE))
}
events <<- events[indices]
allocationRatios <<- allocationRatios[indices]
logRanks <<- logRanks[indices]
overallEvents <<- overallEvents[indices]
overallAllocationRatios <<- overallAllocationRatios[indices]
overallLogRanks <<- overallLogRanks[indices]
.recreateDataFrame()
return(invisible(TRUE))
},
getRandomData = function() {
stop(
C_EXCEPTION_TYPE_RUNTIME_ISSUE,
"the function 'DatasetSurvival.getRandomData()' is not implemented yet"
)
},
.getOverallLogRanks = function(logRanks, events, overallEvents,
kMax = length(logRanks), stage = length(logRanks)) {
result <- c(logRanks[1:stage], rep(NA_real_, kMax - stage))
if (stage == 1) {
return(result)
}
for (k in 2:stage) {
result[k] <-
(sqrt(events[k]) * logRanks[k] +
sqrt(overallEvents[k - 1]) *
result[k - 1]) / sqrt(overallEvents[k])
}
return(result)
},
.getOverallAllocationRatios = function(allocationRatios, events, overallEvents,
kMax = length(allocationRatios), stage = length(allocationRatios)) {
result <- c(
allocationRatios[1:stage],
rep(NA_real_, kMax - stage)
)
if (stage == 1) {
return(result)
}
for (k in 2:stage) {
result[k] <- (events[k] *
allocationRatios[k] + overallEvents[k - 1] *
result[k - 1]) / overallEvents[k]
}
return(result)
},
.createOverallData = function() {
.data$overallEvent <<- rep(NA_real_, nrow(.data))
if (inherits(.self, "DatasetEnrichmentSurvival")) {
.data$overallExpectedEvent <<- rep(NA_real_, nrow(.data))
.data$overallVarianceEvent <<- rep(NA_real_, nrow(.data))
} else {
.data$overallLogRank <<- rep(NA_real_, nrow(.data))
}
.data$overallAllocationRatio <<- rep(NA_real_, nrow(.data))
subsetLevels <- NA_character_
if (.enrichmentEnabled) {
subsetLevels <- levels(.data$subset)
}
for (s in subsetLevels) {
for (g in levels(.data$group)) {
if (!is.na(s)) {
indices <- which(.data$subset == s & .data$group == g)
} else {
indices <- which(.data$group == g)
}
.data$overallEvent[indices] <<- cumsum(.data$event[indices])
.data$overallExpectedEvent[indices] <<- cumsum(.data$expectedEvent[indices])
# .data$overallVarianceEvent[indices] <<- # maybe implemented later
.data$overallLogRank[indices] <<- .getOverallLogRanks(
.data$logRank[indices], .data$event[indices], .data$overallEvent[indices]
)
.data$overallAllocationRatio[indices] <<- .getOverallAllocationRatios(
.data$allocationRatio[indices], .data$event[indices], .data$overallEvent[indices]
)
}
}
.setDataToVariables()
},
.getStageWiseEvents = function(overallEvents) {
result <- overallEvents
if (length(result) == 1) {
return(result)
}
kMax <- length(result)
result[2:kMax] <- overallEvents[2:kMax] - overallEvents[1:(kMax - 1)]
return(result)
},
.getStageWiseLogRanks = function(overallLogRanks, overallEvents) {
result <- overallLogRanks
if (length(result) == 1) {
return(result)
}
kMax <- length(result)
result[2:kMax] <- (sqrt(overallEvents[2:kMax]) *
overallLogRanks[2:kMax] -
sqrt(overallEvents[1:(kMax - 1)]) *
overallLogRanks[1:(kMax - 1)]) /
sqrt(overallEvents[2:kMax] - overallEvents[1:(kMax - 1)])
return(result)
},
.getStageWiseAllocationRatios = function(overallAllocationRatios, events, overallEvents) {
result <- overallAllocationRatios
if (length(result) == 1) {
return(result)
}
kMax <- length(result)
result[2:kMax] <- (
overallAllocationRatios[2:kMax] -
overallAllocationRatios[1:(kMax - 1)] *
overallEvents[1:(kMax - 1)] / overallEvents[2:kMax]
) / (events[2:kMax] / overallEvents[2:kMax])
if (any(stats::na.omit(result) <= 0)) {
stop(
C_EXCEPTION_TYPE_ILLEGAL_ARGUMENT,
"overall allocation ratios not correctly specified: ",
"one or more calculated stage-wise allocation ratios <= 0"
)
}
return(result)
},
.createStageWiseData = function() {
"Calculates stage-wise logrank statistics, events, and allocation ratios if cumulative data is available"
.data$event <<- rep(NA_real_, nrow(.data))
if (inherits(.self, "DatasetEnrichmentSurvival")) {
.data$expectedEvent <<- rep(NA_real_, nrow(.data))
.data$varianceEvent <<- rep(NA_real_, nrow(.data))
} else {
.data$logRank <<- rep(NA_real_, nrow(.data))
}
.data$allocationRatio <<- rep(NA_real_, nrow(.data))
subsetLevels <- NA_character_
if (.enrichmentEnabled) {
subsetLevels <- levels(.data$subset)
}
for (s in subsetLevels) {
for (g in levels(.data$group)) {
if (!is.na(s)) {
indices <- which(.data$subset == s & .data$group == g)
} else {
indices <- which(.data$group == g)
}
groupNumber <- ifelse(levels(.data$group) > 1, g, "")
if (.enrichmentEnabled) {
.assertValuesAreStrictlyIncreasing(.data$overallEvent[indices],
paste0("overallEvents", groupNumber, "[subset == \"", s, "\"]"),
endingNasAllowed = TRUE
)
} else {
.assertValuesAreStrictlyIncreasing(.data$overallEvent[indices],
paste0("overallEvents", groupNumber),
endingNasAllowed = TRUE
)
}
.data$event[indices] <<- .getStageWiseEvents(.data$overallEvent[indices])
if (inherits(.self, "DatasetEnrichmentSurvival")) {
.data$expectedEvent[indices] <<- .getStageWiseEvents(.data$overallExpectedEvent[indices])
# .data$varianceEvent[indices] <<- # maybe implemented later
} else {
.data$logRank[indices] <<- .getStageWiseLogRanks(
.data$overallLogRank[indices], .data$overallEvent[indices]
)
}
.data$allocationRatio[indices] <<- .getStageWiseAllocationRatios(
.data$overallAllocationRatio[indices],
.data$event[indices], .data$overallEvent[indices]
)
}
}
.setDataToVariables()
}
)
)
#'
#' @rdname DatasetSurvival
#'
#' @keywords internal
#'
DatasetEnrichmentSurvival <- setRefClass("DatasetEnrichmentSurvival",
contains = "DatasetSurvival",
fields = list(
expectedEvents = "numeric",
varianceEvents = "numeric",
overallExpectedEvents = "numeric",
overallVarianceEvents = "numeric"
),
methods = list(
.initByDataFrame = function(dataFrame) {
callSuper(dataFrame)
if (.paramExists(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS) ||
.paramExists(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)) {
if (!.paramExists(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS)) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'overallExpectedEvents' is missing")
}
if (!.paramExists(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'overallVarianceEvents' is missing")
}
.inputType <<- "overall"
overallEvents <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EVENTS),
parameterName = "Cumulative events"
)
.validateValues(overallEvents, "overallEvents")
overallExpectedEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_EXPECTED_EVENTS)
.validateValues(overallExpectedEvents, "overallExpectedEvents")
overallVarianceEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_OVERALL_VARIANCE_EVENTS)
.validateValues(overallVarianceEvents, "overallVarianceEvents")
overallAllocationRatios <<- .getValuesByParameterName(
dataFrame,
parameterNameVariants = C_KEY_WORDS_OVERALL_ALLOCATION_RATIOS,
defaultValues = .getAllocationRatioDefaultValues(stages, overallEvents, overallExpectedEvents)
)
.validateValues(overallAllocationRatios, "overallAllocationRatios")
} else if (.paramExists(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS) ||
.paramExists(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)) {
if (!.paramExists(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS)) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'expectedEvents' is missing")
}
if (!.paramExists(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)) {
stop(C_EXCEPTION_TYPE_MISSING_ARGUMENT, "'varianceEvents' is missing")
}
.inputType <<- "stagewise"
events <<- .getValidatedFloatingPointNumbers(
.getValuesByParameterName(dataFrame, C_KEY_WORDS_EVENTS),
parameterName = "Events"
)
.validateValues(events, "events")
expectedEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_EXPECTED_EVENTS)
.validateValues(expectedEvents, "expectedEvents")
varianceEvents <<- .getValuesByParameterName(dataFrame, C_KEY_WORDS_VARIANCE_EVENTS)
.validateValues(varianceEvents, "varianceEvents")
allocationRatios <<- .getValuesByParameterName(
dataFrame,
parameterNameVariants = C_KEY_WORDS_ALLOCATION_RATIOS,
defaultValues = .getAllocationRatioDefaultValues(stages, events, expectedEvents)
)
.validateValues(allocationRatios, "allocationRatios")
}
.setParameterType("groups", C_PARAM_NOT_APPLICABLE)
if (.inputType == "stagewise") {
n <- length(events)
overallExpectedEvents <<- rep(NA_real_, n)
overallVarianceEvents <<- rep(NA_real_, n)
.setParameterType("events", C_PARAM_USER_DEFINED)
.setParameterType("allocationRatios", C_PARAM_USER_DEFINED)
.setParameterType("expectedEvents", C_PARAM_USER_DEFINED)
.setParameterType("varianceEvents", C_PARAM_USER_DEFINED)
.setParameterType("overallEvents", C_PARAM_GENERATED)
.setParameterType("overallAllocationRatios", C_PARAM_GENERATED)
.setParameterType("overallExpectedEvents", C_PARAM_GENERATED)
.setParameterType("overallVarianceEvents", C_PARAM_GENERATED)
.recreateDataFrame()
.createOverallData()
} else {
n <- length(overallEvents)
expectedEvents <<- rep(NA_real_, n)
varianceEvents <<- rep(NA_real_, n)
.setParameterType("events", C_PARAM_GENERATED)
.setParameterType("allocationRatios", C_PARAM_GENERATED)
.setParameterType("expectedEvents", C_PARAM_GENERATED)
.setParameterType("varianceEvents", C_PARAM_GENERATED)
.setParameterType("overallEvents", C_PARAM_USER_DEFINED)
.setParameterType("overallAllocationRatios", C_PARAM_USER_DEFINED)
.setParameterType("overallExpectedEvents", C_PARAM_USER_DEFINED)
.setParameterType("overallVarianceEvents", C_PARAM_USER_DEFINED)
.recreateDataFrame()
.createStageWiseData()
}
},
.getVisibleFieldNames = function() {
visibleFieldNames <- callSuper()
visibleFieldNames <- visibleFieldNames[!(visibleFieldNames %in% c("logRanks", "overallLogRanks"))]
return(visibleFieldNames)
},
.setDataToVariables = function() {
callSuper()
overallExpectedEvents <<- .data$overallExpectedEvent
overallVarianceEvents <<- .data$overallVarianceEvent
expectedEvents <<- .data$expectedEvent
},
getOverallExpectedEvent = function(stage, group = 1, subset = NA_character_) {
return(.data$overallExpectedEvent[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallExpectedEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallExpectedEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallExpectedEventsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallExpectedEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
},
getOverallVarianceEvent = function(stage, group = 1, subset = NA_character_) {
return(.data$overallVarianceEvent[.getIndices(stage = stage, group = group, subset = subset)])
},
getOverallVarianceEvents = function(..., stage = NA_integer_, group = NA_integer_, subset = NA_character_) {
return(.data$overallVarianceEvent[.getIndices(stage = .getValidatedStage(stage), group = group, subset = subset)])
},
getOverallVarianceEventsUpTo = function(to, group = 1, subset = NA_character_) {
return(.data$overallVarianceEvent[.getIndices(stage = c(1:to), group = group, subset = subset)])
}
)
)
.isFloatingPointSampleSize <- function(object, param) {
values <- object[[param]]
if (is.null(values)) {
return(FALSE)
}
values <- na.omit(values)
if (length(values) == 0) {
return(FALSE)
}
if (any(floor(values) != values)) {
return(TRUE)
}
return(FALSE)
}
.getMaxDigits <- function(values) {
values <- na.omit(values)
if (length(values) == 0) {
return(0)
}
values <- trimws(format(values, scientific = FALSE, digits = 15))
values <- gsub("^\\d*\\.", "", values)
values <- gsub("\\D", "", values)
max(nchar(values))
}
#'
#' @title
#' Dataset Summary
#'
#' @description
#' Displays a summary of \code{\link{Dataset}} object.
#'
#' @param object A \code{\link{Dataset}} object.
#' @inheritParams param_digits
#' @inheritParams param_three_dots
#'
#' @details
#' Summarizes the parameters and results of a dataset.
#'
#' @template details_summary
#'
#' @template return_object_summary
#' @template how_to_get_help_for_generics
#'
#' @export
#'
#' @keywords internal
#'
summary.Dataset <- function(object, ..., type = 1, digits = NA_integer_) {
.warnInCaseOfUnknownArguments(functionName = "summary", ...)
if (type == 1 && inherits(object, "SummaryFactory")) {
return(object)
}
if (type != 1) {
return(summary.ParameterSet(object, type = type, digits = digits, ...))
}
intervalFormat <- getOption("rpact.summary.intervalFormat", "[%s; %s]")
.assertIsValidSummaryIntervalFormat(intervalFormat)
summaryFactory <- SummaryFactory(object = object, intervalFormat = intervalFormat)
s <- object$.toString()
kMax <- object$getNumberOfStages()
summaryFactory$title <- .firstCharacterToUpperCase(s)
numberOfGroups <- object$getNumberOfGroups()
if (numberOfGroups == 1) {
groups <- "one sample"
} else if (numberOfGroups == 2) {
groups <- c("one treatment", "one control group")
if (object$isDatasetSurvival()) {
groups <- paste0(groups, c(" (1)", " (2)"))
}
} else {
groups <- c(paste0(
.integerToWrittenNumber(numberOfGroups - 1),
" treatment groups"
), "one control group")
if (object$isDatasetSurvival()) {
groups <- paste0(groups, c(
paste0(" (", .arrayToString(1:(numberOfGroups - 1)), ")"),
paste0(" (", numberOfGroups, ")")
))
}
}
prefix <- ""
if (object$isDatasetMeans()) {
prefix <- "the sample sizes, means, and standard deviations of "
} else if (object$isDatasetRates()) {
prefix <- "the sample sizes and events of "
} else if (object$isDatasetSurvival()) {
prefix <- "the events and log rank statistics of the comparison of "
}
if (numberOfGroups > 1) {
prefix <- paste0(prefix, "\n")
}
header <- paste0(
"The dataset contains ", prefix,
paste0(groups, collapse = ifelse(object$isDatasetSurvival(), " with ", " and "))
)
if (object$.enrichmentEnabled) {
header <- paste0(header, ". The data will be analyzed ", ifelse(object$isStratified(), "", "non-"), "stratified")
}
if (kMax > 1) {
header <- paste0(
header, ".\nThe total number of looks is ", .integerToWrittenNumber(kMax),
"; stage-wise and cumulative data are included"
)
}
header <- paste0(header, ".")
summaryFactory$header <- header
digitSettings <- .getSummaryDigits(digits)
digits <- digitSettings$digits
digitsSampleSize <- 0
digitsGeneral <- digitSettings$digitsGeneral
digitsProbabilities <- digitSettings$digitsProbabilities
paramsToCheck <- character(0)
if (object$isDatasetMeans() || object$isDatasetRates()) {
paramsToCheck <- c(paramsToCheck, "sampleSizes")
if (kMax > 1) {
paramsToCheck <- c(paramsToCheck, "overallSampleSizes")
}
} else if (object$isDatasetRates() || object$isDatasetSurvival()) {
paramsToCheck <- c(paramsToCheck, "events")
if (kMax > 1) {
paramsToCheck <- c(paramsToCheck, "overallEvents")
}
}
if (length(paramsToCheck) > 0) {
for (param in paramsToCheck) {
if (.isFloatingPointSampleSize(object, param)) {
digitsSampleSize <- max(digitsSampleSize, .getMaxDigits(object[[param]]))
}
}
digitsSampleSize <- min(digitsSampleSize, digits)
}
summaryFactory$addItem("Stage", object$stages)
if (numberOfGroups > 1) {
groupNumbers <- object$groups
if (object$isDatasetSurvival()) {
groupNumbers <- paste0(object$groups, " vs ", numberOfGroups)
summaryFactory$addItem("Comparison", groupNumbers)
} else {
summaryFactory$addItem("Group", groupNumbers)
}
}
if (object$.enrichmentEnabled) {
summaryFactory$addItem("Subset", object$subsets)
}
parameterCaptionPrefix <- ifelse(kMax == 1, "", "Stage-wise ")
if (object$isDatasetMeans() || object$isDatasetRates()) {
summaryFactory$addParameter(object,
parameterName = "sampleSizes",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "sample size"),
roundDigits = digitsSampleSize
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallSampleSizes",
parameterCaption = "Cumulative sample size", roundDigits = digitsSampleSize
)
}
}
if (object$isDatasetMeans()) {
summaryFactory$addParameter(object,
parameterName = "means",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "mean"),
roundDigits = digitsGeneral
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallMeans",
parameterCaption = "Cumulative mean", roundDigits = digitsGeneral
)
}
summaryFactory$addParameter(object,
parameterName = "stDevs",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "standard deviation"),
roundDigits = digitsGeneral
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallStDevs",
parameterCaption = "Cumulative standard deviation", roundDigits = digitsGeneral
)
}
} else if (object$isDatasetRates()) {
summaryFactory$addParameter(object,
parameterName = "events",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "number of events"),
roundDigits = digitsSampleSize
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallEvents",
parameterCaption = "Cumulative number of events", roundDigits = digitsSampleSize
)
}
} else if (object$isDatasetSurvival()) {
summaryFactory$addParameter(object,
parameterName = "events",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "number of events"),
roundDigits = digitsSampleSize
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallEvents",
parameterCaption = "Cumulative number of events", roundDigits = digitsSampleSize
)
}
summaryFactory$addParameter(object,
parameterName = "logRanks",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "log rank statistic"),
roundDigits = digitsGeneral
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallLogRanks",
parameterCaption = "Cumulative log rank statistic", roundDigits = digitsGeneral
)
}
if (!any(is.na(object$allocationRatios)) && any(object$allocationRatios != 1)) {
summaryFactory$addParameter(object,
parameterName = "allocationRatios",
parameterCaption = .firstCharacterToUpperCase(parameterCaptionPrefix, "allocation ratio"),
roundDigits = digitsGeneral
)
if (kMax > 1) {
summaryFactory$addParameter(object,
parameterName = "overallAllocationRatios",
parameterCaption = "Cumulative allocation ratio", roundDigits = digitsGeneral
)
}
}
}
return(summaryFactory)
}
.getDatasetArgumentsRCodeLines <- function(x, complete = FALSE, digits = 4) {
m <- getWideFormat(x)
lines <- character(0)
paramNames <- colnames(m)
if (!complete) {
if (x$.inputType == "stagewise") {
paramNames <- paramNames[!grepl("^overall", paramNames)]
} else {
paramNames <- paramNames[grepl("^(stage|group|subset|overall)", paramNames)]
}
}
for (paramName in paramNames) {
encapsulate <- grepl("^subset", paramName)
if (!encapsulate || isTRUE(x$.enrichmentEnabled)) {
values <- m[[paramName]]
if (!encapsulate && is.numeric(values) && !is.null(digits) && length(digits) == 1 && !is.na(digits)) {
values <- round(values, digits = digits)
}
lines <- c(lines, paste0(paramName, " = ", .arrayToString(values,
vectorLookAndFeelEnabled = TRUE, encapsulate = encapsulate, digits = NA_integer_
)))
}
}
return(lines)
}
#'
#' @title
#' Print Dataset Values
#'
#' @description
#' \code{print} prints its \code{\link{Dataset}} argument and returns it invisibly (via \code{invisible(x)}).
#'
#' @param x A \code{\link{Dataset}} object.
#' @param markdown If \code{TRUE}, the output will be created in Markdown.
#' @param output A character defining the output type, default is "list".
#' @inheritParams param_three_dots
#'
#' @details
#' Prints the dataset.
#'
#' @export
#'
#' @keywords internal
#'
print.Dataset <- function(x, ..., markdown = FALSE, output = c("list", "long", "wide", "r", "rComplete")) {
fCall <- match.call(expand.dots = FALSE)
datasetName <- deparse(fCall$x)
output <- match.arg(output)
if (markdown) {
if (output != "list") {
warning("'output' (\"", output, "\") will be ignored ",
"because only \"list\" is supported yet if markdown is enabled",
call. = FALSE
)
}
x$.catMarkdownText()
return(invisible(x))
}
if (output == "long") {
m <- getLongFormat(x)
m <- prmatrix(m, rowlab = rep("", nrow(m)))
print(m, quote = FALSE, right = FALSE)
return(invisible(x))
} else if (output == "wide") {
m <- getWideFormat(x)
m <- prmatrix(m, rowlab = rep("", nrow(m)))
print(m, quote = FALSE, right = FALSE)
return(invisible(x))
} else if (output %in% c("r", "rComplete")) {
lines <- .getDatasetArgumentsRCodeLines(x, complete = (output == "rComplete"))
lines <- paste0("\t", lines)
if (is.null(datasetName) || length(datasetName) != 1 || is.na(datasetName)) {
datasetName <- "dataInput"
}
cat(datasetName, " <- getDataset(\n", sep = "")
cat(paste0(lines, collapse = ",\n"), "\n")
cat(")\n")
return(invisible(x))
}
x$show()
return(invisible(x))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.