# Nothing
#######################################################################################################################
# Function: CreateDataStack.
# Argument: Data model, number of simulations and an optional seed for the random number generator.
# Description: Generates a data stack, which is a collection of individual data sets (one data set per simulation run).
# Value: A list of class "DataStack" with the elements description ("data.stack"), data.set (one element per
#        simulation run, each holding a data.scenario list), data.scenario.grid and data.structure.
# Errors: Stops if the seed is not a single numeric value of at most 10 characters, if the number of simulations
#         is not a single positive number, or if the data structure defines neither sample sizes nor events.
CreateDataStack = function(data.model, n.sims, seed=NULL) {

  # Perform error checks for the data model and create an internal data structure
  data.structure = CreateDataStructure(data.model)

  # Check the seed if defined (the seed should be defined only when the user generates the data stack)
  if (!is.null(seed)){
    if (!is.numeric(seed))
      stop("Seed must be an integer.")
    if (length(seed) > 1)
      stop("Seed: Only one value must be specified.")
    if (nchar(as.character(seed)) > 10)
      stop("Length of seed must be inferior to 10.")
  }

  # Check the number of simulations: 1:n.sims would silently iterate over c(1, 0) for n.sims = 0,
  # so reject anything that is not a single number >= 1 before any data is generated
  if (!is.numeric(n.sims) || length(n.sims) != 1 || is.na(n.sims) || n.sims < 1)
    stop("Number of simulations must be a single positive number.")
  # Number of simulation runs actually performed (same truncation as 1:n.sims)
  n.runs = floor(n.sims)

  # Create short names for data model parameters
  outcome.dist = data.structure$outcome$outcome.dist
  outcome.type = data.structure$outcome$outcome.type
  data.sample.id = data.structure$id
  data.size = data.structure$sample.size.set
  data.event = data.structure$event.set
  rando.ratio = data.structure$rando.ratio
  data.design = data.structure$design.parameter.set
  data.outcome = data.structure$outcome.parameter.set

  # Number of outcome parameter sets and design parameter sets
  # (a data model without design parameters is treated as having a single design scenario)
  n.outcome.parameter.sets = length(data.outcome)
  n.design.parameter.sets = if (is.null(data.design)) 1 else length(data.design)

  # Determine if sample size or event were used
  # Determine the maximum of events or sample size for each data sample (column) across the sets (rows)
  sample.size = any(!is.na(data.size))
  event = any(!is.na(data.event))
  if (sample.size) {
    n.sample.size.event.sets = dim(data.size)[1]
    max.sample.size = apply(data.size, 2, max)
  } else if (event) {
    n.sample.size.event.sets = dim(data.event)[1]
    # NOTE(review): apply() returns one maximum per column, but max.event is used below as a single
    # number (while condition); this assumes the event set has exactly one column - confirm against
    # CreateDataStructure
    max.event = apply(data.event, 2, max)
  } else {
    # Without this check the function fails later with an unrelated "object not found" error
    stop("Data model: Sample sizes or numbers of events must be specified.")
  }

  # Number of data samples specified in the data model
  n.data.samples = length(data.sample.id)

  # Create a grid of the data scenario factors (outcome parameter, sample size and design parameter)
  data.scenario.grid = expand.grid(design.parameter.set = seq_len(n.design.parameter.sets),
                                   outcome.parameter.set = seq_len(n.outcome.parameter.sets),
                                   sample.size.set = seq_len(n.sample.size.event.sets))
  colnames(data.scenario.grid) = c("design.parameter", "outcome.parameter", "sample.size")

  # Number of data scenarios (number of unique combinations of the data scenario factors)
  n.data.scenarios = dim(data.scenario.grid)[1]

  # Create a grid of the outcome and design scenario factors (outcome parameter and design parameter)
  data.design.outcome.grid = expand.grid(design.parameter.set = seq_len(n.design.parameter.sets),
                                         outcome.parameter.set = seq_len(n.outcome.parameter.sets))
  colnames(data.design.outcome.grid) = c("design.parameter", "outcome.parameter")

  # Number of design and outcome scenarios (number of unique combinations of the design and outcome scenario factors)
  n.design.outcome.scenarios = dim(data.design.outcome.grid)[1]

  # Local helper: outcome description passed to GeneratePatients for one data sample
  GetSampleOutcome = function(outcome.parameter, data.sample.index) {
    list(dist = outcome.dist, par = outcome.parameter[[data.sample.index]], type = outcome.type)
  }

  # Local helper: number of events (non-censored patients) across all samples for the first outcome
  CountObservedEvents = function(samples) {
    sum(unlist(lapply(samples, function(x) !x[[1]]$data[,"patient.censor.indicator"])))
  }

  # Local helper: append newly generated patients to the patients already generated for one data sample
  # (outcome by outcome); id and outcome.type are taken from the existing data, the merged data is
  # stored as a data frame
  AppendPatients = function(current, additional) {
    merged = mapply(rbind,
                    lapply(current, function(x) as.data.frame(x$data)),
                    lapply(additional, function(x) as.data.frame(x$data)),
                    SIMPLIFY = FALSE)
    merged = lapply(merged, function(x) as.matrix(x))
    mapply(function(x, y) list(id = x$id, outcome.type = x$outcome.type, data = as.data.frame(y)),
           x = current, y = merged, SIMPLIFY = FALSE)
  }

  # Local helper: samples generated for a given combination of design and outcome parameter sets
  FindGeneratedSamples = function(design.outcome.variables, design.index, outcome.index) {
    is.match = vapply(design.outcome.variables,
                      function(x) x$design.parameter == design.index && x$outcome.parameter == outcome.index,
                      logical(1))
    design.outcome.variables[is.match][[1]]$sample
  }

  # Set the seed
  if (!is.null(seed)) set.seed(seed)

  # Create the data stack which is represented by a list of data sets (one data set for each simulation run)
  data.set = vector("list", n.runs)

  # Loop over the simulations
  for (sim.index in seq_len(n.runs)) {

    design.outcome.variables = vector("list", n.design.outcome.scenarios)

    # Loop over the design and outcome grid: patients are generated once per design/outcome combination
    # and shared by all sample size (or event) sets
    for (design.outcome.index in seq_len(n.design.outcome.scenarios)) {

      # Get the current design index and parameters
      current.design.index = data.design.outcome.grid[design.outcome.index, "design.parameter"]
      current.design.parameter = data.design[[current.design.index]]

      # Get the outcome index and parameters
      current.outcome.index = data.design.outcome.grid[design.outcome.index, "outcome.parameter"]
      current.outcome.parameter = data.outcome[[current.outcome.index]]

      # Initialize the list of generated samples
      df = vector("list", n.data.samples)

      if (sample.size) {
        # If sample size is used (fixed number of patients): generate the maximum sample size
        # across the sample size sets for each data sample
        for (data.sample.index in seq_len(n.data.samples)) {
          df[[data.sample.index]] = GeneratePatients(current.design.parameter,
                                                     GetSampleOutcome(current.outcome.parameter, data.sample.index),
                                                     unlist(data.sample.id[[data.sample.index]]),
                                                     max.sample.size[data.sample.index])
        }
      } else {
        # If event is used (generate data until the number of events required for the first outcome is reached)

        # Generate a first set of data corresponding to the maximum number of events required
        # divided by the randomization ratio
        for (data.sample.index in seq_len(n.data.samples)) {
          df[[data.sample.index]] = GeneratePatients(current.design.parameter,
                                                     GetSampleOutcome(current.outcome.parameter, data.sample.index),
                                                     unlist(data.sample.id[[data.sample.index]]),
                                                     rando.ratio[data.sample.index] * ceiling(max.event / sum(rando.ratio)))
        }

        # Get the number of events observed across all samples for the primary endpoint
        n.observed.events = CountObservedEvents(df)

        # Loop until the maximum number of events required is observed
        while (n.observed.events < max.event) {
          # Add one randomization block: rando.ratio[i] additional patients for each data sample
          for (data.sample.index in seq_len(n.data.samples)) {
            new.patients = GeneratePatients(current.design.parameter,
                                            GetSampleOutcome(current.outcome.parameter, data.sample.index),
                                            unlist(data.sample.id[[data.sample.index]]),
                                            rando.ratio[data.sample.index])
            # Merge the previously generated data with the new data
            if (!is.null(df[[data.sample.index]])) {
              df[[data.sample.index]] = AppendPatients(df[[data.sample.index]], new.patients)
            } else {
              df[[data.sample.index]] = new.patients
            }
          }
          n.observed.events = CountObservedEvents(df)
        } # Loop until the maximum number of events required is observed
      }

      design.outcome.variables[[design.outcome.index]] = list(design.parameter = current.design.index,
                                                              outcome.parameter = current.outcome.index,
                                                              sample = df)
    } # Loop over the design and outcome grid

    # Create the data scenario list (one element for each unique combination of the data scenario factors)
    data.scenario = vector("list", n.data.scenarios)

    # Loop over the data scenarios
    for (data.scenario.index in seq_len(n.data.scenarios)) {
      design.index = data.scenario.grid[data.scenario.index, 1]
      outcome.index = data.scenario.grid[data.scenario.index, 2]
      # Third grid column: row of the sample size set or of the event set
      set.index = data.scenario.grid[data.scenario.index, 3]

      # Get the design.outcome variables corresponding to the current data scenario
      current.design.outcome.variables = FindGeneratedSamples(design.outcome.variables, design.index, outcome.index)

      # Generate the data for the current data scenario
      if (sample.size) {
        current.sample.size = data.size[set.index,]
        data.scenario[[data.scenario.index]] = list(sample = CreateDataScenarioSampleSize(current.design.outcome.variables, current.sample.size))
      } else {
        current.events = data.event[set.index,]
        data.scenario[[data.scenario.index]] = list(sample = CreateDataScenarioEvent(current.design.outcome.variables, current.events, rando.ratio))
      }
    } # Loop over the data scenarios

    data.set[[sim.index]] = list(data.scenario = data.scenario)

  } # Loop over the simulations

  # Create the data stack
  data.stack = list(description = "data.stack",
                    data.set = data.set,
                    data.scenario.grid = data.scenario.grid,
                    data.structure = data.structure)

  class(data.stack) = "DataStack"
  return(data.stack)

}
# End of CreateDataStack
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.