Nothing
##############################################################################################################################################
# Function: PerformAnalysis.
# Argument: Data model (or data stack) and analysis model.
# Description: This function carries out the statistical tests and computes the descriptive statistics specified in
# the analysis model using the specified data model (or data stack generated by the user).
# The number of simulations (n.sims) is used only if a data model is specified. If a data stack is specified,
# the number of simulations is obtained from this data stack.
#' @import doRNG
#' @import doParallel
#' @import foreach
PerformAnalysis = function(data, analysis.model, sim.parameters) {
# Check if a data stack was specified and, if a data model is specified, call the CreateDataStructure function
if (class(data) == "DataStack") {
if (data$description == "data.stack") {
call.CreateDataStructure = FALSE
data.stack = data
n.sims = data.stack$sim.parameters$n.sims
seed = data.stack$sim.parameters$seed
data.structure = data.stack$data.structure
} else {
stop("The data object is not recognized.")
}
} else {
call.CreateDataStructure = TRUE
data.model = data
# Create a dummy data.stack
data.stack = CreateDataStack(data.model, 1)
data.structure = CreateDataStructure(data.model)
}
# Simulation parameters
# Number of simulation runs
if (is.null(sim.parameters$n.sims))
stop("The number of simulation runs must be provided (n.sims)")
if (call.CreateDataStructure == TRUE){
n.sims = sim.parameters$n.sims
}
else {
# Get the number of simulations with the data stack
n.sims = n.sims
warning("The number of simulation runs from the sim.parameter was ignored as a data stack was defined.")
}
if (!is.numeric(n.sims))
stop("Number of simulation runs must be an integer.")
if (length(n.sims) > 1)
stop("Number of simulation runs: Only one value must be specified.")
if (n.sims%%1 != 0)
stop("Number of simulation runs must be an integer.")
if (n.sims <= 0)
stop("Number of simulation runs must be positive.")
# Seed
if (is.null(sim.parameters$seed))
stop("The seed must be provided (seed)")
if (call.CreateDataStructure == TRUE){
seed = sim.parameters$seed
} else {
# Get the number of simulations with the data stack
seed = seed
warning("The seed from the sim.parameter was ignored as a data stack was defined.")
}
if (!is.numeric(seed))
stop("Seed must be an integer.")
if (length(seed) > 1)
stop("Seed: Only one value must be specified.")
if (nchar(as.character(seed)) > 10)
stop("Length of seed must be inferior to 10.")
if (!is.null(sim.parameters$proc.load)){
proc.load = sim.parameters$proc.load
if (is.numeric(proc.load)){
if (length(proc.load) > 1)
stop("Number of cores: Only one value must be specified.")
if (proc.load %%1 != 0)
stop("Number of cores must be an integer.")
if (proc.load <= 0)
stop("Number of cores must be positive.")
n.cores = proc.load
}
else if (is.character(proc.load)){
n.cores=switch(proc.load,
low={1},
med={parallel::detectCores()/2},
high={parallel::detectCores()-1},
full={parallel::detectCores()},
{stop("Processor load not valid")})
}
} else n.cores = 1
sim.parameters = list(n.sims = n.sims, seed = seed, proc.load = n.cores)
# Perform error checks for the data model and create an internal analysis structure
analysis.structure = CreateAnalysisStructure(analysis.model)
# Check if the samples referenced in the analysis model are actually specified in the data model
# List of sample IDs
sample.id = unlist(data.structure$id)
# Number of tests specified in the analysis model
n.tests = length(analysis.structure$test)
if (n.tests > 0) {
# Test IDs
test.id = rep(" ", n.tests)
for (test.index in 1:n.tests) {
test.id[test.index] = analysis.structure$test[[test.index]]$id
# Number of samples in the current test
n.test.samples = length(analysis.structure$test[[test.index]]$samples)
# When processing samples specified for individual tests, it is important to remember that
# a hierarchical structure can be used, i.e., samples can first be merged and then passed to a specific test
for (i in 1:n.test.samples) {
# Number of subsamples in the current sample
n.subsamples = length(analysis.structure$test[[test.index]]$samples[[i]])
if (n.subsamples == 1) {
if (!(analysis.structure$test[[test.index]]$samples[[i]] %in% sample.id))
stop(paste0("Analysis model: Sample '", analysis.structure$test[[test.index]]$samples[[i]], "' is not defined in the data model."))
} else {
# Multiple subsamples
for (j in 1:n.subsamples) {
if (!(analysis.structure$test[[test.index]]$samples[[i]][[j]] %in% sample.id))
stop(paste0("Analysis model: Sample '", analysis.structure$test[[test.index]]$samples[[i]][[j]], "' is not defined in the data model."))
}
}
}
}
}
# Number of statistics specified in the analysis model
n.statistics = length(analysis.structure$statistic)
# Statistic IDs
statistic.id = rep(" ", n.statistics)
if (n.statistics > 0) {
for (statistic.index in 1:n.statistics) {
statistic.id[statistic.index] = analysis.structure$statistic[[statistic.index]]$id
# Number of samples in the current statistic
n.statistic.samples = length(analysis.structure$statistic[[statistic.index]]$samples)
for (i in 1:n.statistic.samples) {
# Number of subsamples in the current sample
n.subsamples = length(analysis.structure$statistic[[statistic.index]]$samples[[i]])
if (n.subsamples == 1) {
if (!(analysis.structure$statistic[[statistic.index]]$samples[[i]] %in% sample.id))
stop(paste0("Analysis model: Sample '", analysis.structure$statistic[[statistic.index]]$samples[[i]], "' is not defined in the data model."))
} else {
# Multiple subsamples
for (j in 1:n.subsamples) {
if (!(analysis.structure$statistic[[statistic.index]]$samples[[i]][[j]] %in% sample.id))
stop(paste0("Analysis model: Sample '", analysis.structure$statistic[[statistic.index]]$samples[[i]][[j]], "' is not defined in the data model."))
}
}
}
}
}
# Information on the analysis scenario factors
# Number of multiplicity adjustment sets
if (!is.null(analysis.structure$mult.adjust)) {
n.mult.adjust = length(analysis.structure$mult.adjust)
} else {
n.mult.adjust = 1
}
# Number of analysis points (total number of interim and final analyses)
if (!is.null(analysis.structure$interim.analysis)) {
n.analysis.points = length(analysis.structure$interim.analysis$interim.looks$fraction)
} else {
# No interim analyses
n.analysis.points = 1
}
# Create the analysis stack (list of the analysis sets produced by the test and statistic functions,
# each element in this list contains the results generated in a single simulation run)
analysis.set = list()
# Number of data scenarios
n.data.scenarios = dim(data.stack$data.scenario.grid)[1]
data.scenario.grid = data.stack$data.scenario.grid
# Create a grid of the data and analysis scenario factors (outcome parameter, sample size,
# design parameter, multiplicity adjustment)
scenario.grid = matrix(0, n.data.scenarios * n.mult.adjust, 2)
index = 1
for (i in 1:n.data.scenarios) {
for (j in 1:n.mult.adjust) {
scenario.grid[index, 1] = i
scenario.grid[index, 2] = j
index = index + 1
}
}
# Number of data and analysis scenarios
n.scenarios = dim(scenario.grid)[1]
# Number of analysis samples in each data scenario
n.analysis.samples = length(data.stack$data.set[[1]]$data.scenario[[1]]$sample)
# Simulation parameters
# Use proc.load to generate the clusters
cluster.mediana = parallel::makeCluster(getOption("cluster.mediana.cores", sim.parameters$proc.load))
# To make this reproducible I used the same number as the seed
set.seed(seed)
parallel::clusterSetRNGStream(cluster.mediana, seed)
#Export all functions in the global environment to each node
parallel::clusterExport(cluster.mediana,ls(envir=.GlobalEnv))
doParallel::registerDoParallel(cluster.mediana)
# Simulation index initialisation
sim.index=0
# Loop over simulation runs
result.analysis.scenario=foreach::foreach(sim.index=1:sim.parameters$n.sims, .packages=(.packages())) %dorng% {
# Select the current data set within the data stack
if (!call.CreateDataStructure){
current.data.set = data.stack$data.set[[sim.index]]
} else {
current.data.stack = CreateDataStack(data.model, 1)
current.data.set = current.data.stack$data.set[[1]]
}
# Matrix of results (p-values) produced by the tests
test.results = matrix(0, n.tests, n.analysis.points)
# Matrix of results produced by the statistics
statistic.results = matrix(0, n.statistics, n.analysis.points)
# Create the analysis scenario list (one element for each unique combination of the data scenario factors)
result.data.scenario = list()
# Loop over the data scenario factors (outcome parameter, sample size, and design parameter)
for (scenario.index in 1:n.data.scenarios) {
# Current data scenario
current.data.scenario = current.data.set$data.scenario[[scenario.index]]
# Current sample size set
current.sample.size.set = data.stack$data.scenario.grid[scenario.index, "sample.size"]
# Vector of sample sizes across the data samples in the current sample size set
if (!any(is.na(data.stack$data.structure$sample.size.set))) current.sample.sizes = data.stack$data.structure$sample.size.set[current.sample.size.set, ]
# Loop over interim analyses
for (analysis.point.index in 1:n.analysis.points) {
# Create a data slice for the current interim look if interim analyses are specified in the analysis model
if (!is.null(analysis.structure$interim.analysis)) {
sample.list = analysis.structure$interim.analysis$interim.looks$sample
parameter = analysis.structure$interim.analysis$interim.looks$parameter
fraction = analysis.structure$interim.analysis$interim.looks$fraction[[analysis.point.index]]
# Compute the total sample size in the sample list
n.sample.list = length(sample.list)
total.sample.size = 0
# Number of samples
n.samples = length(sample.id)
for (k in 1:n.samples) {
for (l in 1:n.sample.list) {
if(sample.list[[l]] == sample.id[k]) total.sample.size = total.sample.size + current.sample.sizes[k]
}
}
data.slice = CreateDataSlice(current.data.scenario, sample.list, parameter, round(total.sample.size * fraction))
} else {
# No interim analyses are specified in the analysis model -- simply use the current data scenario
data.slice = current.data.scenario
}
# Loop over the tests specified in the analysis model to compute statistic results
# if tests are specified in the analysis model
if (n.tests > 0) {
# Loop over the tests specified in the analysis model to compute test results (p-values)
for (test.index in 1:n.tests) {
# Current test
current.test = analysis.structure$test[[test.index]]
# Number of analysis samples specified in the current test
n.samples = length(current.test$samples)
# Extract the data frames for the analysis samples specified in the current test
sample.list = list()
# Extract the data frames for the analysis samples specified in the current test
for (sample.index in 1:n.samples) {
# Number of subsamples within the current analysis sample
n.subsamples = length(current.test$samples[[sample.index]])
if (n.subsamples == 1) {
# If there is only one subsamples, no merging is required, simply select the right analysis sample
sample.flag.num = match(current.test$samples[[sample.index]],sample.id)
sample.list[[sample.index]] = data.slice$sample[[sample.flag.num]]$data
} else {
# If there are two or more subsamples, these subsamples must be merged first to create analysis samples
# that are passed to the statistic function
subsample.flag.num = match(current.test$samples[[sample.index]],sample.id)
selected.subsamples = lapply(as.list(subsample.flag.num), function(x) data.slice$sample[[x]]$data)
# Merge the subsamples
sample.list[[sample.index]] = do.call(rbind, selected.subsamples)
}
}
# Compute the test results (p-values) by calling the function for the current test with the test parameters
test.results[test.index, analysis.point.index] = do.call(current.test$method,
list(sample.list, list("PerformAnalysis",current.test$par)))
} # End of the loop over the tests
} # End of the if n.tests>0
# Loop over the statistics specified in the analysis model to compute statistic results
# if statistics are specified in the analysis model
if (n.statistics > 0) {
for (statistic.index in 1:n.statistics) {
# Current statistic
current.statistic = analysis.structure$statistic[[statistic.index]]
# Number of analysis samples specified in the current statistic
n.samples = length(current.statistic$samples)
# Extract the data frames for the analysis samples specified in the current statistic
sample.list = list()
for (sample.index in 1:n.samples) {
# Number of subsamples within the current analysis sample
n.subsamples = length(current.statistic$samples[[sample.index]])
if (n.subsamples == 1) {
# If there is only one subsamples, no merging is required, simply select the right analysis sample
sample.flag.num = match(current.statistic$samples[[sample.index]],sample.id)
sample.list[[sample.index]] = data.slice$sample[[sample.flag.num]]$data
} else {
# If there are two or more subsamples, these subsamples must be merged first to create analysis samples
# that are passed to the statistic function
subsample.flag.num = match(current.statistic$samples[[sample.index]],sample.id)
selected.subsamples = lapply(as.list(subsample.flag.num), function(x) data.slice$sample[[x]]$data)
# Merge the subsamples
sample.list[[sample.index]] = do.call(rbind, selected.subsamples)
}
}
# Compute the statistic results by calling the function for the current statistic with the statistic parameters
statistic.results[statistic.index, analysis.point.index] = do.call(current.statistic$method,
list(sample.list, list("PerformAnalysis",current.statistic$par)))
} # End of the loop over the statistics
} # End of the if n.statistics>0
} # Loop over interim analyses
# Assign test names
if (n.tests > 0) {
test.results = as.data.frame(test.results)
rownames(test.results) = test.id
if (n.analysis.points == 1) {
colnames(test.results) = "Analysis"
} else {
names = rep("", n.analysis.points)
for (j in 1:n.analysis.points) names[j] = paste0("Analysis ", j)
colnames(test.results) = names
}
} else {
# No tests are specified in the analysis model
test.results = NA
}
# Assign statistic names
if (n.statistics > 0) {
statistic.results = as.data.frame(statistic.results)
rownames(statistic.results) = statistic.id
if (n.analysis.points == 1) {
colnames(statistic.results) = "Analysis"
} else {
names = rep("", n.analysis.points)
for (j in 1:n.analysis.points) names[j] = paste0("Analysis ", j)
colnames(statistic.results) = names
}
} else {
# No statistics are specified in the analysis model
statistic.results = NA
}
result = list(tests = test.results,
statistic = statistic.results)
result.data.scenario[[scenario.index]] = list(result = result)
} # Loop over the data scenario factors
# Loop for each data scenario
for (data.scenario.index in 1:n.data.scenarios) {
# If at least one multiplicity adjustment has been specified loop over the analysis scenario factors (multiplicity adjustment)
if (!is.null(analysis.structure$mult.adjust)) {
# Create the analysis scenario list (one element for each unique combination of the data and analysis scenario factors)
result.data.scenario[[data.scenario.index]]$result$tests.adjust = list()
# Loop for each analysis.scenarios
for (scenario.index in 1:n.mult.adjust) {
# Matrix of results (p-values) produced by the tests
test.results.adj = matrix(0, n.tests, n.analysis.points)
# Get the current multiplicity adjustment procedure
current.mult.adjust = analysis.structure$mult.adjust[[scenario.index]]
# Get the unadjusted pvalues for the current data scenarios
current.pvalues = result.data.scenario[[data.scenario.index]]$result$tests
# Loop for each analysis point
for (analysis.point.index in 1:n.analysis.points) {
# Number of multiplicity adjustment procedure within the multiplicity adjustment scenarios
n.mult.adjust.sc = length(current.mult.adjust)
# Loop for each multiplicity adjustment procedure within the multiplicity adjustment scenarios
for (mult.adjust.sc in 1:n.mult.adjust.sc) {
# Apply the multiple testing procedure specified in the current multiplicity adjustment set
# to the tests specified in this set
# Extract the p-values for the tests specified in the current multiplicity adjustment set
pvalues.flag.num = match(current.mult.adjust[[mult.adjust.sc]]$tests, test.id)
selected.pvalues = current.pvalues[pvalues.flag.num, analysis.point.index]
if (!is.na(current.mult.adjust[[mult.adjust.sc]]$proc)) {
test.results.adj[pvalues.flag.num, analysis.point.index] = do.call(current.mult.adjust[[mult.adjust.sc]]$proc, list(selected.pvalues, list("Analysis", current.mult.adjust[[mult.adjust.sc]]$par)))
} else {
# If no multiplicity procedure is defined, there is no adjustment
test.results.adj[pvalues.flag.num, analysis.point.index] = selected.pvalues
}
} # End Loop for each multiplicity adjustment procedure within the multiplicity adjustment scenario
} # End Loop for each analysis point
# Assign test names
if (n.tests > 0) {
test.results.adj = as.data.frame(test.results.adj)
rownames(test.results.adj) = test.id
if (n.analysis.points == 1) {
colnames(test.results.adj) = "Analysis"
} else {
names = rep("", n.analysis.points)
for (j in 1:n.analysis.points) names[j] = paste0("Analysis ", j)
colnames(test.results.adj) = names
}
} else {
# No tests are specified in the analysis model
test.results.adj = NA
}
result.data.scenario[[data.scenario.index]]$result$tests.adjust$analysis.scenario[[scenario.index]] = test.results.adj
} # End Loop for each analysis.scenarios
} # End if analysis.structure
else {
result.data.scenario[[data.scenario.index]]$result$tests.adjust$analysis.scenario[[1]] = result.data.scenario[[data.scenario.index]]$result$tests
}
} # End loop for each data scenario
result.analysis.scenario = result.data.scenario
return(result.analysis.scenario)
} # End of the loop over the simulations
# Stop the cluster
parallel::stopCluster(cluster.mediana)
#closeAllConnections()
# Define the analysis scenario grid (unique combinations of the data and analysis scenario factors)
analysis.scenario.grid = as.data.frame(matrix(0, n.data.scenarios * n.mult.adjust, 4))
d = data.stack$data.scenario.grid
analysis.scenario.grid[, 1:3] = d[scenario.grid[,1], ]
analysis.scenario.grid[, 4] = scenario.grid[,2]
colnames(analysis.scenario.grid) = c("design.parameter", "outcome.parameter",
"sample.size", "multiplicity.adjustment")
# Create the analysis stack
analysis.stack = list(description = "analysis.stack",
analysis.set = result.analysis.scenario,
analysis.scenario.grid = analysis.scenario.grid,
data.structure = data.structure,
analysis.structure = analysis.structure,
sim.parameters = sim.parameters)
class(analysis.stack) = "AnalysisStack"
return(analysis.stack)
}
# End of PerformAnalysis
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.