Nothing
#' @name allocationTable
#'
#' @title Allocation Tables for the Randomization Module
#' @description Generate allocation table for the REDCap
#' randomization module. Randomization may be stratified by other
#' (categorical) variables in the data set as well as by data
#' access group. Additionally, randomization may be blocked
#' to ensure balanced groups throughout the allocation
#'
#' @param rcon A REDCap connection object as generated by \code{redcapConnection}
#' @param meta_data A text string giving the location of the data
#' dictionary downloaded from REDCap.
#' @param random The field name to be randomized.
#' @param strata Field names by which to stratify the randomization.
#' @param group A field name giving a group by which randomization should be
#' stratified. This could also be listed in \code{strata}, but the argument
#' is provided to remain consistent with the REDCap user interface.
#' @param dag.id Data Access Group IDs. See the package wiki for instructions
#' on how to get the ID's. (They cannot currently be accessed via the API)
#' @param replicates The number of randomizations to perform within each stratum
#' @param block.size Block size for the randomization. Blocking is recommended
#' to ensure balanced groups throughout the randomization. This may be a vector
#' to indicate variable block sizes throughout the randomization.
#' @param block.size.shift A vector the same length as \code{block.size} where the
#' first element is 0. This controls when the block size changes as a proportion
#' of the total sample size. When \code{block.size=c(8, 4, 2)} and
#' \code{block.size.shift = c(0, .5, .9)}, the first half of the randomization
#' is performed in blocks of 8, then the next 40 percent of the randomization
#' is performed in blocks of 4, with the last 10 percent performed in blocks
#' of 2.
#' @param seed.dev At least one value is required. If only one value is given,
#' it will be converted to a vector with length equal to the number of strata.
#' Values will be incremented by 100 to provide independent randomizations.
#' This may also have length equal to the number of strata.
#' @param seed.prod Same as \code{seed.prod}, but used to seed the production
#' allocation. No pairwise elements of \code{seed.dev} and \code{seed.prod}
#' may be equal. This guarantees that the two randomization schemes are
#' unique.
#' @param bundle A \code{redcapBundle} object.
#' @param weights An optional vector giving the sampling weights for each of the randomization
#' groups. There must be one number for each level of the randomization variable. If named,
#' the names must match the group labels. If unnamed, the group labels will be assigned in the
#' same order they appear in the data dictionary. The weights will be normalized, so they do
#' not need to sum to 1.0. In other words, \code{weights=c(3, 1)} can indicate a 3:1 sampling
#' ratio.
#' @param ... Arguments to be passed to other methods
#'
#' @details Each element in \code{block.size} must be a multiple of the number of groups in the randomized
#' variable.
#'
#' The 'offline' version of the function operates on the data dictionary file downloaded from
#' REDCap. This is made available for instances where the
#' API can not be accessed for some reason (such as waiting for API approval
#' from the REDCap administrator).
#'
#' The value of \code{replicates} controls how many allocations are generated. It
#' is possible to get slightly more replicates than requested if your blocking design
#' cannot exactly match replicates. For example, if you as for 30 replicates in
#' blocks of 8, a warning will be printed and you will receive 32 replicates in the
#' randomization table.
#'
#' @author Benjamin Nutter
#'
#' @references
#' More instruction on using \code{redcapAPI} to produce allocation tables is
#' on the package wiki:
#' \url{https://github.com/nutterb/redcapAPI/wiki/Randomization-Module}
#'
#' Please refer to your institution's API documentation.
#'
#' Additional details on API parameters are found on the package wiki at
#' \url{https://github.com/nutterb/redcapAPI/wiki/REDCap-API-Parameters}
#' @export
allocationTable <- function(rcon, random, strata = NULL,
group = NULL, dag.id = NULL,
replicates, block.size,
block.size.shift = 0,
seed.dev = NULL, seed.prod = NULL,
bundle = NULL,
weights = NULL, ...)
UseMethod("allocationTable")
#' @rdname allocationTable
#' @export
allocationTable.redcapDbConnection <- function(rcon, random, strata = NULL,
group = NULL, dag.id = NULL,
replicates, block.size,
block.size.shift = 0,
seed.dev = NULL, seed.prod = NULL,
bundle = NULL,
weights = c(1, 1), ...){
message("Please accept my apologies. The exportUsers method for redcapDbConnection objects\n",
"has not yet been written. Please consider using the API.")
}
#' @rdname allocationTable
#' @export
allocationTable.redcapApiConnection <- function(rcon, random, strata = NULL,
group = NULL, dag.id = NULL,
replicates, block.size,
block.size.shift = 0,
seed.dev = NULL, seed.prod = NULL,
bundle = NULL,
weights = c(1, 1), ...)
{
if (!is.na(match("proj", names(list(...)))))
{
message("The 'proj' argument is deprecated. Please use 'bundle' instead")
bundle <- list(...)[["proj"]]
}
coll <- checkmate::makeAssertCollection()
#* Establish the meta_data table
meta_data <-
if (is.null(bundle$meta_data))
exportMetaData(rcon)
else bundle$meta_data
#* A utility function to extract the coded values from the meta_data
redcapChoices <- function(v, meta_data, raw=TRUE)
{
if (meta_data$field_type[meta_data$field_name == v] %in% c("dropdown", "radio")){
choice_str <- meta_data$select_choices_or_calculations[meta_data$field_name == v]
choice_str <- unlist(strsplit(choice_str, " [|] "))
return(stringr::str_split_fixed(choice_str, ", ", 2)[, (2-raw)])
}
else if (meta_data$field_type[meta_data$field_name == v] %in% c("yesno", "true_false"))
return(0:1)
else stop(paste0("'", v, "' is not a valid variable for stratification/randomization"))
}
#***************************************
#* Parameter Checking
#* 1. Verifying that 'random' is not missing
#* 2. random, strata and group are characters
#* 3. random and group have length 1
#* 4. all fields in 'random', 'strata', and 'group' exist in meta_data
#* 5. Calculate n_strata
#* 6. Verify 'replicates' is not missing and is numeric
#* 7. If 'blocks.size' is missing, set it equal to 'replicates'.
#* If not missing, it must be numeric
#* 9. block.size must be a multiple of n_strata
#* 9. First element in block.size.shift must be 0
#* 10. block.size.shift must be strictly increasing in the interval [0, 1)
#* 11. block.size.shift must have the same length as block.size
#* 12. The sum of all of the blocks must add up to replicates
#* 13. Check if all blocks conform to blocking design (warning produced)
#* 14. seed.dev is not NULL and has length 1 or n_strata
#* 15. seed.prod is not NULL and has length 1 or n_strata
#* 16. no pairwise elements of seed.dev are equal to seed.prod
#* 1. Verifying that 'random' is not missing
checkmate::assert_character(x = random,
len = 1,
add = coll)
#* 2. random, strata and group are characters
checkmate::assert_character(x = strata,
null.ok = TRUE,
add = coll)
checkmate::assert_character(x = group,
len = 1,
null.ok = TRUE,
add = coll)
#* 4. all fields in 'random', 'strata', and 'group' exist in meta_data
#* Verify that all given fields exist in the database
checkmate::assert_subset(x = random,
choices = meta_data$field_name,
add = coll)
checkmate::assert_subset(x = strata,
choices = meta_data$field_name,
add = coll)
checkmate::assert_subset(x = group,
choices = meta_data$field_name,
add = coll)
checkmate::reportAssertions(coll)
#* 5. Calculate n_levels
#* randomization levels
random_levels <- redcapChoices(random, meta_data)
random_level_names <- redcapChoices(random, meta_data, raw = FALSE)
n_levels <- length(random_levels)
#* stratification groups
strata <- c(strata, group)
strata_levels <- lapply(strata, redcapChoices, meta_data)
names(strata_levels) <- strata
if (!is.null(dag.id)) strata_levels[['redcap_data_access_group']] <- dag.id
#* Allocation table
allocation <- expand.grid(strata_levels)
if (nrow(allocation) == 0) allocation <- data.frame(place.holding.strata=1)
n_strata <- nrow(allocation)
#* 6. Verify 'replicates' is not missing and is numeric
checkmate::assert_integerish(x = replicates,
len = 1,
add = coll)
#* 7. If 'block.size' is missing, set it equal to 'replicates'.
if (missing(block.size)){
block.size <- replicates
warning("'block.size' was not provided. The value of 'replicates' is used")
}
else{
checkmate::assert_integerish(x = block.size,
add = coll)
}
#* 8. block.size must be a multiple of n_levels
if (any((block.size %% n_levels) != 0)){
coll$push(paste0("'block.size' must be a multiple of ", n_levels))
}
#* 9. First element in block.size.shift must be 0
if (block.size.shift[1] != 0){
coll$push(": The first element of 'block.size.shift' must be 0")
}
#* 10. block.size.shift must be strictly increasing in the interval [0, 1)
if (!all(block.size.shift >= 0) | !all(block.size.shift < 1) |
!all(diff(block.size.shift) > 0)){
coll$push("'block.size.shift' must be strictly increasing on the interval [0, 1)")
}
#* 11. block.size.shift must have the same length as block.size
if (length(block.size) != length(block.size.shift)){
coll$push(": 'block.size' and 'block.size.shift' must have the same length")
}
#* 12. The sum of all of the blocks must add up to replicates
max.n <- cumsum(diff(c(block.size.shift * replicates, replicates)))
blocks <- NULL
for (i in 1:length(block.size)){
while(sum(blocks) < max.n[i]){
blocks <- c(blocks, block.size[i])
}
}
Blocks <- data.frame(block.num = 1:length(blocks),
block.size = blocks,
cum.n = cumsum(blocks))
Blocks <- merge(Blocks, data.frame(block.size=block.size,
max.n = max.n),
by="block.size", sort=FALSE)
Blocks$conform <- with(Blocks, cum.n <= max.n)
if (sum(Blocks$block.size) != replicates){
warning("The sum of the block sizes should add up to 'replicates'\n",
" Please review the Blocks attribute and consider changing your blocking scheme")
}
#* 13. Check if all blocks conform to blocking design (warning produced)
if (!all(Blocks$conform)){
warning("The blocking design did not conform exactly to specifications\n",
" Please review the Blocks attribute and consider changing your blocking scheme")
}
#* 14. seed.dev is not NULL and has length 1 or n_strata
if (ifelse(is.null(seed.dev), TRUE, !length(seed.dev) %in% c(1, n_strata))){
coll$push(paste0("'seed.dev' is a required argument and must be length 1 or ", n_strata))
}
#* 15. seed.prod is not NULL and has length 1 or n_strata
if (ifelse(is.null(seed.prod), TRUE, !length(seed.prod) %in% c(1, n_strata))){
coll$push(paste0("'seed.prod' is a required argument and must be length 1 or ", n_strata))
}
#* 16. no pairwise elements of seed.dev are equal to seed.prod
if (any(seed.dev == seed.prod)){
coll$push("No pairwise elements of 'seed.dev' and 'seed.prod' may be equal")
}
#* 17. If 'weights' is not NULL, it is the same length as the number of levels in 'random'
if (is.null(weights)){
weights <- rep(1, length(random_levels))
names(weights) <- random_levels
warning("No 'weights' were given. Equal weights have been assumed.")
}
#* 18. If 'weights' has names, the names are identical to the levels of 'random'
if (!is.null(names(weights))){
if (!identical(names(weights), random_level_names)) {
coll$push(paste0("'weight' names must be '",
paste0(random_level_names, collapse = "', '"),
"'."))
}
}
#* 19. if 'weights' doesn't have names, assume the weights were given in the order of levels(random)
else {
names(weights) <- random_level_names
warning(paste0("No names given with 'weights'. The names '",
paste0(random_level_names, collapse = "', '"),
"' have been assumed"))
}
weights_orig <- weights
weights <- weights[random_level_names] / sum(weights)
checkmate::reportAssertions(coll)
if (length(seed.dev) == 1) seed.dev <- seed.dev + ((1:n_strata)-1)*100
if (length(seed.prod) == 1) seed.prod <- seed.prod + ((1:n_strata)-1)*100
if (is.null(weights)) weights <- rep(1, length(random_levels))
weights <- weights / sum(weights)
#* Randomization function
Randomization <- function(choices, Blocks, seed, weights){
set.seed(seed) #* set the seed
#* Randomizations
do.call("c",
lapply(X = Blocks$block.size,
FUN = function(x) sample(makeChoices(choices, x, weights),
size = x)))
}
# return(list(allocation, Blocks, random_levels, seed.dev))
#* Generate an allocation table for each stratum (Development)
dev_allocate <-
lapply(X = 1:nrow(allocation),
FUN = function(r){
a <- allocation[r, , drop=FALSE]
#* extend the length of the stratum data frame to accomodate the sampling
a <- a[rep(row.names(a), sum(Blocks$block.size)), , drop=FALSE]
a[[random]] <- Randomization(choices = random_levels,
Blocks = Blocks,
seed = seed.dev[r],
weights = weights)
return(a)
})
#* Combine the allocation tables
dev_allocate <- do.call("rbind", dev_allocate)
#* reorder the allocation table for uploading to REDCap
dev_allocate <- dev_allocate[, c(random, names(strata_levels)), drop=FALSE]
rownames(dev_allocate) <- NULL
#* Generate an allocation table for each stratum (Production)
prod_allocate <-
lapply(X = 1:nrow(allocation),
FUN = function(r){
a <- allocation[r, , drop=FALSE]
#* extend the length of the stratum data frame to accomodate the sampling
a <- a[rep(row.names(a), sum(Blocks$block.size)), , drop=FALSE]
a[[random]] <- Randomization(choices = random_levels,
Blocks = Blocks,
seed = seed.prod[r],
weights = weights)
return(a)
})
#* Combine the allocation tables
prod_allocate <- do.call("rbind", prod_allocate)
#* reorder the allocation table for uploading to REDCap
prod_allocate <- prod_allocate[c(random, names(strata_levels))]
rownames(prod_allocate) <- NULL
return(list(dev_allocation = dev_allocate,
dev_seed = seed.dev,
prod_allocation = prod_allocate,
prod_seed = seed.prod,
blocks = Blocks,
weights = weights_orig))
}
#' @rdname allocationTable
#' @param random_levels A vector of the randomization group level names. Determined from the
#' data dictionary.
makeChoices <- function(random_levels, block.size, weights){
group.size <- block.size * weights
if (sum(ceiling(group.size) - group.size) == 0)
choices <- rep(random_levels,
times = group.size)
else
choices <- sample(random_levels,
size = block.size,
replace = TRUE,
prob = weights)
choices
}
#' @rdname allocationTable
#' @export
allocationTable_offline <- function(meta_data, random, strata = NULL,
group = NULL, dag.id = NULL,
replicates, block.size,
block.size.shift = 0,
seed.dev = NULL, seed.prod = NULL,
bundle = NULL,
weights = c(1, 1), ...){
meta_data <- utils::read.csv(meta_data,
stringsAsFactors=FALSE,
na.strings = "")
col.names=c('field_name', 'form_name', 'section_header',
'field_type', 'field_label', 'select_choices_or_calculations',
'field_note', 'text_validation_type_or_show_slider_number',
'text_validation_min', 'text_validation_max', 'identifier',
'branching_logic', 'required_field', 'custom_alignment',
'question_number', 'matrix_group_name', 'matrix_ranking',
'field_annotation')
names(meta_data) <- col.names[1:length(col.names)]
allocationTable.redcapApiConnection(rcon = NULL,
random = random,
strata = strata,
group = group,
dag.id = dag.id,
replicates = replicates,
block.size = block.size,
block.size.shift = block.size.shift,
seed.dev = seed.dev,
seed.prod = seed.prod,
bundle = list(meta_data = meta_data),
weights = weights,
...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.