R/independent_twoway_generation.R
In extraSuperpower: Power Calculation for Two-Way Factorial Designs

Documented in twoway_simulation_independent

#' Simulate independent measurements in a two-way factorial design
#'
#' Both regular and internal function.
#' As regular function takes input generated by the `calculate_mean_matrix` function and iteratively simulates independent measures two-way factorial experiments.
#' Outcome may be normally distributed, have a skewed normal distribution or a truncated normal distribution.
#'
#' As internal function runs with a single iteration inside `graph_twoway_assumptions`, which in itself is inside  `calculate_mean_matrix` to generate data for the cell mean and standard deviation plot.
#'
#' @param group_size Integer or matrix - Sample size for each condition (combination of factor levels). If `balanced=TRUE` (default) `group_size` must be an integer. If `balanced=FALSE` `group_size` must be a matrix.
#' @param matrices_obj List - Output generated by `calculate_mean_matrix` that include cell mean and standard deviation matrices.
#' @param distribution Character - Type of distribution to simulate. Possible values are 'normal', 'skewed' or 'truncated.normal'.
#' @param skewness Numeric - Momentum of distribution skewness
#' @param inferior_limit Numeric - Value of the lower bound for the truncated distribution, defaults to '-Inf'. Ignored if `distribution` is either 'normal' or 'skewed'.
#' @param superior_limit Numeric - Value of the upper bound for the truncated distribution, defaults to 'Inf'. Ignored if `distribution` is either 'normal' or 'skewed'.
#' @param balanced Logical - Whether the study will be performed with the same number of subjects in all groups. Default is `TRUE`. See 'details'.
#' @param nsims Integer - Number of iterations.
#'
#' @details
#' For unbalanced independent measures designs, this function generates a simulation with `max(group_size)` for all factors combinations and then eliminates observations
#' at random in those factor combinations that have less participants or study subjects. This may not sound like the most efficient way to proceed, is quite fast anyhow.
#'The 'n' column in the output will reflect how many observations each factor combination has. This should match the input matrix.
#'
#' @importFrom stats rnorm
#'
#' @return data.frame with modeled outcome values, factor level labels, iteration number and sample size.
#'
#' @examples
#' refmean <- 1
#' treatgroups <- 4
#' timepoints <- 5
#' treateff <- 1.5
#' timeeff <- 0.85
#' factors_levels_names <- list(treatment=letters[1:treatgroups], time=1:timepoints)
#'
#' ## Independent design
#' effects_treat_time <- calculate_mean_matrix(refmean = refmean,
#'                                             fAeffect = treateff, fBeffect = timeeff,
#'                                             nlfA = treatgroups, nlfB = timepoints,
#'                                             label_list = factors_levels_names)
#' ## Inspect plot to check if matrices correspond to design
#' n <- 20
#' independent_experiment <- twoway_simulation_independent(group_size = n,
#'                                                         matrices_obj = effects_treat_time)
#' head(independent_experiment, 10)
#'
#' @export
twoway_simulation_independent <- function(group_size, matrices_obj, distribution="normal", skewness=1, inferior_limit=-Inf, superior_limit=Inf, balanced=TRUE, nsims=200)
{

  if(any(grepl("gg", unlist(sapply(matrices_obj, class)))))
  {
    matrices_obj <- matrices_obj$matrices_obj
  }

  if(length(matrices_obj)>2)
  {stop("'matrices_obj' should only include two matrices. Are you sure you want to simulate independent measures?")}

  if(is.null(dimnames(matrices_obj$mean.mat)) | is.null(names(dimnames(matrices_obj$mean.mat))))
  {
    stop("The cell mean model must have full dimnames")
  }
  label_list <- dimnames(matrices_obj$mean.mat)
  factor_levels <- dim(matrices_obj$mean.mat)
  mean_matrix <- as.vector(t(matrices_obj$mean.mat))
  if (!balanced & length(group_size)==1)
  {
    stop("Are you sure you want to set balanced to false?")
  } else if (balanced & length(group_size)>1)
  {
    stop("If you wish a balanced design a single integer is required as 'group_size' argument.")
  }
  if(balanced & length(group_size==1))
  {
    if(as.integer(group_size)!=group_size)
    {
      stop("For balanced designs 'group_size' must be a single integer")
    }
  }
  if(length(mean_matrix) %% length(group_size)>0)
  {
    stop("Number of elements of mean matrix must be a multiple of group size elements.")
  }
  if((distribution=="normal" | distribution=="skewed") & (superior_limit<Inf | inferior_limit>-Inf))
  {
    warning(paste("Superior and inferior limits are ignored when distribution is", distribution))
  }
  if((distribution=="normal" | distribution=="truncated.normal") & (skewness!=1))
  {
    warning(paste("Skewness is ignored when distribution is", distribution))
  }

  if(is.matrix(matrices_obj$sd.mat))
  {
    sd_matrix <- as.vector(t(matrices_obj$sd.mat))
  } else if (is.numeric(matrices_obj$sd.mat) & length(matrices_obj$sd.mat)==1)
  {
    sd_matrix <- matrices_obj$sd.mat
  }

  sampn <- max(group_size)
  fdata <- as.data.frame(mapply(rnorm, sampn, mean_matrix, sd_matrix))
  fdata$subject <- 1:sampn
  fdata <- reshape2::melt(fdata, id.vars = "subject", variable.name = "cond",
                          value.name = "y")
  for (j in 1:2)
  {
    assigned_factor_levels <- rep(as.list(paste(names(label_list)[j], label_list[[j]], sep = "_")),
                                  each = sampn * prod(factor_levels)/prod(factor_levels[1:j]),
                                  times = prod(factor_levels)/prod(factor_levels[j:2]))
    assigned_factor_levels <- unlist(assigned_factor_levels)
    assigned_factor_levels <- factor(assigned_factor_levels, levels = unique(assigned_factor_levels))
    fdata <- cbind(fdata, assigned_factor_levels)
  }
  names(fdata)[4:5] <- names(label_list)

  if(distribution=="normal")
  {
    y <- suppressMessages(replicate(nsims, reshape2::melt(mapply(rnorm, sampn,
                                                                 mean_matrix, sd_matrix))$value))
  } else if (distribution=="skewed")
  {
    y <- suppressMessages(replicate(nsims, reshape2::melt(mapply(fGarch::rsnorm, sampn,
                                                                 mean_matrix, sd_matrix, skewness))$value))
  } else if (distribution=="truncated.normal")
  {
    y <- suppressMessages(replicate(nsims, reshape2::melt(mapply(truncnorm::rtruncnorm, sampn, a=inferior_limit, b=superior_limit,
                                                                 mean_matrix))$value))
  }
  sim <- lapply(seq(nsims),
                function(x)
                {
                  fdata$y <- y[,x]
                  fdata$iteration <- x
                  fdata$subject <- factor(1:nrow(fdata))
                  fdata
                })
  sim <- do.call(rbind, sim)
  if(!balanced & length(group_size)>1)
  {
    tosample <- sim[sim$iteration==1,]
    keep <- sapply(1:length(levels(tosample$cond)),
                   function(x) sample(tosample$subject[tosample$cond==levels(tosample$cond)[x]], t(group_size)[x]))
    keep <- sort(unlist(keep))
    sim <- sim[sim$subject %in% keep,]
    sim$subject <- droplevels(sim$subject)
    sim$n <- rep(unlist(mapply(rep, t(group_size), t(group_size))), nsims)
  }
  if(balanced & length(group_size)==1)
  {
    sim$n <- group_size
  }
  sim
}