#' Create a set of continuous covariates
#'
#' Creates a set of continuous covariates from a multivariate normal
#' distribution and (optionally) a set of constraints.
#'
#'
#' @param subjects (Required) Subjects for which to create covariates
#' @param names (Required) Names for the continuous covariates. They should be
#' valid R names (See \code{\link{validNames}}) and no duplicate name should be
#' given
#' @param mean (Required) Vector of means. Must be of same length than
#' \code{names}
#' @param covariance (Optional) Lower triangle of covariance matrix. See
#' \code{\link{parseCovMatrix}} for details. 1 by default
#' @param range (Optional) Ranges of acceptable values for each covariates. See
#' \code{\link{parseRangeCode}} for details. This is missing by default,
#' resulting in no "range" limitation being applied
#' @param digits (Optional) Number of digits used to round the values. This
#' argument can be either missing (the default), so no rounding is done, of
#' length one and all variables are rounded at the same digits, of same length
#' than the number of covariates so that each covariate is rounded according to
#' its value. This argument is first parsed by \code{\link{parseCharInput}} so
#' it can either be a character vector or a numeric vector. See
#' \code{\link{parseCharInput}}. If the parsed \code{digits} vector does not
#' have length one or length equal to the number of covariates, an error is
#' generated by the \code{\link{ectdStop}} function. This is missing by
#' default, resulting in no rounding being performed
#' @param maxDraws (Optional) Maximum number of attempts allowed if initial
#' data not in range (100 by default)
#' @param seed (Optional) Random seed to use. By default, this is derived from
#' the master random seed
#' @param idCol (Optional) Name of the subject column. Must be a valid R name
#' (See \code{\link{validNames}}) and not equal to one entry of \code{names}.
#' "SUBJ" by default
#' @param includeIDCol (Optional) Should the subject column be included.
# Typically only set to FALSE when called from \code{\link{createCovariates}}.
# Defaults to TRUE
#' @author Romain Francois
#' @seealso \code{\link{createDiscreteCovariates}} to create covariates for a
#' discrete distribution.
#'
# \code{\link{createExternalCovariates}} to create covariates by
# \emph{sampling} data from an external file.
#'
# \code{\link{createTimeVaryingCovariates}} to create time-varying covariates.
#'
# \code{\link{createCovariates}} that wraps \code{createContinuousCovariates}
# and the two other described above.
#' @keywords datagen
#' @examples
#'
#' # 30 samples from a : [ 0 ] [ 1, 0, 0 ]
#' # N ( [ 0 ] , [ 0, 1, 0 ] )
#' # [ 1 ] [ 0, 0, 1 ]
#' dat <- createContinuousCovariates( 30,
#' mean = "0,0,1",
#' names = c("X", "Y", "Z") )
#'
#'
#' # 30 samples from a : [ 0 ] [ 1, 0, 0 ]
#' # N ( [ 0 ] , [ 0, 1, 0 ] )
#' # [ 1 ] [ 0, 0, 1 ]
#' # truncated at X > 0
#' dat <- createContinuousCovariates( 30,
#' mean = "0,0,1",
#' names = c("X", "Y", "Z"),
#' range= "X > 0" )
#'
#' # 30 samples from a : [ 0 ] [ 1, 0, 0 ]
#' # N ( [ 0 ] , [ 0, 1, 0 ] )
#' # [ 1 ] [ 0, 0, 1 ]
#' # truncated at X = 0, and X < Y < 1
#' dat <- createContinuousCovariates( 30,
#' mean = "0,0,1",
#' names = c("X", "Y", "Z"),
#' range= c("X > 0", "X< Y<1") )
#' stopifnot( all( dat$X < dat$Y ) )
#' stopifnot( all( dat$X > 0 ) )
#'
#' # 30 samples from a : [ 0 ] [ 1 , .5, 0 ]
#' # N ( [ 0 ] , [ .5, 1 , 0 ] )
#' # [ 1 ] [ 0 , 0 , 1 ]
#' dat1 <- createContinuousCovariates( 30,
#' mean = "0,0,1",
#' names = c("X", "Y", "Z"),
#' covariance = "1,.5,1,0,0,1",
#' seed = 30 )
#'
#' # same
#' dat2 <- createContinuousCovariates( 30,
#' mean = "0,0,1",
#' names = c("X", "Y", "Z"),
#' covariance = cbind(c(1,.5,0),
#' c(.5,1,0),
#' c(0,0,1)) ,
#' seed = 30 )
#'
#' stopifnot( all(dat1 == dat2 ))
#' # use of the digits argument
#' # X will be rounded at 2 digits
#' # Y will be rounded at 3 digits
#' # Z will be rounded at 2 digits
#' createContinuousCovariates( 10,
#' mean = "100,100,100",
#' names = c("X", "Y", "Z"),
#' digits = "2,3,2" )
#'
#' @export
createContinuousCovariates <- function(subjects,
names,
mean,
covariance = 1,
range = NULL,
digits,
maxDraws = 100 ,
seed = .deriveFromMasterSeed(),
idCol = getEctdColName("Subject"),
includeIDCol = TRUE) {
##############################################################################
# Mango Solutions, Chippenham SN14 0SQ 2006
# createContinuousCovariates.R
# Fri Jun 01 10:41:35 BST 2007 @445 /Internet Time/
#
# Author: Romain Francois
##############################################################################
# DESCRIPTION: create a set of continuous covariates, from a (truncated)
# multivariate nomal distribution
# KEYWORDS: datagen, component:covariate
##############################################################################
set.seed(seed)
## sanity checks on the inputs
if( missing(mean) )
ectdStop("`mean` is needed in `createContinuousCovariates`")
subjects <- .expandSubjects( subjects )
nSubjects <- get("nSubjects")
mean <- parseCharInput( mean )
nCov <- length( mean )
names <- if(missing(names)) {
"X" %.% 1:nCov
} else {
parseCharInput( names , checkdup = TRUE, convertToNumeric = FALSE)
}
if (length(names) != length(mean))
ectdStop(
"Dimension mismatch between `names` and `mean`" %.nt%
"`mean` of length: " %.% length(mean) %.nt%
"`names` of length: " %.% length(names)
)
covariance <- parseCovMatrix(covariance, nCov)
validNames(names, idCol)
maxDraws <- as.integer(maxDraws)
if (maxDraws < 1)
ectdStop("The maximum number of draws should be a positive integer")
if (!missing(digits)) {
for (i in 1:length(digits)) {
if (digits[i] < 0) {
ectdStop("The `digits` argument must be positive")
}
i = i + 1
}
}
if (is.null(range)) {
out <-
as.data.frame(MASS::mvrnorm(nSubjects, mu = mean, Sigma = covariance))
}
else {
# deal with range code
range <- parseRangeCode(range)
nGen <- 0
out <-
do.call(data.frame, structure(rep(list(
rep(0, nSubjects)
), nCov), names = names))
for (i in 1:maxDraws) {
# generate a new set of data
newsets <-
as.data.frame(MASS::mvrnorm(nSubjects, mu = mean, Sigma = covariance))
names(newsets) <- names
alright <- try(eval(range, newsets) , silent = TRUE)
if (class(alright) == "try-error")
next
indxs <- which(alright)
howManyToAdd <- min(nSubjects - nGen, length(indxs))
if (howManyToAdd == 0)
next
out[nGen + 1:howManyToAdd,] <-
newsets[indxs[1:howManyToAdd], , drop = FALSE]
nGen <- nGen + howManyToAdd
.log(
"..(createContinuousCovariates) iteration $i , $nGen generated (",
sprintf("%6.2f", round(nGen / nSubjects * 100, 2)),
"%)"
)
if (nGen == nSubjects)
break
}
if (nGen != nSubjects)
ectdStop(
paste(
"After",
maxDraws,
"attempts, covariate data for only",
nGen,
"of the",
nSubjects,
"subjects matched the range criteria"
)
)
}
names(out) <- names
out <- .roundIt(out, digits)
if (includeIDCol)
out <- .eval("data.frame( $idCol = subjects, out )")
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.