#' Create a data frame of artificial health data (categorical variables only)
#'
#' Builds a toy data set of \code{V} categorical explanatory variables, sets a
#' fixed share of the values in each of those columns to \code{NA}, and appends
#' a binary response column named \code{Condition}.
#'
#' Default call: \code{dfMaker(N = 10000, V = 12, pNA = 0.1, pd = 0.5)}
#'
#' @param N number of rows for the data frame; a single non-negative whole
#'   number.
#' @param V number of explanatory variables, from 1 to 26. Each takes the
#'   values "a", "b" and "c". Columns are named with the last \code{V} capital
#'   letters of the alphabet, so \code{V = 3} gives columns X, Y and Z.
#' @param pNA the proportion of values to make NA at random, between 0 and 1.
#'   Exactly \code{round(N * pNA)} values are set to NA in each explanatory
#'   column.
#' @param pd the probability that a value of the binary response variable is
#'   'Diseased' (rather than 'Clear'), between 0 and 1.
#'
#' @return A data frame with \code{N} rows and \code{V + 1} columns: the
#'   \code{V} explanatory variables followed by \code{Condition}, a factor
#'   with levels "Clear" and "Diseased". The explanatory columns are stored
#'   however \code{as.data.frame()} stores character input (character vectors
#'   when \code{stringsAsFactors} is \code{FALSE}).
#'
#' @details it can be useful to create a toy data frame of categorical
#' explanatory variables with NAs included. This will produce any number of
#' columns with a maximum of 26 categorical explanatory variables, each of
#' three levels. The response column never receives NAs.
#'
#' @examples
#' DF <- dfMaker(N = 5000, V = 5, pNA = 0.02, pd = 0.1)
#' str(DF)
dfMaker <- function(N = 10000, V = 12, pNA = 0.1, pd = 0.5) {
  # Validate up front so bad input fails with a clear message instead of an
  # obscure error from sample() or a silently malformed data frame.
  is_whole <- function(x) {
    is.numeric(x) && length(x) == 1L && is.finite(x) && x == round(x)
  }
  is_proportion <- function(x) {
    is.numeric(x) && length(x) == 1L && !is.na(x) && x >= 0 && x <= 1
  }
  if (!is_whole(N) || N < 0) {
    stop("`N` must be a single non-negative whole number.", call. = FALSE)
  }
  if (!is_whole(V) || V < 1 || V > 26) {
    stop("`V` must be a single whole number between 1 and 26.", call. = FALSE)
  }
  if (!is_proportion(pNA)) {
    stop("`pNA` must be a single number between 0 and 1.", call. = FALSE)
  }
  if (!is_proportion(pd)) {
    stop("`pd` must be a single number between 0 and 1.", call. = FALSE)
  }

  # The last V capital letters, e.g. V = 3 -> "X", "Y", "Z".
  var_names <- LETTERS[seq.int(to = 26L, length.out = V)]

  # Draw every explanatory column first (one sample() call per column, in
  # column order) rather than growing a data frame with cbind() in a loop.
  columns <- lapply(var_names, function(nm) {
    letters[sample.int(3L, N, replace = TRUE)]
  })
  names(columns) <- var_names

  # Blank out the same number of randomly chosen positions in each column.
  # Working column-by-column on the list avoids apply(), which collapses a
  # one-row data frame to a plain vector.
  n_missing <- round(N * pNA)
  if (n_missing > 0) {
    columns <- lapply(columns, function(values) {
      values[sample.int(N, n_missing)] <- NA
      values
    })
  }

  out <- as.data.frame(columns)

  # Fix the levels explicitly so both outcomes are always present as levels,
  # even when only one of them happens to be drawn (e.g. pd = 0 or pd = 1).
  out$Condition <- factor(
    sample(c("Diseased", "Clear"), N, prob = c(pd, 1 - pd), replace = TRUE),
    levels = c("Clear", "Diseased")
  )
  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.