tests/ClusterSimul.R

# Simulate a mixed data set
#
# Validates the arguments, draws the class labels (unless they are supplied)
# and prepares the result list with one slot per model.
# NOTE(review): the per-model simulation is not implemented in this function
# yet; every model slot of the result is left empty (NULL).
#
# @param models a vector (or list) of model names to simulate (note that
# proportion free and proportion equal models are treated in the same manner)
# @param params parameters to use: a list giving, for each model, a vector
# with the number of variables and the proportion of missing values
# @param nbSample size of the sample to simulate (not used if z is given)
# @param nbCluster number of classes to simulate (not used if prop is given)
# @param z vector giving the class of each sample
# @param prop vector with the probability of each class (not used if z is given)
#
# @return a list whose first element, z, holds the class labels, followed by
# one element named after each entry of models
simulMixedData <- function(models, params, nbSample, nbCluster=2, z = NULL, prop = NULL)
{
  # check the arguments (`!` binds looser than `<` in R, so the type tests
  # must be negated directly and not compared with a number)
  if (!is.vector(models)) { stop("models has to be a vector")}
  if (!is.list(params)) { stop("params has to be a list")}
  if (length(models) != length(params)) { stop("params and models must have the same size")}
  if (nbCluster < 2) { stop("The number of clusters must be greater or equal to 2")}

  # check if we have to compute z or it is already given
  if (is.null(z))
  {
    # the number of classes is driven by prop when it is given
    if (is.null(prop)) { prop <- rep(1/nbCluster, length.out = nbCluster);}
    else               { nbCluster <- length(prop);}
    if (nbSample < nbCluster) { stop("The number of sample must be greater to the number of class")}
    # sample.int expects the number of items (a scalar), not the items themselves
    z <- sample.int(nbCluster, size = nbSample, replace = TRUE, prob = prop);
  }
  else
  { nbSample <- length(z);}

  # one slot for the labels followed by one slot per model
  res <- vector("list", length = length(models)+1);
  names(res) <- c("z", models);
  res$z <- z;
  # seq_along does not iterate when models is empty (1:length(models) would)
  for (i in seq_along(models))
  {
    # get current model and parameters
    if (is.list(models)) { model <- models[[i]];}
    else                 { model <- models[i];}
    # parameters of the current model, kept for the simulation still to be written
    param <- params[[i]];
    if (clusterValidCategoricalNames(model))
    {
      # simulate Categorical (not implemented), model names:
      #   "categorical_pk_pjk", "categorical_pk_pk",
      #   "categorical_p_pjk", "categorical_p_pk"
    }
    else if (clusterValidDiagGaussianNames(model))
    {
      # simulate Gaussian (not implemented), model names:
      #   "gaussian_pk_sjk", "gaussian_pk_sj", "gaussian_pk_sk", "gaussian_pk_s",
      #   "gaussian_p_sjk", "gaussian_p_sj", "gaussian_p_sk", "gaussian_p_s"
    }
    else if (clusterValidGammaNames(model))
    {
      # simulate Gamma (not implemented), model names:
      #   "gamma_p_ajk_bjk", "gamma_p_ajk_bk", "gamma_p_ajk_bj", "gamma_p_ajk_b",
      #   "gamma_p_ak_bjk", "gamma_p_ak_bk", "gamma_p_ak_bj", "gamma_p_ak_b",
      #   "gamma_p_aj_bjk", "gamma_p_aj_bk",
      #   "gamma_p_a_bjk", "gamma_p_a_bk",
      #   "gamma_pk_ajk_bjk", "gamma_pk_ajk_bk", "gamma_pk_ajk_bj", "gamma_pk_ajk_b",
      #   "gamma_pk_ak_bjk", "gamma_pk_ak_bk", "gamma_pk_ak_bj", "gamma_pk_ak_b",
      #   "gamma_pk_aj_bjk", "gamma_pk_aj_bk",
      #   "gamma_pk_a_bjk", "gamma_pk_a_bk"
    }
    else if (clusterValidPoissonNames(model))
    {
      # simulate Poisson (not implemented), model names:
      #   "poisson_pk_ljk", "poisson_pk_lk", "poisson_pk_ljlk",
      #   "poisson_p_ljk", "poisson_p_lk", "poisson_p_ljlk"
    }
    else
    {
      stop("Invalid model name")
    }
  }
  res
}

Try the MixAll package in your browser

Any scripts or data that you put into this service are public.

MixAll documentation built on Sept. 12, 2019, 5:05 p.m.