R/SES.glmm.R

Defines functions SES.glmm

Documented in SES.glmm

SES.glmm = function(target, reps = NULL, group, dataset, max_k = 3, threshold = 0.05, test = NULL, ini = NULL, wei = NULL, user_test = NULL, 
                        hash=FALSE, hashObject = NULL, slopes = FALSE, ncores = 1) {
  ##############################
  # initialization part of SES #
  ##############################
  runtime <- proc.time()
  stat_hash <- NULL;
  pvalue_hash <- NULL;
 
  if ( hash )  {
    if (is.null(hashObject) )  {
      stat_hash <- Rfast::Hash();
      pvalue_hash <- Rfast::Hash();
    } else if ( is.list( hashObject ) ) {
      stat_hash <- hashObject$stat_hash;
      pvalue_hash <- hashObject$pvalue_hash;
    } else   stop('hashObject must be a list of two hash objects (stat_hash, pvalue_hash)')
  }
  ###################################
  # dataset checking and initialize #
  ###################################
  if (is.null(dataset) || is.null(target) ) {
      stop('invalid dataset or target (class feature) arguments.');
  } else  target <- target;
  #check for NA values in the dataset and replace them with the variable mean
  if ( any(is.na(dataset)) ) {
    warning("The dataset contains missing values and they were replaced automatically by the variable (column) median.")
    dataset = apply(dataset, 2, function(x){ x[which(is.na(x))] = median(x, na.rm = TRUE) ; return(x) });
  }
  ##################################
  # target checking and initialize #
  ##################################
  targetID <-  -1;
  #check if the target is a string
  if (is.character(target) & length(target) == 1) {
    findingTarget <- target == colnames(dataset);#findingTarget <- target %in% colnames(dataset);
    if ( !sum(findingTarget) == 1 ) {
      warning('Target name not in colnames or it appears multiple times');
      return(NULL);
    }
    targetID <- which(findingTarget);
    target <- dataset[ , targetID];
  }
  
  #checking if target is a single number
  if (is.numeric(target) & length(target) == 1) {
    if (target > dim(dataset)[2]){
      warning('Target index larger than the number of variables');
      return(NULL);
    }
    targetID <- target;
    target <- dataset[ , targetID];
  }
  ################################
  # test checking and initialize #
  ################################
  la <- length( unique( as.numeric(target) ) )
  
  if (typeof(user_test) == "closure") {
    test = user_test;
  } else {
    #auto detect independence test in case of not defined by the user and the test is null or auto
    if ( is.null(test) || test == "auto" )  {
      if ( survival::is.Surv(target) ) {
        test <- "testIndGLMMCR"
      } else if ( la > 2 & sum(target - round(target) ) != 0  &  is.null(wei)  &  !slopes  &  is.null(reps) ) {
        test <- "testIndLMM"
      } else if (la == 2) {
        test <- "testIndGLMMLogistic"
      } else if ( la > 2  &  sum(target - round(target)) == 0 ) {
        test <- "testIndGLMMPois"
      } else if ( is.ordered(target) ) {
        test <- "testIndGLMMOrdinal"
      } else test <- "testIndGLMMReg"
    }  
    #available conditional independence tests
    av_tests <- c("testIndGLMMReg", "testIndGLMMLogistic", "testIndGLMMPois", "testIndGLMMGamma", 
                 "testIndGLMMNormLog", "testIndGLMMOrdinal", "testIndGLMMCR", "testIndLMM", "auto",  NULL);
    ci_test <- test
    if (length(test) == 1) {   #avoid vectors, matrices etc
      test = match.arg(test, av_tests, TRUE);
      #convert to closure type
      if ( test == "testIndGLMMReg" ) {
        test <- testIndGLMMReg;
      } else if ( test == "testIndGLMMReg"  &  is.null(wei)  &  (!slopes) ) {
        test <- testIndLMM;
      } else if ( test == "testIndGLMMLogistic" ) {
        test <- testIndGLMMLogistic;
      } else if ( test == "testIndGLMMPois" ) {
        test <- testIndGLMMPois;
      } else if ( test == "testIndGLMMGamma" ) {
        test <- testIndGLMMGamma;
      } else if ( test == "testIndGLMMNormLog" ) {
        test <- testIndGLMMNormLog;
      } else if ( test == "testIndGLMMOrdinal" ) {
        test <- testIndGLMMOrdinal;
      } else if ( test == "testIndGLMMCR" ) {
        test <- testIndGLMMCR;
      } else if ( test == "testIndLMM" ) {
        test <- testIndLMM
      }
      
    } else   stop('invalid test option');
  }
  ###################################
  # options checking and initialize #
  ###################################
  #extracting the parameters
  max_k <- floor(max_k);
  varsize <- dim(dataset)[[2]];
  #option checking
  if ( (typeof(max_k) != "double") || max_k < 1 )   stop('invalid max_k option');
  if (max_k > varsize)   max_k = varsize;
  if ( (typeof(threshold) != "double") || threshold <= 0 || threshold >= 1 )   stop('invalid threshold option');
  ######################################################################################
  if( !is.null(user_test) )  ci_test = "user_test";
  #######################################################################################
  #call the main SES function after the checks and the initializations
  results <- InternalSES.glmm(target, reps, group, dataset, max_k, log(threshold), test, ini, wei, user_test, hash, varsize, stat_hash, pvalue_hash, 
                                 targetID, slopes, ncores);
  
  runtime <- proc.time() - runtime
  SES.glmm.output <-new("SES.glmm.output", selectedVars = results$selectedVars, selectedVarsOrder = results$selectedVarsOrder, 
                            queues = results$queues, signatures = results$signatures, hashObject = results$hashObject, pvalues = results$pvalues, 
                            stats = results$stats, univ = results$uni, max_k = results$max_k, threshold = results$threshold, 
                            n.tests = results$n.tests, runtime = runtime, test = ci_test, slope = results$slope);
  
  return(SES.glmm.output);
}

Try the MXM package in your browser

Any scripts or data that you put into this service are public.

MXM documentation built on Aug. 25, 2022, 9:05 a.m.