# Nothing
## ---- eval = FALSE------------------------------------------------------------
#
# # Overview the MXM::`MMPC()` function
#
# mod <- MXM::MMPC(
# target, # The target variable vector
# dataset, # The dataset with the target column removed
# max_k = 3, # The maximum size of the conditioning set to use
# threshold = 0.05, # level of alpha for statistical significance
# test = 'testIndFisher',
# ini = NULL, # if TRUE, the calculated univariate associations
# # are stored for runtime efficiency in subsequent
# # MMPC runs with different hyper-parameters.
# hash = TRUE, # if TRUE, the calculated statistics are stored.
# hashObject = NULL, # the mmpcobject from a previous run
# ncores = 1, # number of cores for parallel execution.
# # Recommended for thousands of variables.
# backward = TRUE) # If TRUE, the backward phase
# # (or symmetry correction) is implemented.
# # Falsely included variables,
# # in the MMPC output signature are removed.
## ---- warning = FALSE, message = FALSE----------------------------------------
# 0. Attach the MXM package (uncomment the next line to install it first):
# install.packages("MXM", dependencies = TRUE)
library("MXM")
# 1. Read the header-less wine data straight from the UCI repository:
URL <- "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
wine <- read.csv(file = URL, header = FALSE)
# 2. Label the columns: the class label first, then the 13 wine attributes:
names(wine) <- c(
  "Type",
  "Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
  "Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color", "Hue",
  "Dilution", "Proline"
)
# 3. Drop the first column (`Type`), which holds the class information:
wine <- wine[, colnames(wine) != "Type"]
# 4. Show the first two rows of the prepared data:
head(wine, n = 2)
# The header should now list the wine attributes without the class label,
# in this order:
# Alcohol | Malic | Ash | Alcalinity | Magnesium | Phenols | Flavanoids
# Nonflavanoids | Proanthocyanins | Color | Hue | Dilution | Proline
## ---- warning = FALSE, message = FALSE----------------------------------------
# 5. Count missing (NA) and not-a-number (NaN) entries in the data frame:
sum(is.na(wine))
# Both counts being 0 means there are no NAs and no NaNs: good to go!
sum(is.nan(as.matrix(wine)))
## ---- warning = FALSE, message = FALSE----------------------------------------
# 6. Inspect the class, dimensions and column types of `wine`:
str(object = wine)
# The output should describe a data frame:
# 'data.frame': 178 obs. of 13 variables
## ---- warning = FALSE, message = FALSE----------------------------------------
# 0. Exclude the target variable column (`Nonflavanoids`) from the predictors.
#    The column is dropped by name rather than by a hard-coded position, so a
#    change in column order cannot silently remove the wrong attribute.
stopifnot("Nonflavanoids" %in% colnames(wine))
# 1. Convert the remaining predictor columns to a matrix:
wine_dataset <- as.matrix(
  wine[, colnames(wine) != "Nonflavanoids", drop = FALSE]
)
# Fail loudly if any column forced the matrix to a non-numeric type.
# (Re-assigning one column through as.numeric() could not change the matrix's
# type, so that former step is replaced by this explicit check.)
stopifnot(is.numeric(wine_dataset))
head(wine_dataset, 2)
## ---- warning = FALSE, message = FALSE----------------------------------------
# 2. Check dimensions of the wine_dataset
# REMINDER: We need it as N x f // N for instances, f for features
dim(wine_dataset)
# The output should be 178 x 12,
# 178 instances and 12 features; if so, we're good to go
## ---- warning = FALSE, message = FALSE----------------------------------------
# 3. Select the target variable (`Nonflavanoids`) and store it as a vector:
target_NonFlav <- as.vector(wine$Nonflavanoids)
str(target_NonFlav)
## ---- warning = FALSE, message = FALSE----------------------------------------
# First MMPC run on the wine data, with every hyper-parameter spelled out:
library(MXM)
mmpcobject_wine_NonFlav <- MXM::MMPC(
  target = target_NonFlav,
  dataset = wine_dataset,
  max_k = 3,
  threshold = 0.05,
  test = "testIndFisher",
  ini = NULL,
  hash = TRUE,
  hashObject = NULL,
  ncores = 1,
  backward = TRUE
)
## -----------------------------------------------------------------------------
# Structure of the statistics cached during this MMPC run:
str(mmpcobject_wine_NonFlav@hashObject)
# Structure of the list holding the univariate associations:
str(mmpcobject_wine_NonFlav@univ)
## -----------------------------------------------------------------------------
# Keep the runtime of this first run for later comparison, then display it:
execution_time_1st_MMPC_run <- mmpcobject_wine_NonFlav@runtime
execution_time_1st_MMPC_run
## ---- warning = FALSE, message = FALSE----------------------------------------
# Second MMPC run on the wine dataset, with different hyper-parameters and
# re-using the univariate associations and cached statistics of the 1st run:
library('MXM')
mmpcobject_2nd_run <- MXM::MMPC(
  target = target_NonFlav,
  dataset = wine_dataset,
  # it was set to 3 in the 1st run
  max_k = 5,
  # it was set to 0.05 in the 1st run
  threshold = 0.01,
  test = 'testIndFisher',
  # the cached univariate tests
  ini = mmpcobject_wine_NonFlav@univ,
  # `hash` must be TRUE for the supplied `hashObject` to be used; MMPC()
  # documents FALSE as its default, which would leave the cache unused.
  hash = TRUE,
  # cached stats, p-values
  hashObject = mmpcobject_wine_NonFlav@hashObject,
  # same settings as in the 1st run, so that only `max_k` and `threshold`
  # differ and the runtime comparison below is like-for-like
  ncores = 1,
  backward = TRUE
)
## ---- warning = FALSE, message = FALSE----------------------------------------
# Compare the runtime of both runs:
execution_time_2nd_MMPC_run <- mmpcobject_2nd_run@runtime
execution_time_1st_MMPC_run
execution_time_2nd_MMPC_run
## ---- warning = FALSE, message = FALSE----------------------------------------
# Hyper-parameter tuning for MMPC via a grid search
library(MXM)
mmpcGridSearch <- MXM::mmpc.path(
  target = target_NonFlav,
  dataset = wine_dataset,
  # candidate values of k to try
  max_ks = c(3, 4, 5, 6),
  # candidate thresholds; if NULL, 0.1, 0.05 and 0.01 will be tested
  alphas = NULL,
  test = "testIndFisher",
  ncores = 1
)
## ---- warning = FALSE, message = FALSE----------------------------------------
# BIC values from the grid search, as a data frame:
BIC_results <- as.data.frame(mmpcGridSearch$bic)
head(BIC_results, n = 4)
# Row/column position(s) of the minimum BIC value:
which(BIC_results == min(BIC_results), arr.ind = TRUE)
## ---- warning = FALSE, message = FALSE----------------------------------------
# Signature sizes from the grid search, as a data frame:
size_of_signature_results <- as.data.frame(mmpcGridSearch$size)
head(size_of_signature_results, n = 4)
# Row/column position(s) of the largest selected subset:
which(
  size_of_signature_results == max(size_of_signature_results),
  arr.ind = TRUE
)
## ---- warning = FALSE, message = FALSE----------------------------------------
# First entries of the `variables` element of the grid-search result:
head(mmpcGridSearch$variables, n = 4)
## ---- warning = FALSE, message = FALSE----------------------------------------
# Summary of the first MMPC run:
summary(mmpcobject_wine_NonFlav)
## ---- warning = FALSE, message = FALSE----------------------------------------
# Column indices (into `wine_dataset`) of the selected signature:
mmpcobject_wine_NonFlav@selectedVarsOrder
# The signature should include the variables with indices 7, 4, 5
## ---- warning = FALSE, message = FALSE----------------------------------------
# Map the selected indices to attribute names. The indices are taken from the
# fitted object instead of being typed in by hand, so the names shown always
# correspond to what this run actually selected.
colnames(wine_dataset)[mmpcobject_wine_NonFlav@selectedVarsOrder]
## ---- warning = FALSE, message = FALSE----------------------------------------
# Model estimates using MMPC's feature subset as predictors
mmpcmodel_wine_NonFlav <- MXM::mmpc.model(
  target = target_NonFlav,
  dataset = wine_dataset,
  wei = NULL,
  mmpcObject = mmpcobject_wine_NonFlav,
  test = "testIndFisher"
)
summary(mmpcmodel_wine_NonFlav)
# The `ypografi` element of the returned list:
mmpcmodel_wine_NonFlav$ypografi
## ---- warning = FALSE, message = FALSE----------------------------------------
# The `mod` element of the returned list:
mmpcmodel_wine_NonFlav$mod
## ---- warning = FALSE, message = FALSE----------------------------------------
# Record the R version and attached packages used for this run:
sessionInfo()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.