inst/doc/rebmix.R

### R code from vignette source 'rebmix.Rnw'

###################################################
### code chunk number 1: rebmix-code
###################################################
##############################################
## R sources for reproducing the results in ##
##              rebmix package              ##
##############################################

options(prompt = "R> ", continue = "+  ", width = 80,
  useFancyQuotes = FALSE, digits = 3)


###################################################
### code chunk number 2: rebmix-code
###################################################
###################
## Preliminaries ##
###################

## load package and set prompt before starting new page to TRUE.

library(rebmix)
devAskNewPage(ask = TRUE)


###################################################
### code chunk number 3: rebmix-code
###################################################
######################
##  Gamma datasets  ##
######################

## Generate gamma datasets.

n <- c(100, 100, 100, 100)

Theta <- new("RNGMIX.Theta", c = 4, pdf = "gamma")

a.theta1(Theta) <- rep(1/100, 4)
a.theta2(Theta) <- c(200, 400, 600, 800)

gamma1 <- RNGMIX(Dataset.name = "gamma1", n = n, Theta = a.Theta(Theta))

n <- c(40, 360)

Theta <- new("RNGMIX.Theta", c = 2, pdf = "gamma")

a.theta1(Theta) <- c(1/27, 1 / 270)
a.theta2(Theta) <- c(9, 90)

gamma2 <- RNGMIX(Dataset.name = "gamma2", n = n, Theta = a.Theta(Theta))

n <- c(80, 240, 80)

Theta <- new("RNGMIX.Theta", c = 3, pdf = "gamma")

a.theta1(Theta) <- c(1/20, 1, 1/20)
a.theta2(Theta) <- c(40, 6, 200)

gamma3 <- RNGMIX(Dataset.name = "gamma3", rseed = -4, n = n, Theta = a.Theta(Theta))


###################################################
### code chunk number 4: rebmix-code
###################################################
## Estimate number of components, component weights and component parameters.

gamma1est <- REBMIX(Dataset = a.Dataset(gamma1),
  Preprocessing = "kernel density estimation",
  cmax = 8,
  Criterion = "BIC",
  pdf = "gamma")

gamma2est <- REBMIX(Dataset = a.Dataset(gamma2),
  Preprocessing = "histogram",
  cmax = 8,
  Criterion = "BIC",
  pdf = "gamma")

gamma3est <- REBMIX(Dataset = a.Dataset(gamma3),
  Preprocessing = "histogram",
  cmax = 8,
  Criterion = "BIC",
  pdf = "gamma",
  K = 23:27)


###################################################
### code chunk number 5: gamma3-fig
###################################################
plot(gamma3est, pos = 1, what = c("pdf", "marginal cdf"), ncol = 2, npts = 1000, family = "sans")


###################################################
### code chunk number 6: rebmix-code
###################################################
summary(gamma2est)

a.theta1.all(gamma1est, pos = 1)

a.theta2.all(gamma1est, pos = 1)


###################################################
### code chunk number 7: rebmix-code
###################################################
## Bootstrap finite mixture.

gamma3boot <- boot(x = gamma3est, pos = 1, Bootstrap = "p", B = 10)

gamma3boot

summary(gamma3boot)


###################################################
### code chunk number 8: rebmix-code
###################################################
## EM.control object creation.

EM <- new("EM.Control",
  strategy = "best",
  variant = "EM",
  acceleration = "fixed",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

gamma1est.em <- REBMIX(Dataset = a.Dataset(gamma1),
  Preprocessing = "kernel density estimation",
  cmax = 8,
  Criterion = "BIC",
  pdf = "gamma",
  EMcontrol = EM)

gamma2est.em <- REBMIX(Dataset = a.Dataset(gamma2),
  Preprocessing = "histogram",
  cmax = 8,
  Criterion = "BIC",
  pdf = "gamma",
  EMcontrol = EM)

gamma3est.em <- REBMIX(Dataset = a.Dataset(gamma3),
  Preprocessing = "histogram",
  cmax = 8,
  Criterion = "BIC",
  pdf = "gamma",
  K = 23:27,
  EMcontrol = EM)

summary(gamma1est.em)

summary(gamma2est.em)

summary(gamma3est.em)


###################################################
### code chunk number 9: rebmix-code
###################################################
#########################
##   Poisson dataset   ##
#########################

## Generate the Poisson dataset.

n <- c(200, 200, 200)

Theta <- new("RNGMIX.Theta", c = 3, pdf = rep("Poisson", 2))

a.theta1(Theta, 1) <- c(3, 2)
a.theta1(Theta, 2) <- c(9, 10)
a.theta1(Theta, 3) <- c(15, 16)

poisson <- RNGMIX(Dataset.name = paste("Poisson_", 1:10, sep = ""), n = n, Theta = a.Theta(Theta))


###################################################
### code chunk number 10: rebmix-code
###################################################
## Estimate number of components, component weights and component parameters.

poissonest <- REBMIX(Dataset = a.Dataset(poisson),
  Preprocessing = "histogram",
  cmax = 10,
  Criterion = "MDL5",
  pdf = rep("Poisson", 2),
  K = 1)


###################################################
### code chunk number 11: poisson-fig
###################################################
plot(poissonest, pos = 1, what = c("pdf", "marginal pdf", "IC", "D", "logL"), nrow = 2, ncol = 3, npts = 1000, family = "sans")


###################################################
### code chunk number 12: poisson-clu-fig
###################################################
poissonclu <- RCLRMIX(x = poissonest, pos = 1, Zt = a.Zt(poisson))

plot(poissonclu, family = "sans")


###################################################
### code chunk number 13: rebmix-code
###################################################
## Visualize results.

summary(poissonest)

a.theta1.all(poissonest, pos = 1)

a.theta2.all(poissonest, pos = 1)


###################################################
### code chunk number 14: rebmix-code
###################################################
## EM.control object creation.

EM <- new("EM.Control",
  strategy = "exhaustive",
  variant = "EM",
  acceleration = "fixed",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

poissonest.em <- REBMIX(Dataset = a.Dataset(poisson),
  Preprocessing = "histogram",
  cmax = 10,
  Criterion = "MDL5",
  pdf = rep("Poisson", 2),
  K = 1,
  EMcontrol  = EM)

summary(poissonest.em)


###################################################
### code chunk number 15: rebmix-code
###################################################
###################################
##  Multivariate normal dataset  ##
###################################

## Generate normal dataset.

n <- c(50, 50, 50, 50, 50)

Theta <- new("RNGMVNORM.Theta", c = 5, d = 2)

a.theta1(Theta, 1) <- c(2.7, 3.7)

a.theta1(Theta, 2) <- c(5.7, 9.1)

a.theta1(Theta, 3) <- c(2.0, 9.0)

a.theta1(Theta, 4) <- c(9.5, 6.6)

a.theta1(Theta, 5) <- c(6.3, 0.6)

a.theta2(Theta, 1) <- c(0.9, -0.1, -0.1, 0.4)

a.theta2(Theta, 2) <- c(2.8, -1.3, -1.3, 1.5)

a.theta2(Theta, 3) <- c(0.1, 0.0, 0.0, 0.3)

a.theta2(Theta, 4) <- c(1.3, -0.4, -0.4, 0.4)

a.theta2(Theta, 5) <- c(0.5, 0.3, 0.3, 2.5)

mvnorm.simulated <- RNGMIX(model = "RNGMVNORM",
  Dataset.name = "mvnormdataset",
  rseed = -1,
  n = n,
  Theta = a.Theta(Theta))


###################################################
### code chunk number 16: rebmix-code
###################################################
## Estimate number of components, component weights and component parameters.

mvnormest <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 20,
  Criterion = "BIC")


###################################################
### code chunk number 17: mvnorm-fig
###################################################
plot(mvnormest, family = "sans")


###################################################
### code chunk number 18: mvnorm-clu-fig
###################################################
mvnormclu <- RCLRMIX(model = "RCLRMVNORM", x = mvnormest)

plot(mvnormclu, family = "sans")


###################################################
### code chunk number 19: rebmix-code
###################################################
summary(mvnormest)


###################################################
### code chunk number 20: rebmix-code
###################################################
summary(mvnormclu)


###################################################
### code chunk number 21: rebmix-code
###################################################
## EM.control object creation.

EM <- new("EM.Control",
  strategy = "single",
  variant = "EM",
  acceleration = "fixed",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

## Optimal K estimation.

K <- optbins(Dataset = a.Dataset(mvnorm.simulated), Rule = "Knuth equal", kmin = 2, kmax = 100)

## Finite mixture estimation

mvnormest.em <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 20,
  K = K,
  Criterion = "BIC",
  EMcontrol = EM)

summary(mvnormest.em)


###################################################
### code chunk number 22: rebmix-code
###################################################
## EM.control object creation.

CEM <- new("EM.Control",
  strategy = "exhaustive",
  variant = "ECM",
  acceleration = "fixed",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

## Estimate number of components, component weights and component parameters.

mvnormest.cem <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 10,
  Criterion = "ICL",
  EMcontrol = CEM)

mvnorm.clu <- RCLRMIX(model = "RCLRMVNORM", x = mvnormest.cem)


###################################################
### code chunk number 23: mvnorm-clu-fig-cem
###################################################
plot(mvnorm.clu, family = "sans")


###################################################
### code chunk number 24: rebmix-code
###################################################
## Create the EM.Control object to utilize one of the REBMIX&EM strategies.

EM.normal <- new("EM.Control",
  strategy = "exhaustive",
  variant = "EM",
  acceleration = "fixed",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

## Estimate number of components, component weights and component parameters.

mvnormestest.em.normal <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 15,
  Criterion = "BIC",
  EMcontrol = EM.normal)

cat("Total number of EM algorithm iterations: ",
  a.summary.EM(mvnormestest.em.normal, pos = 1, col.name = "total.iterations.nbr"),
  ". Value of BIC: ", a.summary(mvnormestest.em.normal, pos = 1, col.name = "IC"))


###################################################
### code chunk number 25: rebmix-code
###################################################
## Create the EM.Control object to utilize one of the REBMIX&EM strategies.

EM.fixed1.5 <- new("EM.Control",
  strategy = "exhaustive",
  variant = "EM",
  acceleration = "fixed",
  acceleration.multiplier = 1.5,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

## Estimate number of components, component weights and component parameters.

mvnormest.em.fixed1.5 <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 15,
  Criterion = "BIC",
  EMcontrol = EM.fixed1.5)

cat("Total number of EM algorithm iterations: ",
  a.summary.EM(mvnormest.em.fixed1.5, pos = 1, col.name = "total.iterations.nbr"),
  ". Value of BIC: ", a.summary(mvnormest.em.fixed1.5, pos = 1, col.name = "IC"))


###################################################
### code chunk number 26: rebmix-code
###################################################
## Create the EM.Control object to utilize one of the REBMIX&EM strategies.

EM.line <- new("EM.Control",
  strategy = "exhaustive",
  variant = "EM",
  acceleration = "line",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

## Estimate number of components, component weights and component parameters.

mvnormest.em.line <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 15,
  Criterion = "BIC",
  EMcontrol = EM.line)

cat("Total number of EM algorithm iterations: ",
  a.summary.EM(mvnormest.em.line, pos = 1, col.name = "total.iterations.nbr"),
  ". Value of BIC: ", a.summary(mvnormest.em.line, pos = 1, col.name = "IC"))


###################################################
### code chunk number 27: rebmix-code
###################################################
## Create the EM.Control object to utilize one of the REBMIX&EM strategies.

EM.golden <- new("EM.Control",
  strategy = "exhaustive",
  variant = "EM",
  acceleration = "golden",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 0)

## Estimate number of components, component weights and component parameters.

mvnormest.em.golden <- REBMIX(model = "REBMVNORM",
  Dataset = a.Dataset(mvnorm.simulated),
  Preprocessing = "histogram",
  cmax = 15,
  Criterion = "BIC",
  EMcontrol = EM.golden)

cat("Total number of EM algorithm iterations: ",
  a.summary.EM(mvnormest.em.golden, pos = 1, col.name = "total.iterations.nbr"),
  ". Value of BIC: ", a.summary(mvnormest.em.golden, pos = 1, col.name = "IC"))


###################################################
### code chunk number 28: rebmix-code
###################################################
data(sensorlessdrive)

## Split dataset into train (75%) and test (25%) subsets.

set.seed(5)

Drive <- split(p = 0.75, Dataset = sensorlessdrive, class = 4)


###################################################
### code chunk number 29: rebmix-code
###################################################
## Estimate number of components, component weights and component
## parameters for train subsets.

driveest <- REBMIX(model = "REBMVNORM",
  Dataset = a.train(Drive),
  Preprocessing = "histogram",
  cmax = 15,
  Criterion = "BIC")


###################################################
### code chunk number 30: rebmix-code
###################################################
## Selected features.

drivecla <- RCLSMIX(model = "RCLSMVNORM",
  x = list(driveest),
  Dataset = a.test(Drive),
  Zt = a.Zt(Drive))


###################################################
### code chunk number 31: rebmix-code
###################################################
drivecla

summary(drivecla)


###################################################
### code chunk number 32: drive-cla-fig
###################################################
# Plot selected features.

plot(drivecla, nrow = 3, ncol = 2, family = "sans")


###################################################
### code chunk number 33: rebmix-code
###################################################
## EM.control object creation.

EM <- new("EM.Control",
  strategy = "exhaustive",
  variant = "EM",
  acceleration = "fixed",
  acceleration.multiplier = 1.0,
  tolerance = 1e-4,
  maximum.iterations = 1000,
  K = 300)

## Estimate number of components, component weights and component
## parameters for train subsets.

driveest <- REBMIX(model = "REBMVNORM",
  Dataset = a.train(Drive),
  Preprocessing = "histogram",
  cmax = 15,
  Criterion = "BIC",
  EMcontrol = EM)

drivecla <- RCLSMIX(model = "RCLSMVNORM",
  x = list(driveest),
  Dataset = a.test(Drive),
  Zt = a.Zt(Drive))

summary(drivecla)


###################################################
### code chunk number 34: rebmix-code
###################################################
data(adult)

## Find complete cases.

adult <- adult[complete.cases(adult),]

## Replace levels with numbers.

adult <- as.data.frame(data.matrix(adult))


###################################################
### code chunk number 35: rebmix-code
###################################################
## Find numbers of levels.

cmax <- unlist(lapply(apply(adult[, c(-1, -16)], 2, unique), length))

cmax


###################################################
### code chunk number 36: rebmix-code
###################################################
## Split adult dataset into train and test subsets for two Incomes
## and remove Type and Income columns.

Adult <- split(p = list(type = 1, train = 2, test = 1),
  Dataset = adult, class = 16)


###################################################
### code chunk number 37: rebmix-code
###################################################
## Estimate number of components, component weights and component parameters
## for the set of chunks 1:14.

adultest <- list()

for (i in 1:14) {
  adultest[[i]] <- REBMIX(Dataset = a.train(chunk(Adult, i)),
    Preprocessing = "histogram",
    cmax = min(120, cmax[i]),
    Criterion = "BIC",
    pdf = "Dirac",
    K = 1)
}


###################################################
### code chunk number 38: rebmix-code
###################################################
## Class membership prediction based upon the best first search algorithm.

adultcla <- BFSMIX(x = adultest,
  Dataset = a.test(Adult),
  Zt = a.Zt(Adult))


###################################################
### code chunk number 39: rebmix-code
###################################################
adultcla

summary(adultcla)


###################################################
### code chunk number 40: adult-cla-fig
###################################################
## Plot selected chunks.

plot(adultcla, nrow = 5, ncol = 2, family = "sans")


###################################################
### code chunk number 41: rebmix-code
###################################################
rm(list = ls())

Try the rebmix package in your browser

Any scripts or data that you put into this service are public.

rebmix documentation built on Feb. 9, 2024, 3:01 p.m.