#install.packages("normalregMix_1.0.tar.gz", repos = NULL, type="source")
library(snow)
library(doParallel)
library(Rmpi)
library(normalregMix)
## Generates EM test result according to the dimension of X
PerformEMtest <- function (sample, q, an, m = 1, z = NULL, parallel) {
library(doParallel) # workers might need information
library(normalregMix) # workers might need information
testMode(TRUE) # for replication
n <- as.integer(length(sample)/(q+1))
y <- sample[1:n] # first n elements represents y data
if (q <= 0)
return (normalmixMEMtest(y, m = m, z = z, an = an, crit.method = "asy",
parallel = parallel))
# the other part consists of n chuck of q-length x data
x <- matrix(sample[(n+1):length(sample)], nrow = n, byrow = TRUE)
return (regmixMEMtest(y, x, m = m, z = z, an = an, crit.method = "asy",
parallel = parallel))
}
## Returns frequency that the null H0: m=1 is rejected
# out of replications of given an and data that consists of columns of samples
PerformEMtests <- function (an, data, q = 1, m = 1,
parallel, rmpi) {
if (rmpi)
{
# need to transform data (matrix) to a list first; each element is a column (y x_1' x_2' ... x_n')'
ldata <- lapply(seq_len(ncol(data)), function(i) data[,i])
out <- mpi.applyLB(ldata, PerformEMtest, q = q, an = an, m = m, z = NULL,
parallel = parallel)
}
else
out <- apply(data, 2, PerformEMtest, q = q, an = an, m = m, z = NULL,
parallel = parallel)
pvals <- sapply(out, "[[", "pvals")
print(list(an = an, reject.one.K2 = mean(pvals[2,] < 0.01), reject.one.K3 = mean(pvals[3,] < 0.01),
reject.five.K2 = mean(pvals[2,] < 0.05), reject.five.K3 = mean(pvals[3,] < 0.05)))
return (list(reject.one.K2 = mean(pvals[2,] < 0.01), reject.one.K3 = mean(pvals[3,] < 0.01),
reject.five.K2 = mean(pvals[2,] < 0.05), reject.five.K3 = mean(pvals[3,] < 0.05)))
}
# Returns data set of rejection frequency rate corresponding to each an,
# the value of optimal an that is closest to given sig. level (0.05 by default), and
# the frequency of rejection according to the optimal an.
FindOptimal1vs2an <- function (phidatapair, anset, m = 1,
parallel = 0, rmpi = TRUE) {
phi <- phidatapair$phi
data <- phidatapair$data
q <- length(phi$betaset)
# loop over each a_n.
output <- lapply(anset, PerformEMtests, data = data, q = q, m = m,
parallel = parallel, rmpi = rmpi)
freqs.one.K2 <- sapply(output, "[[", "reject.one.K2")
freqs.one.K3 <- sapply(output, "[[", "reject.one.K3")
freqs.five.K2 <- sapply(output, "[[", "reject.five.K2")
freqs.five.K3 <- sapply(output, "[[", "reject.five.K3")
# show me what you've got.
table <- data.frame(anset, freqs.one.K2, freqs.one.K3, freqs.five.K2, freqs.five.K3)
colnames(table) <- c("an", "1%, K=2", "1%, K=3", "5%, K=2", "5%, K=3")
optimal.value <- anset[which(abs(freqs.five.K2-0.05)==min(abs(freqs.five.K2-0.05)))][1]
optimal.perf <- freqs.five.K2[which(abs(freqs.five.K2-0.05)==min(abs(freqs.five.K2-0.05)))][1]
print(table)
return (list(optimal.value = optimal.value, optimal.perf = optimal.perf))
}
## Generate a column that represents a sample using phi given.
# each column has the form (y x_1' x_2' ... x_n')'
# where each column x_i represents q data for each observation
GenerateSample <- function(phi) {
n <- phi$n
betaset <- phi$betaset
q <- 0
if (!is.null(betaset))
q <- length(betaset[[1]])
if (q <= 0)
print("Error; in this experiment, dim(X) > 0")
x.sample <- matrix(rnorm(n*q), nrow = n) # each row is one observation
y.sample <- rnorm(n)
y.sample <- apply(x.sample, 1, function(x.obs)
rnorm(1, mean = ((betaset*x.obs)), sd = 1))
sample <- c(y.sample, c(t(x.sample)))
return (sample)
}
## Generate a pair of phi and data, where data is generated by replication.
GeneratePhiDataPair <- function(phi, replication) {
phi <- as.list(phi) # make it atomic.
# data is an (n replication) matrix whose column represents a sample of size n,
data <- do.call(cbind, replicate(replication, GenerateSample(phi = phi), simplify = FALSE))
return (list(phi = phi, data = data))
}
## Create data given phiset and replication
GeneratePhiDataPairs <- function(phiset, replication = 2000) { # original paper has 10000 replications
apply(phiset, 1, GeneratePhiDataPair, replication = replication) # list of (phi data)
}
## Rmpi setup
print("collecting workers..")
mpi.spawn.Rslaves()
mpi.setup.rngstream()
mpi.bcast.Robj2slave(performEMtest, all=TRUE)
print("workers loaded.")
## ====== BEGIN EXPERIMENT ======
## Initiliazation & data generation
# Model specification (per row of the table)
# dim(X) = 2
dimx <- 2
anlb <- 3.0
anub <- 3.3
ancount <- 4
SEED <- 222222
# init.
set.seed(SEED)
anset <- seq(anlb,anub,length.out = ancount)[1:ancount]
betaset <- rep(0.5, dimx)
# generate data
phiset <- expand.grid(n=200)
phiset$betasets <- lapply(1:nrow(phiset), function(j) betaset)
pairs <- GeneratePhiDataPairs(phiset)
## 2. Create a row for a table.
cols <- list()
for (i in 1:length(pairs)) {
phi <- pairs[[i]]$phi
data <- pairs[[i]]$data
n <- phi$n
result <- FindOptimal1vs2an(pairs[[i]], anset = anset, m = 1)
cols[[i]] <- list(n, result$optimal.value, result$optimal.perf)
df <- data.frame(matrix(unlist(cols), ncol = length(cols[[1]]), byrow=T))
colnames(df) <- c("n", "optimal.value", "optimal.perf")
print(df) # save every time
}
print(df)
## ====== END EXPERIMENT ======
# Rmpi termination
mpi.close.Rslaves()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.