experiments/Table4/archive/1vs2TestCoarseX3.R

#install.packages("normalregMix_1.0.tar.gz", repos = NULL, type="source")
library(snow)
library(doParallel)
library(Rmpi)
library(normalregMix)
## Generates EM test result according to the dimension of X
PerformEMtest <- function (sample, q, an, m = 1, z = NULL, parallel) {
  library(doParallel) # workers might need information
  library(normalregMix)  # workers might need information
  testMode(TRUE) # for replication
	
	n <- as.integer(length(sample)/(q+1))
  y <- sample[1:n] # first n elements represents y data 
  if (q <= 0)
    return (normalmixMEMtest(y, m = m, z = z, an = an, crit.method = "asy", 
														 parallel = parallel))
  # the other part consists of n chuck of q-length x data
  x <- matrix(sample[(n+1):length(sample)], nrow = n, byrow = TRUE)
  return (regmixMEMtest(y, x, m = m, z = z, an = an, crit.method = "asy",  
                        parallel =  parallel))
}

## Returns frequency that the null H0: m=1 is rejected
# out of replications of given an and data that consists of columns of samples
PerformEMtests <- function (an, data, crit = 0.05, q = 1, m = 1, 
                            parallel, rmpi) {
  if (rmpi)
  {
    # need to transform data (matrix) to a list first; each element is a column (y x_1' x_2' ... x_n')'
    ldata <- lapply(seq_len(ncol(data)), function(i) data[,i])
    out  <- mpi.applyLB(ldata, PerformEMtest, q = q, an = an, m = m, z = NULL,
                        parallel = parallel)
  }
  else
    out <- apply(data, 2, PerformEMtest, q = q, an = an, m = m, z = NULL,
                 parallel = parallel)
  pvals <- sapply(out, "[[", "pvals")
	print(list(an, K2 = mean(pvals[2,] < crit), K3 = mean(pvals[3,] < crit)))
  return (list(K2 = mean(pvals[2,] < crit), K3 = mean(pvals[3,] < crit)))
}
# Returns data set of rejection frequency rate corresponding to each an,
# the value of optimal an that is closest to given sig. level (0.05 by default), and
# the frequency of rejection according to the optimal an.
FindOptimal1vs2an <- function (phidatapair, anset, m = 1,
                               parallel = 0, rmpi = TRUE) { 
  phi  <- phidatapair$phi
  data <- phidatapair$data
  crit <- phi$crit
  q		 <- length(phi$betaset)
  
  # loop over each a_n.
  output <- lapply(anset, PerformEMtests, data = data, crit = crit, q = q, m = m, 
                   parallel = parallel, rmpi = rmpi)
  freqsK2 <- sapply(output, "[[", "K2")
  freqsK3 <- sapply(output, "[[", "K3")
  
  # show me what you've got.
  table <- data.frame(anset, freqsK2, freqsK3)
  colnames(table) <- c("an", "K=2", "K=3")
  optimal <- anset[which(abs(freqsK2-crit)==min(abs(freqsK2-crit)))]
  optimalresult <- freqsK2[which(abs(freqsK2-crit)==min(abs(freqsK2-crit)))]
  print(table)
  
  return (list(optimal.value = optimal, optimal.perf = optimalresult))
}
## Generate a column that represents a sample using phi given.
# each column has the form (y x_1' x_2' ... x_n')' 
# where each column x_i represents q data for each observation
GenerateSample <- function(phi) {
	n				 <- phi$n
	betaset  <- phi$betaset
	q				 <- 0 
	if (!is.null(betaset))
		q <- length(betaset[[1]])
	
	if (q <= 0)
		print("Error; in this experiment, dim(X) > 0")
  x.sample <- matrix(rnorm(n*q), nrow = n) # each row is one observation
  y.sample <- rnorm(n)
  y.sample <- apply(x.sample, 1, function(x.obs)
								rnorm(1, mean = ((betaset*x.obs)), sd = 1))
  sample <- c(y.sample, c(t(x.sample)))
	return (sample)
}

## Generate a pair of phi and data, where data is generated by replication.
GeneratePhiDataPair <- function(phi, replication) {
	phi <- as.list(phi) # make it atomic.
  # data is an (n replication) matrix whose column represents a sample of size n,  
  data <- do.call(cbind, replicate(replication, GenerateSample(phi = phi), simplify = FALSE))
  return (list(phi = phi, data = data))
}

## Create data given phiset and replication	
GeneratePhiDataPairs <- function(phiset, replication = 200) {
	apply(phiset, 1, GeneratePhiDataPair, replication = replication) # list of (phi data)
}

## Rmpi setup 
print("collecting workers..")
    mpi.spawn.Rslaves()
    mpi.setup.rngstream()
    mpi.bcast.Robj2slave(performEMtest, all=TRUE)
print("workers loaded.")

## ====== BEGIN EXPERIMENT ======
## Initiliazation & data generation
# Model specification (per row of the table)
# dim(X) = 3
dimx <- 3
anlb <- 0.4
anub <- 0.9
ancount <- 8
SEED <- 333333

# init.
set.seed(SEED)
anset <- seq(anlb,anub,length.out = ancount)
betaset <- rep(0.5, dimx)

# generate data
phiset <- expand.grid(n=200)
phiset$betasets <- lapply(1:nrow(phiset), function(j) betaset)
pairs <- GeneratePhiDataPairs(phiset) 

## 2. Create a row for a table.
cols <- list()
for (i in 1:length(pairs)) {
  phi <- pairs[[i]]$phi
  data <- pairs[[i]]$data
  n <- phi$n
  result <- FindOptimal1vs2an(pairs[[i]], anset = anset, m = 1)
  cols[[i]] <- list(crit, n, result$optimal.value, result$optimal.perf)
  df <- data.frame(matrix(unlist(cols), ncol = length(cols[[1]]), byrow=T))
  colnames(df) <- c("crit", "n", "optimal.value", "optimal.perf")
  print(df) # save every time
}
print(df) 

## ====== END EXPERIMENT ======
# Rmpi termination
    mpi.close.Rslaves()
hkasahar/normalregMix documentation built on May 17, 2019, 4 p.m.