experiments/penaltyReg/penaltyRegM3.R

library(doParallel)
library(Rmpi)
library(normalregMix)

# Returns a term involving misclassification rates given phi.
GetMisclTerm <- function(phi) {
  
  parlist <- list(alpha = phi$alphaset, mu = phi$muset, sigma = phi$sigmaset)
  m <- length(phi$alphaset) 
  
  if (m == 2) 
  {
    omega.12  <- omega.12(parlist)
    return (log(omega.12 /(0.5-omega.12)))
  }
  
  if (m == 3) # ln(omega_12 omega_23 / (0.5-omega_12)(0.5-omega_23))
  {
    omega.123 <- omega.123(parlist)
    omega.12 <- omega.123[1]
    omega.23 <- omega.123[2]
    return (log(omega.12 * omega.23 / ((0.5-omega.12)*(0.5-omega.23))))
  }
  omega.1234 <- omega.1234(parlist)
  omega.12 <- omega.1234[1]
  omega.23 <- omega.1234[2]
  omega.34 <- omega.1234[3]
  # (m == 4) # ln(omega_12 omega_23 omega_34 / (0.5-omega_12)(0.5-omega_23)(0.5-omega_34))
  return (log(omega.12 * omega.23 * omega.34 / 
                ((0.5-omega.12)*(0.5-omega.23)*(0.5-omega.34))))
  
}

# Returns a term involving a given phi.
GetATerm <- function(phi) {
  m <- length(phi$alphaset)
  a <- phi$a
  return (log(a/(0.25-a)))
}

# generates EM test result according to the dimension of X
performEMtest <- function (sampl, an, m)
{
  library(doParallel) # workers might need information
  library(normalregMix)  # workers might need information
  testMode(TRUE) # for replication
  return (normalmixMEMtest (sampl, m = m, z = NULL, an = an, tauset = c(0.1,0.3,0.5),
                            crit.method = "asy",  parallel = 0))
}

# Returns frequency that the null H0: m=1 is rejected
# out of replications of given an and data that consists of columns of samples
GetSimulatedTypeIError <- function (an, datamat, m, crit = 0.05, rmpi = TRUE) {
  if (rmpi) {
    # need to transform data (matrix) to a list first.
    ldata <- lapply(seq_len(ncol(datamat)), function(i) datamat[,i])
    out  <- mpi.applyLB(ldata, performEMtest, an = an, m = m)
  }
  else
    out <- apply(datamat, 2, performEMtest, an = an, m = m)
  pvals <- sapply(out, "[[", "pvals")
  return (mean(pvals[2,] < crit))
}

# Generate a column that represents a sample using phi given.
GenerateSample <- function(phi){ 
  n				<- phi$n
  alpha   <- phi$alphaset
  mus 	  <- phi$muset
  sigma   <- phi$sigmaset 
  m       <- length(alpha)
  alpha_cumsum <- cumsum(alpha)
  u <- runif(n)
  
  sampl <- vector(mode = "double", n)  
  for (i in 1:m) {
    if (i == 1) {
      index <- (u<=alpha_cumsum[1])
      n_i <- length(u[index == T])
      sampl[index] <- rnorm(n_i, mean = mus[i], sd = sigma[i])
    } else {
      index <- (u>alpha_cumsum[i-1]&u<=alpha_cumsum[i])
      n_i <- length(u[index == T])
      sampl[index] <- rnorm(n_i, mean = mus[i], sd = sigma[i])
    }
  }
  
  return(sampl)
}

# Generate a pair of phi and data, where data is generated by replication.
GeneratePhiDataPairs <- function(phi, rep) {
  # data is an (n rep) matrix whose column represents a sample of size n,  
  datamat <- do.call(cbind, replicate(rep, GenerateSample(phi = phi), simplify = FALSE))
  return (list(phi = phi, datamat = datamat))
}

# Generate data for regression.
GetDataForRegression <- function(aset, nset, alphasets, musets, sigmasets, 
                                 rep = 2000, continue = 1)
{
  # initialization
  SEED <- 123456
  set.seed(SEED)
  phiset <- expand.grid(a=aset, n=nset, 
                        alphaset=alphasets, muset=musets, sigmaset=sigmasets)
  # create data first for reproducibility					
  phidatapairs <- apply(phiset, 1, GeneratePhiDataPairs, rep) # list of (phi data)
  regdata <- list()
  for (i in continue:length(phidatapairs)) {
    phi <- phidatapairs[[i]]$phi
    datamat <- phidatapairs[[i]]$datamat
    a <- phi$a
    n <- phi$n
    m <- length(phi$alphaset)
    aterm <- GetATerm(phi)
    misclterm <- GetMisclTerm(phi)
    phat <- GetSimulatedTypeIError(phi$a, datamat, m)
    regdata[[i]] <- list(y = log(phat/(0.15-phat)), 
                         aterm = aterm, misclterm = misclterm, nterm = 1/n, 
                         a=a, phat = phat)
    df <- data.frame(matrix(unlist(regdata), ncol = length(regdata[[1]]), byrow=T))
    colnames(df) <- names(regdata[[1]])
    print(df) # save every time
  }
  return (regdata)
}


# Rmpi setup 
print("collecting workers..")
    mpi.spawn.Rslaves()
    mpi.setup.rngstream()
    mpi.bcast.Robj2slave(performEMtest, all=TRUE)
print("workers loaded.")
# ====== BEGIN EXPERIMENT ======
## 1. Initialization
# Case when m = 3
aset <- c(0.04, 0.09, 0.14, 0.19, 0.24)
nset <- c(100, 300, 500)
alphasets <- list(c(0.33, 0.33, 0.34)) 
musets 		<- list(c(-4, 0, 4), c(-4, 0, 5), c(-5, 0, 5), c(-4, 0, 6), c(-5, 0, 6), c(-6, 0, 6))
sigmasets <- list(c(1, 1, 1), c(0.75, 1.5, 0.75))

## 2. Generate data
regdata <- GetDataForRegression(aset, nset, alphasets, musets, sigmasets)
df <- data.frame(matrix(unlist(regdata), ncol = length(regdata[[1]]), byrow=T))
colnames(df) <- names(regdata[[1]])
print(df)

## 3. Do regression 
df$phat <- NULL # phat itself is not used for regression.
df$a <- NULL # a as well
reg <- lm(y ~ ., data = df)
print(summary(reg))
intercept <- coef(summary(reg))["(Intercept)", "Estimate"]
atermcoeff <- coef(summary(reg))["aterm", "Estimate"]
miscltermcoeff <- coef(summary(reg))["misclterm", "Estimate"]
ntermcoeff <- coef(summary(reg))["nterm", "Estimate"]
qcoeffs <- -c(intercept, miscltermcoeff, ntermcoeff)/atermcoeff
print(qcoeffs)
# ====== END OF EXPERIMENT ======

# Rmpi termination
    mpi.close.Rslaves()
hkasahar/normalregMix documentation built on May 17, 2019, 4 p.m.