library(doParallel)
library(Rmpi)
library(normalregMix)
# Returns a term involving misclassification rates given phi.
GetMisclTerm <- function(phi) {
parlist <- list(alpha = phi$alphaset, mu = phi$muset, sigma = phi$sigmaset)
m <- length(phi$alphaset)
if (m == 2)
{
omega.12 <- omega.12(parlist)
return (log(omega.12 /(0.5-omega.12)))
}
if (m == 3) # ln(omega_12 omega_23 / (0.5-omega_12)(0.5-omega_23))
{
omega.123 <- omega.123(parlist)
omega.12 <- omega.123[1]
omega.23 <- omega.123[2]
return (log(omega.12 * omega.23 / ((0.5-omega.12)*(0.5-omega.23))))
}
omega.1234 <- omega.1234(parlist)
omega.12 <- omega.1234[1]
omega.23 <- omega.1234[2]
omega.34 <- omega.1234[3]
# (m == 4) # ln(omega_12 omega_23 omega_34 / (0.5-omega_12)(0.5-omega_23)(0.5-omega_34))
return (log(omega.12 * omega.23 * omega.34 /
((0.5-omega.12)*(0.5-omega.23)*(0.5-omega.34))))
}
# Returns a term involving a given phi.
GetATerm <- function(phi) {
m <- length(phi$alphaset)
a <- phi$a
return (log(a/(0.25-a)))
}
# generates EM test result according to the dimension of X
performEMtest <- function (sampl, an, m)
{
library(doParallel) # workers might need information
library(normalregMix) # workers might need information
testMode(TRUE) # for replication
return (normalmixMEMtest (sampl, m = m, z = NULL, an = an, tauset = c(0.1,0.3,0.5),
crit.method = "asy", parallel = 0))
}
# Returns frequency that the null H0: m=1 is rejected
# out of replications of given an and data that consists of columns of samples
GetSimulatedTypeIError <- function (an, datamat, m, crit = 0.05, rmpi = TRUE) {
if (rmpi) {
# need to transform data (matrix) to a list first.
ldata <- lapply(seq_len(ncol(datamat)), function(i) datamat[,i])
out <- mpi.applyLB(ldata, performEMtest, an = an, m = m)
}
else
out <- apply(datamat, 2, performEMtest, an = an, m = m)
pvals <- sapply(out, "[[", "pvals")
return (mean(pvals[2,] < crit))
}
# Generate a column that represents a sample using phi given.
GenerateSample <- function(phi){
n <- phi$n
alpha <- phi$alphaset
mus <- phi$muset
sigma <- phi$sigmaset
m <- length(alpha)
alpha_cumsum <- cumsum(alpha)
u <- runif(n)
sampl <- vector(mode = "double", n)
for (i in 1:m) {
if (i == 1) {
index <- (u<=alpha_cumsum[1])
n_i <- length(u[index == T])
sampl[index] <- rnorm(n_i, mean = mus[i], sd = sigma[i])
} else {
index <- (u>alpha_cumsum[i-1]&u<=alpha_cumsum[i])
n_i <- length(u[index == T])
sampl[index] <- rnorm(n_i, mean = mus[i], sd = sigma[i])
}
}
return(sampl)
}
# Generate a pair of phi and data, where data is generated by replication.
GeneratePhiDataPairs <- function(phi, rep) {
# data is an (n rep) matrix whose column represents a sample of size n,
datamat <- do.call(cbind, replicate(rep, GenerateSample(phi = phi), simplify = FALSE))
return (list(phi = phi, datamat = datamat))
}
# Generate data for regression.
GetDataForRegression <- function(aset, nset, alphasets, musets, sigmasets,
rep = 2000, continue = 1)
{
# initialization
SEED <- 123456
set.seed(SEED)
phiset <- expand.grid(a=aset, n=nset,
alphaset=alphasets, muset=musets, sigmaset=sigmasets)
# create data first for reproducibility
phidatapairs <- apply(phiset, 1, GeneratePhiDataPairs, rep) # list of (phi data)
regdata <- list()
for (i in continue:length(phidatapairs)) {
phi <- phidatapairs[[i]]$phi
datamat <- phidatapairs[[i]]$datamat
a <- phi$a
n <- phi$n
m <- length(phi$alphaset)
aterm <- GetATerm(phi)
misclterm <- GetMisclTerm(phi)
phat <- GetSimulatedTypeIError(phi$a, datamat, m)
regdata[[i]] <- list(y = log(phat/(0.15-phat)),
aterm = aterm, misclterm = misclterm, nterm = 1/n,
a=a, phat = phat)
df <- data.frame(matrix(unlist(regdata), ncol = length(regdata[[1]]), byrow=T))
colnames(df) <- names(regdata[[1]])
print(df) # save every time
}
return (regdata)
}
# Rmpi setup
print("collecting workers..")
mpi.spawn.Rslaves()
mpi.setup.rngstream()
mpi.bcast.Robj2slave(performEMtest, all=TRUE)
print("workers loaded.")
# ====== BEGIN EXPERIMENT ======
## 1. Initialization
# Test
aset <- c(0.6, 1.2)
nset <- c(200, 400)
alphasets <- list(c(0.25, 0.75))
musets <- list(c(-1.5, 1.5))
sigmasets <- list(c(1, 1), c(1.5, 0.75))
# Case when m = 2
# aset <- c(0.04, 0.09, 0.14, 0.19, 0.24)
# nset <- c(100, 300, 500)
# alphasets <- list(c(0.25, 0.75), c(0.5, 0.5))
# musets <- list(c(-1.5, 1.5), c(-2, 2), c(-2.5, 2.5))
# sigmasets <- list(c(1, 1), c(1.5, 0.75))
# Case when m = 3
# aset <- c(0.04, 0.09, 0.14, 0.19, 0.24)
# nset <- c(100, 300, 500)
# alphasets <- list(c(0.33, 0.33, 0.34))
# musets <- list(c(-4, 0, 4), c(-4, 0, 5), c(-5, 0, 5), c(-4, 0, 6), c(-5, 0, 6), c(-6, 0, 6))
# sigmasets <- list(c(1, 1, 1), c(0.75, 1.5, 0.75))
# Case when m = 4
# aset <- c(0.04, 0.09, 0.14, 0.19, 0.24)
# nset <- c(200, 400, 600)
# alphasets <- list(c(0.25, 0.25, 0.25, 0.25))
# musets <- list(c(-4,-1,1,4), c(-5,-1,1,5), c(-6,-2,2,6), c(-6,-1,2,5), c(-5,0,2,4), c(-6,0,2,4))
# sigmasets <- list(c(1, 1, 1, 1), c(1, 0.75, 0.5, 0.25))
## 2. Generate data
regdata <- GetDataForRegression(aset, nset, alphasets, musets, sigmasets)
df <- data.frame(matrix(unlist(regdata), ncol = length(regdata[[1]]), byrow=T))
colnames(df) <- names(regdata[[1]])
print(df)
## 3. Do regression
df$phat <- NULL # phat itself is not used for regression.
df$a <- NULL # a as well
reg <- lm(y ~ ., data = df)
print(summary(reg))
intercept <- coef(summary(reg))["(Intercept)", "Estimate"]
atermcoeff <- coef(summary(reg))["aterm", "Estimate"]
miscltermcoeff <- coef(summary(reg))["misclterm", "Estimate"]
ntermcoeff <- coef(summary(reg))["nterm", "Estimate"]
qcoeffs <- -c(intercept, miscltermcoeff, ntermcoeff)/atermcoeff
print(qcoeffs)
# ====== END OF EXPERIMENT ======
# Rmpi termination
mpi.close.Rslaves()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.