# simulate data
# required packages (assumed sources): rdirichlet from gtools (MCMCpack also
# provides one), rmvnorm from mvtnorm, lowerTriangle from gdata,
# is.positive.definite from matrixcalc, mclapply from parallel
library(gtools)
library(mvtnorm)
library(gdata)
library(matrixcalc)
library(parallel)

simdatsimple <- function(n, t.max = 24, tempTrend = TRUE, lodRemove = FALSE, marRemove = FALSE,
                         lodmis = 0, marmis = 0, sf = 1){
  K <- 6 # number of latent states (clusters)
  if(tempTrend){
    # shared temporal trend: every person moves through states 1, ..., K in the
    # same order, then returns to state 1 at the end of the series
    z.true <- list()
    for(i in 1:n){
      probs <- rdirichlet(1, rep(10, K)) # state proportions differ by person but come from the same distribution
      nums <- rmultinom(1, t.max, prob = probs) # number of time points spent in each state
      # split state 1's time points in half so the series both starts and ends in state 1
      num1 <- nums[1]
      numFirsthalf <- floor(num1/2)
      numSecondhalf <- num1 - numFirsthalf
      nums[1] <- numFirsthalf
      nums <- c(nums, numSecondhalf)
      states <- c(1:K, 1)
      z <- unlist(lapply(1:(K+1), FUN = function(j) rep(states[j], nums[j])))
      z.true[[i]] <- z
    }
  }else{
    # person-specific temporal trends: each person visits the states in a
    # random order, so the trend is not shared among people
    z.true <- list()
    for(i in 1:n){
      probs <- rdirichlet(1, rep(10, K))
      ord <- sample(1:K, size = K, replace = FALSE) # person-specific state order
      samps <- sample(1:K, t.max, replace = TRUE, prob = probs)
      reps <- sapply(1:K, FUN = function(k) sum(samps == k)) # time points per state
      states <- unlist(lapply(1:K, FUN = function(k) rep(ord[k], reps[k])))
      # move the first half of the opening state's run to the end of the series
      half1 <- ceiling(0.5*sum(states == states[1]))
      z <- c(states[-(1:half1)], rep(states[1], half1))
      z.true[[i]] <- z
    }
  }
  p <- 3 # number of exposures
  # cluster-specific covariances: Sigma_k = L^{-1} D L^{-T} for a random
  # unit lower-triangular L, so each cluster gets its own correlation structure
  Sigma.true <- list()
  D <- sf*diag(p) # sf scales the overall noise level
  for(k in 1:K){
    L <- diag(p)
    lowerTriangle(L) <- rnorm(p, 0, .5)
    Sigma.true[[k]] <- solve(L) %*% D %*% t(solve(L))
  }
  # cluster-specific means, drawn with correlated components
  corMean <- matrix(c(1, 0.7, 0.4,
                      0.7, 1, -0.2,
                      0.4, -0.2, 1), ncol = 3)
  stopifnot(is.positive.definite(corMean))
  mu.true <- rmvnorm(K, mean = rep(0, p), sigma = corMean)
  # generate observations: p exposures at t.max time points per person
  y <- list()
  for(i in 1:n){
    y[[i]] <- matrix(0, t.max, p)
    for(k in 1:K){
      if(any(z.true[[i]] == k)){
        which.zk <- which(z.true[[i]] == k)
        y[[i]][which.zk,] <- rmvnorm(length(which.zk), mu.true[k,], Sigma.true[[k]])
      }
    }
  }
  # standardize data
  yMat <- do.call(rbind, y)
  # scale mu so MSE is computed on the standardized scale
  meanY <- apply(yMat, 2, mean)
  sdY <- apply(yMat, 2, sd)
  mu.true.sc <- sweep(sweep(mu.true, 2, meanY, "-"), 2, sdY, "/")
  # Sigma is not used again downstream, so it is not rescaled
  ys <- apply(yMat, 2, scale)
  # split the standardized matrix back into one t.max x p matrix per person
  splits <- seq(1, n*t.max, t.max)
  y <- lapply(1:length(splits), FUN = function(i) ys[splits[i]:(splits[i] + t.max - 1),])
  y.complete <- y # keep a copy of the complete data before any removal
  ymat <- do.call(rbind, y)
  # MAR removal: delete contiguous chunks of observations at random
  if(marRemove){
    misnum <- ceiling(marmis*t.max*p) # total entries to delete per person
    numChunks <- marmis*200 # number of chunks the deletions are split into
    chunks <- rmultinom(1, size = misnum, prob = rep(1/numChunks, numChunks)) # chunk sizes (sum to misnum)
    for(i in 1:n){
      for(k in 1:numChunks){
        if(chunks[k, 1] == 0) next # multinomial draws can produce empty chunks; skip them
        j <- sample(1:p, 1) # randomly choose an exposure
        repeat{
          # choose a starting time point so the chunk fits within the series;
          # redraw if it would overlap an already-deleted chunk
          timePoint <- sample(2:(t.max - chunks[k, 1]), 1)
          if(!anyNA(y[[i]][timePoint:(timePoint + chunks[k, 1] - 1), j])) break
        }
        y[[i]][timePoint:(timePoint + chunks[k, 1] - 1), j] <- NA
      }
    }
  }
  # LOD removal: censor values at or below the lodmis quantile of each exposure
  if(lodRemove){
    lod <- list()
    lod.old <- apply(ymat, 2, FUN = function(x) quantile(x, lodmis, na.rm = TRUE))
    for(i in 1:n){
      lod[[i]] <- lod.old # one shared LOD per exposure
      # per-person alternative: lod[[i]] <- apply(y[[i]], 2, FUN = function(x) quantile(x, lodmis, na.rm = TRUE))
      for(j in 1:p){
        y[[i]][which(y[[i]][,j] <= lod[[i]][j]), j] <- -Inf # censored values are coded -Inf
      }
    }
  }else{
    lod <- NULL
  }
  list1 <- list(y = y, y.complete = y.complete,
                z.true = z.true, K = K, mu.true = mu.true.sc, Sigma.true = Sigma.true,
                lod = lod)
  return(list1)
}
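
# Example usage (a sketch, not run on source): simulate 10 people with 10% MAR
# chunks and a 20% limit of detection, then inspect what comes back. The
# parameter values here are illustrative, not taken from the original study.
if(FALSE){
  set.seed(1)
  dat <- simdatsimple(n = 10, t.max = 24, marRemove = TRUE, marmis = 0.1,
                      lodRemove = TRUE, lodmis = 0.2)
  length(dat$y)                              # 10 people
  dim(dat$y[[1]])                            # t.max x p = 24 x 3
  sum(is.na(unlist(dat$y)))                  # entries deleted as MAR
  sum(unlist(dat$y) == -Inf, na.rm = TRUE)   # entries censored below the LOD
}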
# additional simulation for the Biometrics revision: more people, more states,
# and longer series than simdatsimple #
simdatnew <- function(n = 20, K = 20, tempTrend = FALSE, t.max = 288, lodRemove = FALSE, marRemove = FALSE,
                      lodmis = 0, marmis = 0, sf = 0.1){
  dir_params <- sort(1:K, decreasing = TRUE) # decreasing Dirichlet weights: earlier states get more time on average
  if(tempTrend){
    # shared temporal trend: every person moves through states 1, ..., K in the
    # same order, then returns to state 1 at the end of the series
    z.true <- list()
    for(i in 1:n){
      probs <- rdirichlet(1, dir_params) # state proportions differ by person but come from the same distribution
      nums <- rmultinom(1, t.max, prob = probs) # number of time points spent in each state
      # split state 1's time points in half so the series both starts and ends in state 1
      num1 <- nums[1]
      numFirsthalf <- floor(num1/2)
      numSecondhalf <- num1 - numFirsthalf
      nums[1] <- numFirsthalf
      nums <- c(nums, numSecondhalf)
      states <- c(1:K, 1)
      z <- unlist(lapply(1:(K+1), FUN = function(j) rep(states[j], nums[j])))
      z.true[[i]] <- z
    }
  }else{
    # person-specific temporal trends: each person visits the states in a
    # random order, so the trend is not shared among people
    z.true <- list()
    for(i in 1:n){
      probs <- rdirichlet(1, dir_params)
      ord <- sample(1:K, size = K, replace = FALSE) # person-specific state order
      samps <- sample(1:K, t.max, replace = TRUE, prob = probs)
      reps <- sapply(1:K, FUN = function(k) sum(samps == k)) # time points per state
      states <- unlist(lapply(1:K, FUN = function(k) rep(ord[k], reps[k])))
      # move the first half of the opening state's run to the end of the series
      half1 <- ceiling(0.5*sum(states == states[1]))
      z <- c(states[-(1:half1)], rep(states[1], half1))
      z.true[[i]] <- z
    }
  }
  p <- 3 # number of exposures
  # cluster-specific covariances: Sigma_k = L^{-1} D L^{-T} for a random
  # unit lower-triangular L, so each cluster gets its own correlation structure
  Sigma.true <- list()
  D <- sf*diag(p) # sf scales the overall noise level
  for(k in 1:K){
    L <- diag(p)
    lowerTriangle(L) <- rnorm(p, 0, .5)
    Sigma.true[[k]] <- solve(L) %*% D %*% t(solve(L))
  }
  # cluster-specific means, drawn with correlated components
  corMean <- matrix(c(1, 0.7, 0.4,
                      0.7, 1, -0.2,
                      0.4, -0.2, 1), ncol = 3)
  stopifnot(is.positive.definite(corMean))
  mu.true <- rmvnorm(K, mean = rep(0, p), sigma = corMean)
  # generate observations: p exposures at t.max time points per person
  y <- list()
  for(i in 1:n){
    y[[i]] <- matrix(0, t.max, p)
    for(k in 1:K){
      if(any(z.true[[i]] == k)){
        which.zk <- which(z.true[[i]] == k)
        y[[i]][which.zk,] <- rmvnorm(length(which.zk), mu.true[k,], Sigma.true[[k]])
      }
    }
  }
  # standardize data
  yMat <- do.call(rbind, y)
  # scale mu so MSE is computed on the standardized scale
  meanY <- apply(yMat, 2, mean)
  sdY <- apply(yMat, 2, sd)
  mu.true.sc <- sweep(sweep(mu.true, 2, meanY, "-"), 2, sdY, "/")
  # Sigma is not used again downstream, so it is not rescaled
  ys <- apply(yMat, 2, scale)
  # split the standardized matrix back into one t.max x p matrix per person
  splits <- seq(1, n*t.max, t.max)
  y <- lapply(1:length(splits), FUN = function(i) ys[splits[i]:(splits[i] + t.max - 1),])
  y.complete <- y # keep a copy of the complete data before any removal
  ymat <- do.call(rbind, y)
  # MAR removal: delete contiguous chunks of observations at random
  # (unlike simdatsimple, chunks here are allowed to overlap)
  if(marRemove){
    misnum <- ceiling(marmis*t.max*p) # total entries to delete per person
    numChunks <- marmis*200 # number of chunks the deletions are split into
    chunks <- rmultinom(1, size = misnum, prob = rep(1/numChunks, numChunks)) # chunk sizes (sum to misnum)
    for(i in 1:n){
      for(k in 1:numChunks){
        if(chunks[k, 1] == 0) next # multinomial draws can produce empty chunks; skip them
        j <- sample(1:p, 1) # randomly choose an exposure
        timePoint <- sample(2:(t.max - chunks[k, 1]), 1) # starting time point, chosen so the chunk fits within the series
        y[[i]][timePoint:(timePoint + chunks[k, 1] - 1), j] <- NA
      }
    }
  }
  # LOD removal: censor values at or below the lodmis quantile of each exposure
  if(lodRemove){
    lod <- list()
    lod.old <- apply(ymat, 2, FUN = function(x) quantile(x, lodmis, na.rm = TRUE))
    for(i in 1:n){
      lod[[i]] <- lod.old # one shared LOD per exposure
      # per-person alternative: lod[[i]] <- apply(y[[i]], 2, FUN = function(x) quantile(x, lodmis, na.rm = TRUE))
      for(j in 1:p){
        y[[i]][which(y[[i]][,j] <= lod[[i]][j]), j] <- -Inf # censored values are coded -Inf
      }
    }
  }else{
    lod <- NULL
  }
  list1 <- list(y = y, y.complete = y.complete,
                z.true = z.true, K = K, mu.true = mu.true.sc,
                lod = lod)
  return(list1)
}
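
# Example usage (a sketch, not run on source): the revision setting with the
# function's own defaults (20 people, K = 20 states, 288 time points); the
# 5% MAR rate is illustrative.
if(FALSE){
  set.seed(2)
  dat <- simdatnew(marRemove = TRUE, marmis = 0.05)
  table(dat$z.true[[1]])      # time spent in each state by person 1
  sum(is.na(unlist(dat$y)))   # entries deleted as MAR
}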
###############################################################
### function for unlisting results from independent methods ###
###############################################################
unlistMethod <- function(fit1, marmiss = NULL, lodmiss = NULL, y, ycomplete, len.imp){
  # len.imp: number of posterior imputation draws per missing value
  # (only needed when marmiss or lodmiss is supplied)
  n <- length(fit1)
  t.max <- 288 # matches simdatnew's default series length
  # Hamming distances between estimated and true state sequences
  ham1 <- unlist(mclapply(1:n, FUN = function(i){
    if(!anyNA(fit1[[i]]$hamming)) return(fit1[[i]]$hamming)
    else return(NULL)
  }))
  # MSE of the cluster means
  mu.sse1 <- mclapply(1:n, FUN = function(i){
    if(!anyNA(fit1[[i]]$mu.sse)) return(fit1[[i]]$mu.sse)
    else return(NULL)
  })
  muMat <- do.call(cbind, mu.sse1)
  mu.mse1 <- apply(muMat, 1, sum)/(n*t.max)
  if(!is.null(marmiss)){
    # MAR MSE: sum per-person SSEs, then divide by the total number of MAR entries
    mar.sse1 <- mclapply(1:n, FUN = function(i){
      if(!anyNA(fit1[[i]]$mar.sse)) return(fit1[[i]]$mar.sse)
      else return(NULL)
    })
    marMat <- do.call(cbind, mar.sse1)
    mar.mse1 <- apply(marMat, 1, sum)/marmiss
    # posterior predictive mean imputes: SSE of the mean of the imputation draws
    ppmar.sse <- mclapply(1:n, FUN = function(i){
      if(!anyNA(fit1[[i]]$mar.sse)){
        truemar <- unlist(ycomplete[[i]])[which(is.na(unlist(y[[i]])))]
        ymarmean <- apply(matrix(fit1[[i]]$ymar, nrow = len.imp), 2, mean)
        sum((ymarmean - truemar)^2)
      }else return(NULL)
    })
    ppMarMean <- sum(unlist(ppmar.sse))/marmiss
    # MAR bias: sum per-person biases, then divide by the total number of MAR entries
    mar.sbias1 <- mclapply(1:n, FUN = function(i){
      if(!anyNA(fit1[[i]]$mar.sum.bias)) return(fit1[[i]]$mar.sum.bias)
      else return(NULL)
    })
    marMatb <- do.call(cbind, mar.sbias1)
    mar.bias1 <- apply(marMatb, 1, sum)/marmiss
  }else{
    mar.mse1 <- NULL
    mar.bias1 <- NULL
    ppMarMean <- NULL
  }
  if(!is.null(lodmiss)){
    # LOD MSE: sum per-person SSEs, then divide by the total number of LOD entries
    lod.sse1 <- mclapply(1:n, FUN = function(i){
      if(!anyNA(fit1[[i]]$lod.sse)) return(fit1[[i]]$lod.sse)
      else return(NULL)
    })
    lodMat <- do.call(cbind, lod.sse1)
    lod.mse1 <- apply(lodMat, 1, sum)/lodmiss
    # posterior predictive mean imputes for the censored values
    pplod.sse <- mclapply(1:n, FUN = function(i){
      if(!anyNA(fit1[[i]]$lod.sse)){
        truelod <- unlist(ycomplete[[i]])[which(unlist(y[[i]]) == -Inf)]
        ylodmean <- apply(matrix(fit1[[i]]$ylod, nrow = len.imp), 2, mean)
        sum((ylodmean - truelod)^2)
      }else return(NULL)
    })
    ppLodMean <- sum(unlist(pplod.sse))/lodmiss
    # LOD bias: sum per-person biases, then divide by the total number of LOD entries
    lod.sbias1 <- mclapply(1:n, FUN = function(i){
      if(!anyNA(fit1[[i]]$lod.sum.bias)) return(fit1[[i]]$lod.sum.bias)
      else return(NULL)
    })
    lodMatb <- do.call(cbind, lod.sbias1)
    lod.bias1 <- apply(lodMatb, 1, sum)/lodmiss
  }else{
    lod.mse1 <- NULL
    lod.bias1 <- NULL
    ppLodMean <- NULL
  }
  list1 <- list(ham = ham1, mu.mse = mu.mse1, ppMarMean = ppMarMean, mar.mse = mar.mse1,
                ppLodMean = ppLodMean, lod.mse = lod.mse1, mar.bias = mar.bias1, lod.bias = lod.bias1)
  return(list1)
}
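
# Hypothetical usage (a sketch): `fits` is assumed to be a list of one model
# fit per person, each a list carrying the fields accessed above (hamming,
# mu.sse, mar.sse, mar.sum.bias, ymar, and the lod analogues); those fits are
# produced by the modeling code, not by this file.
if(FALSE){
  dat <- simdatnew(marRemove = TRUE, marmis = 0.05)
  nmar <- sum(is.na(unlist(dat$y))) # total number of MAR entries
  res <- unlistMethod(fits, marmiss = nmar, lodmiss = NULL,
                      y = dat$y, ycomplete = dat$y.complete, len.imp = 100)
  res$mar.mse
}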
###############################################################
### function for unlisting partial-share methods in FCCS    ###
###############################################################
# fit1s holds the independent fits (one per person); fit2s, fit3s, and fit4s
# hold the shared fits, each contributing one column of results
unlistPartialShare <- function(fit1s, fit2s, fit3s, fit4s, marmiss = NULL, lodmiss = NULL){
  n1 <- length(fit1s) # number of independent fits
  # MAR MSE: per-person SSEs from the independent fits plus one column from
  # each shared fit, summed and divided by the total number of MAR entries
  mar.sse1 <- mclapply(1:n1, FUN = function(i){
    if(!anyNA(fit1s[[i]]$mar.sse)) return(fit1s[[i]]$mar.sse)
    else return(NULL)
  })
  marMat <- do.call(cbind, mar.sse1)
  marMat <- cbind(marMat, fit2s$mar.sse, fit3s$mar.sse, fit4s$mar.sse)
  mar.mse1 <- apply(marMat, 1, sum)/marmiss
  # MAR bias, assembled the same way
  mar.sbias1 <- mclapply(1:n1, FUN = function(i){
    if(!anyNA(fit1s[[i]]$mar.sum.bias)) return(fit1s[[i]]$mar.sum.bias)
    else return(NULL)
  })
  marMatb <- do.call(cbind, mar.sbias1)
  marMatb <- cbind(marMatb, fit2s$mar.sum.bias, fit3s$mar.sum.bias, fit4s$mar.sum.bias)
  mar.bias1 <- apply(marMatb, 1, sum)/marmiss
  # LOD MSE: sum the rows to get total SSE, then divide by the total number of LOD entries
  lod.sse1 <- mclapply(1:n1, FUN = function(i){
    if(!anyNA(fit1s[[i]]$lod.sse)) return(fit1s[[i]]$lod.sse)
    else return(NULL)
  })
  lodMat <- do.call(cbind, lod.sse1)
  lodMat <- cbind(lodMat, fit2s$lod.sse, fit3s$lod.sse, fit4s$lod.sse)
  lod.mse1 <- apply(lodMat, 1, sum)/lodmiss
  # LOD bias, assembled the same way
  lod.sbias1 <- mclapply(1:n1, FUN = function(i){
    if(!anyNA(fit1s[[i]]$lod.sum.bias)) return(fit1s[[i]]$lod.sum.bias)
    else return(NULL)
  })
  lodMatb <- do.call(cbind, lod.sbias1)
  lodMatb <- cbind(lodMatb, fit2s$lod.sum.bias, fit3s$lod.sum.bias, fit4s$lod.sum.bias)
  lod.bias1 <- apply(lodMatb, 1, sum)/lodmiss
  list1 <- list(ham = NULL, mu.mse = NULL, mar.mse = mar.mse1,
                lod.mse = lod.mse1, mar.bias = mar.bias1, lod.bias = lod.bias1)
  return(list1)
}
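
# Hypothetical usage (a sketch): fit1s is assumed to be a list of independent
# per-person fits and fit2s/fit3s/fit4s the shared fits, all produced
# elsewhere; nmar and nlod are the total counts of MAR and LOD entries.
if(FALSE){
  res <- unlistPartialShare(fit1s, fit2s, fit3s, fit4s,
                            marmiss = nmar, lodmiss = nlod)
  res$mar.mse
  res$lod.mse
}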