#'
#' Simulation model1 for longitudinal composition data
#'
#' Simulate sparse observation from longitudinal compositional data \eqn{X}.
#' @inheritParams Model
#' @export
Amodel2 <- function(n = 50,
p = 30, # 30
m = 0,#2
intercept = TRUE,
interval = c(0, 1),
ns = 100,
obs_spar = 0.6,#0.5,
discrete = FALSE,
SNR = 1, #4
sigma = sqrt(2), #9
rho_X = 0,
Corr_X = "CorrCS",
rho_W = 0, #0
Corr_W = "CorrCS",
Nzero_group = 4,
range_beta = c(0.5, 1),
range_beta_c = 1, #1
theta.add = c(1, 2, 5, 6), #FALSE#c(1, 2, 5, 6)#c(1, 2) #FALSE#c(1, 2)#c(1:6)
gamma = 0.5, #0.5
basis_W = "bs",
df_W = 4,
degree_W = 3,
basis_beta = "bs",
df_beta = 5,
degree_beta = 3,
insert = "FALSE",
method = "trapezoidal"){
cat("1")
beta_C = matrix(0, nrow = p, ncol = df_beta)
beta_C[3, ] <- c(-0.8, -0.8 , 0.4 , 1 , 1)
beta_C[4, ] <- c(0.5, 0.5, -0.6 ,-0.6, -0.6)
beta_C[1, ] <- c(-0.5, -0.5, -0.5 , -1, -1)
beta_C[2, ] <- c(0.8, 0.8, 0.7, 0.6, 0.6)
cat("Column Sums of beta equal 0's: ", all.equal(drop(colSums(beta_C)), rep(0, times = df_beta)), "\r\n")
beta_C <- as.vector(t(beta_C))
if(obs_spar > 1 || obs_spar < 0) stop("obs_spar is a percentage")
if(basis_W %in% c("bs", "OBasis") && df_W < 4) {
warning("With B-spline basis including intercept DF at least 4")
df_W <- 4 }
if(basis_beta %in% c("bs", "OBasis") && df_beta < 4) {
warning("With B-spline basis including intercept DF at least 4")
df_beta <- 4 }
if(basis_W == "fourier" && df_W %% 2 == 0) {
warning("With Fourier basis, DF needs to be odd")
df_W <- df_W - 1 }
if(basis_beta == "fourier" && df_beta %% 2 == 0) {
warning("With Fourier basis, DF needs to be odd")
df_beta <- df_beta - 1 }
#### Warnings ####
#### Coefficients: beta_C generation.####
## beta_C: df = df_beta, degree = degree_beta
cat("1")
#### Coefficients: beta and beta_c generation ####
#### Mean pattern #####
add.on <- 0
if (theta.add && !is.numeric(theta.add)) {
## If true, first ceiling(Nzero_group/2) of the None-zero groups are set mean with log(p*gamma)
## first ceiling(Nzero_group/2) of the zero groups are set mean with log(p*gamma)
add.on <- c(1:ceiling(Nzero_group/2), Nzero_group + (1:ceiling(Nzero_group/2)))
} else if (is.numeric(theta.add)) {
if(length(theta.add) > p) stop("None zero theta must be smaller than p")
add.on <- theta.add
}
#### Mean pattern #####
#### time points and basis functions ####
ns_dense <- ns
if(discrete) ns_dense <- max(500, 2*ns)
sseq <- round(seq(0, 1, length.out = ns_dense), 10)
if(ns * obs_spar < 5) stop("Too Sparse")
W_nknots <- df_W - (degree_W + 1)
if(W_nknots > 0) {
W_knots <- ((1:W_nknots) / (W_nknots + 1)) * diff(interval) + interval[1]
} else {
W_knots <- NULL
}
beta_nknots <- df_beta - (degree_beta + 1)
if(beta_nknots > 0) {
beta_knots <- ((1:beta_nknots) / (beta_nknots + 1)) * diff(interval) + interval[1]
} else {
beta_knots <- NULL
}
beta.basis <- switch(basis_beta,
"bs" = bs(x = sseq, df = df_beta, degree = degree_beta,
Boundary.knots = interval, intercept = TRUE),
"fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_beta )),
"OBasis" = evaluate(OBasis(expand.knots(c(interval[1], beta_knots, interval[2])),
order = degree_beta + 1),
sseq)
)
W.basis <- switch(basis_W,
"bs" = bs(x = sseq, df = df_W, degree = degree_W,
Boundary.knots = interval, intercept = TRUE),
"fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_W )),
"OBasis" = evaluate(OBasis(expand.knots(c(interval[1], W_knots, interval[2])),
order = degree_W + 1),
sseq)
)
#### time points and basis functions ####
#### Correlation matrix: CorrMIX, of linear combination coefficients vector W ####
##X.sigma is between group (Compositional variables) correlation for W vector
##W.sigma is within group (linear combination coeffcients for each compsitional varaible) correlation for W vector
##CovMIX = sigma^2 * kronecker(X.Sigma, W.Sigma,)
X.Sigma <- do.call(Corr_X, list(dim = p, rho = rho_X))
W.Sigma <- do.call(Corr_W, list(dim = df_W, rho = rho_W))
CovMIX <- sigma^2 * kronecker(X.Sigma, W.Sigma) #sigma*kronecker(W.Sigma, X.Sigma)
#### Correlation matrix: CorrMIX ####
#### linear combination coefficients vector: W generation. Mean variation included ####
#=================================================================================#
#== Mean is log(gamma*p)/0 for X_j(t) if basis generated from bs with Intercept ==#
#== since mean(X_j(t))= W.linear[t, ] %*% theat_j= log(gamma*p)/0 * sum(W.linear[t, ]) ==#
#== sum(W.linear[t, ])= 1, other basis might not be true ==#
#=================================================================================#
mean_curve <- rep(0
#log(p*gamma / 2)
, times = p * df_W)
group2 <- matrix(1:(df_W * p), nrow = df_W)
b <- tail(1:p, floor(p/2))
mean_curve[as.vector(group2[, add.on])] <- mean_curve[as.vector(group2[, add.on])] + log(p*gamma)
# mean_curve[as.vector(group2[, add.on[1]])] <- mean_curve[as.vector(group2[, add.on[1]])] + log(p*gamma)
# mean_curve[as.vector(group2[, add.on[-1]])] <- mean_curve[as.vector(group2[, add.on[-1]])] + log(p*gamma / 4)
#
#mean_curve[as.vector(group2[, b])] <- mean_curve[as.vector(group2[, b])] - log(p*gamma)
#
# mean_curve[as.vector(group2[, add.on[1]])] <- mean_curve[as.vector(group2[, add.on[1]])] + log(p*gamma)
#mean_curve <- rep(0, times = p * df_W)
W.linear <- mvrnorm(n, mean_curve, CovMIX)
# group2 <- matrix(1:(df_W * p), nrow = df_W)
# for(j in 1:p){
#
# W.linear[, sample(group2[, j], floor(df_W/2))] <- 0
# }
#### linear combination coefficients vector: W ####
#### longitinal composition: X.comp_full generation ####
X <- array(dim = c(n, p, ns_dense), NA)
dimnames(X)[[2]] <- paste0("X", 1:p)
X.comp <- X
I <- diag(p)
if(add.on != 0 || length(add.on) > 1) {
if(length(add.on) == 1) {
a <- rep(log(p*gamma), n)
} else {
a <- cbind(rep(log(p*gamma), n),replicate(length(add.on) - 1, rep(log(p*gamma ) / 2, n))) #matrix(rnorm(n* length(add.on), log(gamma * p), 1), nrow = n) #gamma#log(gamma * p) #matrix(rnorm(n* length(add.on), log(gamma * p), 1), nrow = n)
}
} else{
a <- 0
}
#a <- as.matrix(a)
#a <- log(gamma * p)
for(s in 1:ns_dense) {
W.Phi <- kronecker(I, t(W.basis[s, ]))
## X curve follows normal distribution
X[, , s] <- t( apply(W.linear, 1, function(x, W.Phi) W.Phi %*% x, W.Phi = W.Phi) ) #?
#X[, add.on , s] <- X[, add.on , s] + a #+ a #+ matrix(rnorm(n * length(add.on), gamma), nrow = n)
#X[, tail(1:p, floor(p/2)) , s] <- X[, tail(1:p, floor(p/2)) , s] - log(p*gamma)
#X[, -add.on, s] <- X[, -add.on, s] + 1
## X.comp follows a logistic normal distribution for each time point after exp(x(s))/sum(exp(x(s)))
X.comp[, , s] <- exp(X[, , s])
X.comp[, , s] <- X.comp[, , s] / rowSums(X.comp[, , s])
#plot(X.comp[1, , s])
## Under threshold, measurement could not be detected and recorded as 0.
## If threshold=0, all measurement detected.
## 0's are replaced by replace value
##X.comp[, , s][X.comp[, , s] <= threshold] <- replace
}
#### longitinal composition: X.comp_full ####
min(X.comp)
min(log(X.comp))
# min(exp(X))
# q <- quantile(exp(X), probs = 0.05)
# X.comp[exp(X)<q]
#### Integration:Z_ITG ####
#X.comp_log <- log(X.comp)
#DD <- alply(X.comp_log, .margins = 1,
# function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
# sseq = sseq )
D <- alply(X.comp, .margins = 1,
function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
sseq = sseq )
if(discrete) {
D <- lapply(D, function(x, ns, ns_dense) {
T.obs <- sample(ns_dense, ns)
T.obs <- sort(T.obs)
x <- x[T.obs, ]
return(x)
}, ns = ns, ns_dense = ns_dense)
}
Z_t.full <- ldply(D[1:n], data.frame, .id = "Subject_ID")
cat("Compostional data: ", max(abs(apply(Z_t.full[,-c(1:2)], 1, sum)) - 1) < 1e-10, "\r\n")
Z_ITG <- sapply(D, function(x, sseq, beta.basis, insert, method){
x[, -1] <- log(x[, -1])
ITG(X = x, basis = beta.basis, sseq = sseq, insert = insert, method = method)$integral
} ,sseq = sseq, beta.basis = beta.basis, insert = insert, method = method)
# Z_ITG <- sapply(D, function(x, sseq, beta.basis, insert, method){
# x[, -1] <- log(x[, -1])
# ITG(X = x, basis = beta.basis, sseq = sseq, insert = insert, method = method)$integral
# } ,sseq = sseq, beta.basis = beta.basis, insert = "X", method = method)
Z_ITG <- t(Z_ITG)
#cat("column-wize sd of Z_ITG \r\n")
print(apply(Z_ITG, 2, sd))
cat("\r\n")
#### Integration:Z_ITG ####
#### Control varaible combining Intercept: Z_c, generation ####
## beta_c and beta0
beta0 <- range_beta_c
if(m > 0) {
Z_control <- matrix(NA, nrow = n, ncol = m)
## ceiling(m/2) is generated by norm;
## (m - ceiling(m/2)) is generated by binary with prob = 0.5
Z_control[, (floor(m/2) + 1):m] <- matrix(rnorm(n * length((floor(m/2) + 1):m)),
nrow = n)
Z_control[, -((floor(m/2) + 1):m)] <- matrix(sample(c(0,1), n * floor(m/2), replace = TRUE, prob = c(0.5, 0.5)),
nrow = n)
## Combine Intercept with control variables
beta_c <- rep(range_beta_c, times = m)
} else {
Z_control <- matrix(0, nrow = n, ncol = 1)
beta_c <- 0
}
#### Control varaible combining Intercept ####
#### Response: y, generation ####
Y.tru <- Z_control %*% beta_c + Z_ITG %*% beta_C + ifelse(intercept, beta0, 0)
sd_ratio <- sd( Z_control * beta_c) / sd( Z_ITG %*% beta_C)
#error <- rnorm(n, 0, 0.5)
error <- rnorm(n, 0, 1)
sigma_e <- sd(Y.tru) / (sd(error) * SNR)
error <- sigma_e*error
Y.obs <- Y.tru + error
y_sd <- sd(Y.obs)
#### Response: y ####
effec <- matrix(NA, nrow = n, ncol = Nzero_group)
group <- matrix(1:(p*df_beta), nrow = df_beta)
for(i in 1:Nzero_group){
idx <- ((i-1)*df_beta + 1):(i*df_beta)
effec[, i] <- Z_ITG[, idx] %*% beta_C[idx]
}
effec <- cbind(effec, Z_control %*% beta_c, error, Y.tru, Y.obs)
colnames(effec) <- c(paste0("g", 1:Nzero_group), "control", "error", "Y.tru", "Y.obs")
cat("effec for each group \r\n")
#print(effec)
cat("\r\n")
#### Compositioanal Data output: Z_t.full and Z_t.obs ####
## Each time point is with prob obs_spar to be observed, #obs follows bin(n, obs_spar)
## Different Subject with varying #observations and observed time points
if(obs_spar == 1) {
Z_t.obs <- Z_t.full
} else {
Z_t.obs <- lapply(D, function(x, obs_spar) {
n <- dim(x)[1]
#lambda <- obs_spar * n
#n.obs <- rpois(1, lambda)
#T.obs <- sample(n, size = n.obs)
T.obs <- replicate(n, sample(c(0,1), 1, prob = c(1 - obs_spar, obs_spar)))
T.obs <- which(T.obs == 1)
x.obs <- x[T.obs, ]
return(x.obs)
}, obs_spar = obs_spar)
Z_t.obs <- plyr::ldply(Z_t.obs, data.frame, .id = "Subject_ID")
}
#### Compositioanal Data output: Z_t.full and Z_t.obs ####
#### Output ####
if(m == 0) {
Z_control <- NULL
beta_c <- NULL}
data <- list(y = Y.obs, Comp = Z_t.obs, Zc = Z_control, intercept = intercept)
beta <- c(beta_C, beta_c, ifelse(intercept, beta0, 0))
data.raw <- list(Z_t.full = Z_t.full, Z_ITG = Z_ITG,
Y.tru = Y.tru, X = X, W = W.linear)
basis.info <- cbind(sseq, beta.basis)
parameter <- list(p = p, n = n, m = m,
k = df_beta, Jx = df_W, ns = ns, basis_W = basis_W, basis_beta = basis_beta,
rho_X = rho_X, rho_W = rho_W, Corr_X = Corr_X, Corr_W = Corr_W, sigma = sigma,
degree_W = degree_W, degree_beta = degree_beta,
SNR = SNR, sigma_e = sigma_e, sd_ratio = sd_ratio, y_sd = y_sd,
theta.add = add.on, obs_spar = obs_spar, intercept = intercept, Nzero_group= Nzero_group)
output <- list(data = data, beta = beta, basis.info = basis.info, data.raw = data.raw,
parameter = parameter)
#### Output ####
# return(output)
# }
#data <- output
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.