Run_simulation/Amodle2.R

#'
#' Simulation model1 for longitudinal composition data
#'
#' Simulate sparse observation from longitudinal compositional data \eqn{X}.
#' @inheritParams Model
#' @export
Amodel2 <- function(n = 50,
                    p = 30, # 30
                    m = 0,#2
                    intercept = TRUE,
                    interval = c(0, 1),
                    ns = 100,
                    obs_spar = 0.6,#0.5,
                    discrete = FALSE,
                    SNR = 1, #4
                    sigma = sqrt(2), #9
                    rho_X = 0,
                    Corr_X = "CorrCS",
                    rho_W = 0, #0
                    Corr_W = "CorrCS",
                    Nzero_group = 4,
                    range_beta = c(0.5, 1),
                    range_beta_c = 1, #1
                    theta.add = c(1, 2, 5, 6), #FALSE#c(1, 2, 5, 6)#c(1, 2) #FALSE#c(1, 2)#c(1:6)
                    gamma = 0.5, #0.5
                    basis_W = "bs",
                    df_W = 4,
                    degree_W = 3,
                    basis_beta = "bs",
                    df_beta = 5,
                    degree_beta = 3,
                    insert = "FALSE",
                    method = "trapezoidal"){
  cat("1")
  beta_C = matrix(0, nrow = p, ncol = df_beta)
  beta_C[3, ] <- c(-0.8, -0.8 , 0.4 , 1 , 1)
  beta_C[4, ] <- c(0.5, 0.5, -0.6  ,-0.6, -0.6)
  beta_C[1, ] <- c(-0.5, -0.5, -0.5 , -1, -1)
  beta_C[2, ] <- c(0.8, 0.8,  0.7,  0.6,  0.6)

  cat("Column Sums of beta equal 0's: ", all.equal(drop(colSums(beta_C)), rep(0, times = df_beta)), "\r\n")
  beta_C <- as.vector(t(beta_C))

  if(obs_spar > 1 || obs_spar < 0) stop("obs_spar is a percentage")
  if(basis_W %in% c("bs", "OBasis") && df_W < 4) {
    warning("With B-spline basis including intercept DF at least 4")
    df_W <- 4 }
  if(basis_beta %in% c("bs", "OBasis") && df_beta < 4) {
    warning("With B-spline basis including intercept DF at least 4")
    df_beta <- 4 }
  if(basis_W == "fourier" && df_W %% 2 == 0) {
    warning("With Fourier basis, DF needs to be odd")
    df_W <- df_W - 1 }
  if(basis_beta == "fourier" && df_beta %% 2 == 0) {
    warning("With Fourier basis, DF needs to be odd")
    df_beta <- df_beta - 1 }
  #### Warnings ####
  #### Coefficients:  beta_C generation.####
  ## beta_C: df = df_beta, degree = degree_beta

  cat("1")
  #### Coefficients: beta and beta_c generation ####
  #### Mean pattern #####
  add.on <- 0
  if (theta.add && !is.numeric(theta.add)) {
    ## If true, first ceiling(Nzero_group/2) of the None-zero groups are set mean with log(p*gamma)
    ## first ceiling(Nzero_group/2) of the zero groups are set mean with log(p*gamma)
    add.on <- c(1:ceiling(Nzero_group/2), Nzero_group + (1:ceiling(Nzero_group/2)))
  } else if (is.numeric(theta.add)) {
    if(length(theta.add) > p) stop("None zero theta must be smaller than p")
    add.on <- theta.add
  }
  #### Mean pattern  #####
  #### time points and basis functions ####
  ns_dense <- ns
  if(discrete) ns_dense <- max(500, 2*ns)
  sseq <- round(seq(0, 1, length.out = ns_dense), 10)
  if(ns * obs_spar < 5) stop("Too Sparse")
  W_nknots <- df_W - (degree_W + 1)
  if(W_nknots > 0) {
    W_knots <- ((1:W_nknots) / (W_nknots + 1)) * diff(interval) + interval[1]
  } else {
    W_knots <- NULL
  }
  beta_nknots <- df_beta - (degree_beta + 1)
  if(beta_nknots > 0) {
    beta_knots <- ((1:beta_nknots) / (beta_nknots + 1)) * diff(interval) + interval[1]
  } else {
    beta_knots <- NULL
  }
  beta.basis <- switch(basis_beta,
                       "bs" = bs(x = sseq, df = df_beta, degree = degree_beta,
                                 Boundary.knots = interval, intercept = TRUE),
                       "fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_beta )),
                       "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], beta_knots, interval[2])),
                                                  order = degree_beta + 1),
                                           sseq)
  )
  W.basis <- switch(basis_W,
                    "bs" = bs(x = sseq, df = df_W, degree = degree_W,
                              Boundary.knots = interval, intercept = TRUE),
                    "fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_W )),
                    "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], W_knots, interval[2])),
                                               order = degree_W + 1),
                                        sseq)
  )
  #### time points and basis functions ####
  #### Correlation matrix: CorrMIX, of linear combination coefficients vector W ####
  ##X.sigma is between group (Compositional variables) correlation for W vector
  ##W.sigma is within group (linear combination coeffcients for each compsitional varaible) correlation for W vector
  ##CovMIX = sigma^2 * kronecker(X.Sigma, W.Sigma,)
  X.Sigma <- do.call(Corr_X, list(dim = p, rho = rho_X))
  W.Sigma <- do.call(Corr_W, list(dim = df_W, rho = rho_W))
  CovMIX <- sigma^2 * kronecker(X.Sigma, W.Sigma) #sigma*kronecker(W.Sigma, X.Sigma)
  #### Correlation matrix: CorrMIX ####
  #### linear combination coefficients vector: W generation. Mean variation included ####
  #=================================================================================#
  #== Mean is log(gamma*p)/0 for X_j(t) if basis generated from bs with Intercept ==#
  #== since mean(X_j(t))= W.linear[t, ] %*% theat_j= log(gamma*p)/0 * sum(W.linear[t, ]) ==#
  #== sum(W.linear[t, ])= 1, other basis might not be true ==#
  #=================================================================================#
  mean_curve <- rep(0
                    #log(p*gamma / 2)
                    , times = p * df_W)
  group2 <- matrix(1:(df_W * p), nrow = df_W)
  b <- tail(1:p, floor(p/2))
  mean_curve[as.vector(group2[, add.on])] <- mean_curve[as.vector(group2[, add.on])] + log(p*gamma)
  # mean_curve[as.vector(group2[, add.on[1]])] <- mean_curve[as.vector(group2[, add.on[1]])] + log(p*gamma)
  # mean_curve[as.vector(group2[, add.on[-1]])] <- mean_curve[as.vector(group2[, add.on[-1]])] + log(p*gamma / 4)
  #
  #mean_curve[as.vector(group2[, b])] <- mean_curve[as.vector(group2[, b])] - log(p*gamma)
  #
  # mean_curve[as.vector(group2[, add.on[1]])] <- mean_curve[as.vector(group2[, add.on[1]])] + log(p*gamma)

  #mean_curve <- rep(0, times = p * df_W)
  W.linear <- mvrnorm(n, mean_curve, CovMIX)
  # group2 <- matrix(1:(df_W * p), nrow = df_W)
  # for(j in 1:p){
  #
  #   W.linear[, sample(group2[, j], floor(df_W/2))] <- 0
  # }
  #### linear combination coefficients vector: W ####
  #### longitinal composition: X.comp_full generation ####
  X <- array(dim = c(n, p, ns_dense), NA)
  dimnames(X)[[2]] <- paste0("X", 1:p)
  X.comp <- X
  I <- diag(p)
  if(add.on != 0 || length(add.on) > 1) {
    if(length(add.on) == 1) {
      a <- rep(log(p*gamma), n)
    } else {
      a <- cbind(rep(log(p*gamma), n),replicate(length(add.on) - 1, rep(log(p*gamma ) / 2, n))) #matrix(rnorm(n* length(add.on), log(gamma * p), 1), nrow = n) #gamma#log(gamma * p) #matrix(rnorm(n* length(add.on), log(gamma * p), 1), nrow = n)

    }
  } else{
    a <- 0
  }
  #a <- as.matrix(a)
  #a <- log(gamma * p)
  for(s in 1:ns_dense) {
    W.Phi <- kronecker(I, t(W.basis[s, ]))
    ## X curve follows normal distribution
    X[, , s] <- t( apply(W.linear, 1, function(x, W.Phi) W.Phi %*% x, W.Phi = W.Phi) ) #?
    #X[, add.on , s] <- X[, add.on , s] + a #+ a #+ matrix(rnorm(n * length(add.on), gamma), nrow = n)
    #X[, tail(1:p, floor(p/2)) , s] <- X[, tail(1:p, floor(p/2)) , s] - log(p*gamma)
    #X[, -add.on, s] <- X[, -add.on, s] + 1
    ## X.comp follows a logistic normal distribution for each time point after exp(x(s))/sum(exp(x(s)))
    X.comp[, , s] <- exp(X[, , s])
    X.comp[, , s] <- X.comp[, , s] / rowSums(X.comp[, , s])
    #plot(X.comp[1, , s])
    ## Under threshold, measurement could not be detected and recorded as 0.
    ## If threshold=0, all measurement detected.
    ## 0's are replaced by replace value
    ##X.comp[, , s][X.comp[, , s] <= threshold] <- replace
  }
  #### longitinal composition: X.comp_full ####
  min(X.comp)
  min(log(X.comp))
  # min(exp(X))
  # q <- quantile(exp(X), probs = 0.05)
  # X.comp[exp(X)<q]
  #### Integration:Z_ITG ####
  #X.comp_log <- log(X.comp)
  #DD <- alply(X.comp_log, .margins = 1,
  #           function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
  #           sseq = sseq )
  D <- alply(X.comp, .margins = 1,
             function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
             sseq = sseq )
  if(discrete) {
    D <- lapply(D, function(x, ns, ns_dense) {
      T.obs <- sample(ns_dense, ns)
      T.obs <- sort(T.obs)
      x <- x[T.obs, ]
      return(x)
    }, ns = ns, ns_dense = ns_dense)
  }
  Z_t.full <- ldply(D[1:n], data.frame, .id = "Subject_ID")
  cat("Compostional data: ", max(abs(apply(Z_t.full[,-c(1:2)], 1, sum)) - 1) < 1e-10, "\r\n")
  Z_ITG <- sapply(D, function(x, sseq, beta.basis, insert, method){
    x[, -1] <- log(x[, -1])
    ITG(X = x, basis = beta.basis, sseq = sseq, insert = insert, method = method)$integral
  } ,sseq = sseq, beta.basis = beta.basis, insert = insert, method = method)

  # Z_ITG <- sapply(D, function(x, sseq, beta.basis, insert, method){
  #   x[, -1] <- log(x[, -1])
  #   ITG(X = x, basis = beta.basis, sseq = sseq, insert = insert, method = method)$integral
  # } ,sseq = sseq, beta.basis = beta.basis, insert = "X", method = method)

  Z_ITG <- t(Z_ITG)
  #cat("column-wize sd of Z_ITG \r\n")
  print(apply(Z_ITG, 2, sd))
  cat("\r\n")
  #### Integration:Z_ITG ####
  #### Control varaible combining Intercept: Z_c, generation ####
  ## beta_c and beta0
  beta0 <- range_beta_c
  if(m > 0) {
    Z_control <- matrix(NA, nrow = n, ncol = m)
    ## ceiling(m/2) is generated by norm;
    ## (m - ceiling(m/2)) is generated by binary with prob = 0.5
    Z_control[, (floor(m/2) + 1):m] <- matrix(rnorm(n * length((floor(m/2) + 1):m)),
                                              nrow = n)
    Z_control[, -((floor(m/2) + 1):m)] <- matrix(sample(c(0,1), n * floor(m/2), replace = TRUE, prob = c(0.5, 0.5)),
                                                 nrow = n)
    ## Combine Intercept with control variables
    beta_c <- rep(range_beta_c, times = m)
  } else {
    Z_control <- matrix(0, nrow = n, ncol = 1)
    beta_c <- 0
  }
  #### Control varaible combining Intercept ####
  #### Response: y, generation ####
  Y.tru <- Z_control %*% beta_c + Z_ITG %*% beta_C + ifelse(intercept, beta0, 0)
  sd_ratio <- sd( Z_control * beta_c) / sd( Z_ITG %*% beta_C)
  #error <- rnorm(n, 0, 0.5)
  error <- rnorm(n, 0, 1)
  sigma_e <- sd(Y.tru) / (sd(error) * SNR)
  error <- sigma_e*error
  Y.obs <- Y.tru + error
  y_sd <- sd(Y.obs)
  #### Response: y ####
  effec <- matrix(NA, nrow = n, ncol = Nzero_group)
  group <- matrix(1:(p*df_beta), nrow = df_beta)
  for(i in 1:Nzero_group){
    idx <- ((i-1)*df_beta + 1):(i*df_beta)
    effec[, i] <- Z_ITG[, idx] %*% beta_C[idx]
  }
  effec <- cbind(effec, Z_control %*% beta_c, error, Y.tru, Y.obs)
  colnames(effec) <- c(paste0("g", 1:Nzero_group), "control", "error", "Y.tru", "Y.obs")
  cat("effec for each group \r\n")
  #print(effec)
  cat("\r\n")
  #### Compositioanal Data output: Z_t.full and Z_t.obs ####
  ## Each time point is with prob obs_spar to be observed, #obs follows bin(n, obs_spar)
  ## Different Subject with varying #observations and observed time points
  if(obs_spar == 1) {
    Z_t.obs <- Z_t.full
  } else {
    Z_t.obs <- lapply(D, function(x, obs_spar) {
      n <- dim(x)[1]
      #lambda <- obs_spar * n
      #n.obs <- rpois(1, lambda)
      #T.obs <- sample(n, size = n.obs)
      T.obs <- replicate(n, sample(c(0,1), 1, prob = c(1 - obs_spar, obs_spar)))
      T.obs <- which(T.obs == 1)
      x.obs <- x[T.obs, ]
      return(x.obs)
    }, obs_spar = obs_spar)
    Z_t.obs <- plyr::ldply(Z_t.obs, data.frame, .id = "Subject_ID")
  }
  #### Compositioanal Data output: Z_t.full and Z_t.obs ####
  #### Output ####
  if(m == 0) {
    Z_control <- NULL
    beta_c <- NULL}
  data <- list(y = Y.obs, Comp = Z_t.obs, Zc = Z_control, intercept = intercept)
  beta <- c(beta_C, beta_c, ifelse(intercept, beta0, 0))
  data.raw <- list(Z_t.full = Z_t.full, Z_ITG = Z_ITG,
                   Y.tru = Y.tru, X = X, W = W.linear)
  basis.info <- cbind(sseq, beta.basis)
  parameter <- list(p = p, n = n, m = m,
                    k = df_beta, Jx = df_W, ns = ns, basis_W = basis_W, basis_beta = basis_beta,
                    rho_X = rho_X, rho_W = rho_W, Corr_X = Corr_X, Corr_W = Corr_W, sigma = sigma,
                    degree_W = degree_W, degree_beta = degree_beta,
                    SNR = SNR,  sigma_e = sigma_e, sd_ratio = sd_ratio, y_sd = y_sd,
                    theta.add = add.on, obs_spar = obs_spar, intercept = intercept, Nzero_group= Nzero_group)
  output <- list(data = data, beta = beta, basis.info = basis.info, data.raw = data.raw,
                 parameter = parameter)
  #### Output ####
  #   return(output)
  # }
  #data <- output
  return(output)
}
jiji6454/Rpac_compReg documentation built on May 31, 2019, 5:01 a.m.