Rpac_compReg: Compositional Data Regression

##Auto
CorrAR <- function(dim, rho){
  Corr <- abs(matrix(rep(1:dim, length = dim) - rep(1:dim, each = dim), nrow = dim))
  Corr <- rho^Corr
  return(Corr)
}

##Compound
CorrCS <- function(dim, rho){
  Corr <- matrix(rho, nrow = dim, ncol = dim)
  diag(Corr) <- 1
  return(Corr)
}

#'
#' Simulation model1 for longitudinal composition data
#'
#' Simulate sparse observation from longitudinal compositional data \eqn{X}.
# @usage
# Model <- function(n, p, m = 0, intercept = TRUE,
#                   interval = c(0, 1), ns = 100, obs_spar = 0.6,
#                   discrete = FALSE, SNR = 1, sigma = 2,
#                   rho_X, Corr_X = c("CorrAR", "CorrCS"),
#                   rho_W, Corr_W = c("CorrAR", "CorrCS"),
#                   Nzero_group = 4,
#                   range_beta = c(0.5, 1), range_beta_c = 1,
#                   beta_C , theta.add = c(1, 2, 5, 6), gamma = 0.5,
#                   basis_W = c("bs", "OBasis", "fourier"),
#                   df_W = 5, degree_W = 3,
#                   basis_beta = c("bs", "OBasis", "fourier"),
#                   df_beta = 5, degree_beta = 3,
#                   insert = c("FALSE", "basis"), method = c("trapezoidal", "step"))
#
#'
#'
#'
#'
#' @param n sample size
#'
#' @param p size of compositional predictors falling in \eqn{S^p}
#'
#' @param intercept including intercept or not to generate response variable, default is TRUE
#'
#' @param m size of time-invariant predictors. First \code{ceiling(m/2)} columns are generated by  \code{bin(1,0.5)} independently;
#'          latter \code{(m - ceiling(m/2))} columns are generated \code{norm(0, 1)} independently.
#'
#' @param ns \code{ns} is a scaler specifying length of equally spaced time sequence on domian \code{interval}.
#'
#' @param discrete is logical, specifying whether \eqn{X} is generated at different time points.
#'                 If \code{distrete = TRUE}, generate \eqn{X} on dense sequence created by \code{max(ns_dense = 1000, 5*ns)} and Then for
#'                 each subject, random sample \code{ns} points. recommend \code{ns < 200} when \code{distrete = TRUE}.
#'
#' @param interval a length 2 vector indicating time domain.
#'
#' @param obs_spar a percentage used to get sparse ovbservation. Each time point probability \code{obs_spar} to be observed. It allows different subject with
#'                 different observed time points and size.
#'                 \code{obs_spar * ns > 5} is required.
#'
#' @param SNR signal to noise ratio.
#'
#' @param sigma,rho_X,Corr_X,rho_W,Corr_W linear combination scaler \code{W}, \code{df_W*p}, is generated from from Multivariate Normal Distribution with mean 0's and
#'                                        ovariance matrix = \code{simga^2 * kronecker(Sigma_X, Sigma_W)}.
#'                                        \code{Corr_X} is correlation structure of \code{Sigma_X} with \eqn{\rho} = \code{rho_X},
#'                                        which controls canonical-correlation between groups for W;
#'                                        \code{Corr_W} is correlation structure of \code{Sigma_W} with \eqn{\rho} = \code{rho_W},
#'                                        which controls correlation within groups of W.
#'
#'
#' @param Nzero_group a even scaler. First \code{Nzero_group} compositional predictors are considered having none zero effect, while others are with 0 coefficients.
#'
#' @param range_beta  a sorted vector of length 2 used to generate coefficient matrix \code{B} for compositional predict, which is with demension \code{p*k}.
#'                    For each column of \code{B}, generate \code{Nzero_group/2} from unifom distribution with range \code{range_beta}, and together with their negatives
#'                    are ramdom assigned to the first \code{Nzero_group} rows.
#'
#' @param range_beta_c value of coefficients for beta0 and beta_c (coefficients for time-invariant predictors)
#' @param beta_C vectorized coefficients of coefficient matrix for compositional predictors. Could be missing.
#'
#' @param theta.add logical or numerical. If numerical, indicating which ones of compositional predictors of high level mean.
#'                  If logical, \code{c(1:ceiling(Nzero_group/2), Nzero_group + (1:ceiling(Nzero_group/2)))} are set to with high level mean
#'
#' @param gamma high level mean groups adding log(p * gamma) before convertint into compositional data, otherwise 0.
#'
#' @param basis_W,df_W,degree_W longitudinal compositional data is generated from linear combination of basis \eqn{\Psi(t)}, take exponetial and change into compositional data.
#'                              \itemize{
#'                                 \item \code{basis_W} is the basis function for \eqn{\Psi(t)}  - default is \code{"bs"}.
#'                                        Other choise are \code{"OBasis"} and \code{"fourier"};
#'                                 \item \code{df_W} is the degree of freedom for basis \eqn{\Psi(t)} - default is 10 ;
#'                                 \item \code{degree_W} the is degree for \eqn{\Psi(t)} - default is 3.
#'                               }
#'
#'@param basis_beta,df_beta,degree_beta coefficinet curve is generate by linear combination of basis \eqn{\Phi(t)}.
#'                                      \itemize{
#'                                           \item \code{basis_beta} is the basis function for \eqn{\Phi(t)} - default is \code{"bs"}.
#'                                                  Other choise are \code{"OBasis"} and \code{"fourier"};
#'                                            \item \code{df_beta} is the degree of freedom for basis \eqn{\Phi(t)} - default is 5;
#'                                            \item \code{degree_beta} is the degree for \eqn{\Phi(t)} - default is 3.
#'                                       }
#'
#' @param insert way to interpolation.
#'               \itemize{
#'                  \item \code{"FALSE"} no interpolation.
#'                  \item \code{"basis"} compositional data is considered as step function, imposing basis on un-observed time points for each subject.
#'               }
#'               Default is \code{"FALSE"}.
#'
#' @inheritParams FuncompCGL
#'
#' @return a list
#' \item{data}{a list, a vector \code{y} of response variable, a data frame \code{Comp} of sparse observation of longitudinal compositional data,
#'                     a matrix \code{Zc} of time-invariable predictors, a logtical \code{intercept}.}
#' \item{beta}{a length \code{p*df_beta + m + 1} vector for coefficients}
#' \item{basis.info}{ matrix for basis for beta, combining the first column as time sequence.}
#' \item{data.raw}{ a list, \code{Z_t.full} full observation of longitudinal compositional data,
#'                          \code{Z_ITG} integral for full observation of longitudinal compositional data,
#'                          \code{Y.tru} true response without noise,
#'                          \code{X} longitudinal before converting into compositional data,
#'                          \code{W} matrix of linear combination scalers, \code{n * (df_W * p)}.}
#'
#' \item{parameter}{ a list of parameters.}
#'
#'
#' @examples
#'
#' df_beta = 5
#' p = 20
#' Data <- Model(n = 50, p = p, m = 2, intercept = TRUE, ns = 50, SNR = 1,
#'               rho_X = 0.1, rho_W = 0.2, df_W = 10, df_beta = df_beta, obs_spar = 0.5)
#' names(Data$data)
#' Data.test <- Model(n = 50, p = p, m = 2, intercept = TRUE, ns = 50, SNR = 1,
#'                    rho_X = 0.1, rho_W = 0.2, df_W = 10, df_beta = df_beta, obs_spar = 0.5,
#'                    beta_C = Data$beta[1:(p*df_beta)])
#'
#'
#' @export
#'
#' @importFrom MASS mvrnorm
#' @importFrom plyr alply ldply
#' @importFrom utils tail
#'




Model <- function(n, p, m = 0, intercept = TRUE,
                  interval = c(0, 1), ns = 100, obs_spar = 0.6, discrete = FALSE,
                  SNR = 1, sigma = 2,
                  rho_X, Corr_X = c("CorrAR", "CorrCS"),
                  rho_W, Corr_W = c("CorrAR", "CorrCS"),
                  Nzero_group = 4,
                  range_beta = c(0.5, 1), range_beta_c = 1,
                  beta_C ,
                  theta.add = c(1, 2, 5, 6), gamma = 0.5,
                  basis_W = c("bs", "OBasis", "fourier"), df_W = 5, degree_W = 3,
                  basis_beta = c("bs", "OBasis", "fourier"), df_beta = 5, degree_beta = 3,
                  insert = c("FALSE", "basis"), method = c("trapezoidal", "step")) {



  #### Warnings ####
  Corr_X <- match.arg(Corr_X)
  Corr_W <- match.arg(Corr_W)
  basis_W <- match.arg(basis_W)
  insert <- match.arg(insert)
  method <- match.arg(method)
  basis_beta <- match.arg(basis_beta)

  if(obs_spar > 1 || obs_spar < 0) stop("obs_spar is a percentage")

  if(basis_W %in% c("bs", "OBasis") && df_W < 4) {
    warning("With B-spline basis including intercept DF at least 4")
    df_W <- 4 }
  if(basis_beta %in% c("bs", "OBasis") && df_beta < 4) {
    warning("With B-spline basis including intercept DF at least 4")
    df_beta <- 4 }
  if(basis_W == "fourier" && df_W %% 2 == 0) {
    warning("With Fourier basis, DF needs to be odd")
    df_W <- df_W - 1 }
  if(basis_beta == "fourier" && df_beta %% 2 == 0) {
    warning("With Fourier basis, DF needs to be odd")
    df_beta <- df_beta - 1 }

  #### Warnings ####

  #### Coefficients:  beta_C generation.####
  ## beta_C: df = df_beta, degree = degree_beta
  if(missing(beta_C)) {
    C <- matrix(0, nrow = p, ncol = df_beta)
    for(j in 1:df_beta){
      col_input <- round(runif((Nzero_group/2), min = range_beta[1], max = range_beta[2]),
                         digits = 2)
      col_input <- c(col_input, -col_input)
      col_permuation <- sample(Nzero_group)
      C[1:Nzero_group, j] <- col_input[col_permuation]
    }
    beta_C <- as.vector(t(C))
  }

  #### Coefficients: beta and beta_c generation ####


  #### Mean pattern #####
  add.on <- 0
  if (theta.add && !is.numeric(theta.add)) {
    ## If true, first ceiling(Nzero_group/2) of the None-zero groups are set mean with log(p*gamma)
    ## first ceiling(Nzero_group/2) of the zero groups are set mean with log(p*gamma)
    add.on <- c(1:ceiling(Nzero_group/2), Nzero_group + (1:ceiling(Nzero_group/2)))
  } else if (is.numeric(theta.add)) {
    if(length(theta.add) > p) stop("None zero theta must be smaller than p")
    add.on <- theta.add
  }

  #### Mean pattern  #####


  #### time points and basis functions ####
  ns_dense <- ns
  if(discrete) ns_dense <- max(1000, 5*ns)
  sseq <- round(seq(0, 1, length.out = ns_dense), 10)

  if(ns * obs_spar < 5) stop("Too Sparse")

  W_nknots <- df_W - (degree_W + 1)
  if(W_nknots > 0) {
    W_knots <- ((1:W_nknots) / (W_nknots + 1)) * diff(interval) + interval[1]
  } else {
    W_knots <- NULL
  }

  beta_nknots <- df_beta - (degree_beta + 1)
  if(beta_nknots > 0) {
    beta_knots <- ((1:beta_nknots) / (beta_nknots + 1)) * diff(interval) + interval[1]
  } else {
    beta_knots <- NULL
  }


  beta.basis <- switch(basis_beta,
                       "bs" = bs(x = sseq, df = df_beta, degree = degree_beta,
                                 Boundary.knots = interval, intercept = TRUE),
                       "fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_beta )),
                       "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], beta_knots, interval[2])),
                                                  order = degree_beta + 1),
                                           sseq)

  )

  W.basis <- switch(basis_W,
                    "bs" = bs(x = sseq, df = df_W, degree = degree_W,
                              Boundary.knots = interval, intercept = TRUE),

                    "fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_W )),

                    "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], W_knots, interval[2])),
                                               order = degree_W + 1),
                                        sseq)

  )

  #### time points and basis functions ####


  #### Correlation matrix: CorrMIX, of linear combination coefficients vector W ####
  ##X.sigma is between group (Compositional variables) correlation for W vector
  ##W.sigma is within group (linear combination coeffcients for each compsitional varaible) correlation for W vector
  ##CovMIX = sigma^2 * kronecker(X.Sigma, W.Sigma,)
  X.Sigma <- do.call(Corr_X, list(dim = p, rho = rho_X))
  W.Sigma <- do.call(Corr_W, list(dim = df_W, rho = rho_W))
  CovMIX <- sigma^2 * kronecker(X.Sigma, W.Sigma) #sigma*kronecker(W.Sigma, X.Sigma)
  #### Correlation matrix: CorrMIX ####



  #### linear combination coefficients vector: W generation. Mean variation included ####
  #=================================================================================#
  #== Mean is log(gamma*p)/0 for X_j(t) if basis generated from bs with Intercept ==#
  #== since mean(X_j(t))= W.linear[t, ] %*% theat_j= log(gamma*p)/0 * sum(W.linear[t, ]) ==#
  #== sum(W.linear[t, ])= 1, other basis might not be true ==#
  #=================================================================================#

  mean_curve <- rep(0
                    #log(p*gamma / 2)
                    , times = p * df_W)
  group2 <- matrix(1:(df_W * p), nrow = df_W)
  b <- tail(1:p, floor(p/2))
  mean_curve[as.vector(group2[, add.on])] <- mean_curve[as.vector(group2[, add.on])] + log(p*gamma)

  #W.linear <- mvrnorm(n, rep(0, times = p * df_W), CovMIX)
  W.linear <- mvrnorm(n, mean_curve, CovMIX)
  #### linear combination coefficients vector: W ####


  #### longitinal composition: X.comp_full generation ####
  X <- array(dim = c(n, p, ns_dense), NA)
  dimnames(X)[[2]] <- paste0("X", 1:p)
  X.comp <- X
  I <- diag(p)
  for(s in 1:ns_dense) {

    W.Phi <- kronecker(I, t(W.basis[s, ]))

    ## X curve follows normal distribution
    X[, , s] <- t( apply(W.linear, 1, function(x, W.Phi) W.Phi %*% x, W.Phi = W.Phi) ) #?
    #X[, add.on , s] <- X[, add.on , s] + log(gamma * p)
    ## X.comp follows a logistic normal distribution for each time point after exp(x(s))/sum(exp(x(s)))
    X.comp[, , s] <- exp(X[, , s])
    X.comp[, , s] <- X.comp[, , s] / rowSums(X.comp[, , s])

    #plot(X.comp[1, , s])

    ## Under threshold, measurement could not be detected and recorded as 0.
    ## If threshold=0, all measurement detected.
    ## 0's are replaced by replace value
    ##X.comp[, , s][X.comp[, , s] <= threshold] <- replace
  }

  #### longitinal composition: X.comp_full ####


  #### Integration:Z_ITG ####
  #X.comp_log <- log(X.comp)
  #DD <- alply(X.comp_log, .margins = 1,
  #           function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
  #           sseq = sseq )

  D <- alply(X.comp, .margins = 1,
              function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
              sseq = sseq )

  if(discrete) {
    D <- lapply(D, function(x, ns, ns_dense) {
      T.obs <- sample(ns_dense, ns)
      T.obs <- sort(T.obs)
      x <- x[T.obs, ]
      return(x)
    }, ns = ns, ns_dense = ns_dense)
  }
  Z_t.full <- ldply(D[1:n], data.frame, .id = "Subject_ID")


  Z_ITG <- sapply(D, function(x, sseq, beta.basis, insert, method){
    x[, -1] <- log(x[, -1])
    ITG(X = x, basis = beta.basis, sseq = sseq, insert = insert, method = method)$integral
  } ,sseq = sseq, beta.basis = beta.basis, insert = insert, method = method)

  Z_ITG <- t(Z_ITG)
  #### Integration:Z_ITG ####

  #### Control varaible combining Intercept: Z_c, generation ####
  ## beta_c and beta0
  beta0 <- range_beta_c

  if(m > 0) {
    Z_control <- matrix(NA, nrow = n, ncol = m)

    ## ceiling(m/2) is generated by norm;
    ## (m - ceiling(m/2)) is generated by binary with prob = 0.5
    Z_control[, (floor(m/2) + 1):m] <- matrix(rnorm(n * length((floor(m/2) + 1):m)),
                                              nrow = n)
    Z_control[, -((floor(m/2) + 1):m)] <- matrix(sample(c(0,1), n * floor(m/2), replace = TRUE, prob = c(0.5, 0.5)),
                                                 nrow = n)
    ## Combine Intercept with control variables
    beta_c <- rep(range_beta_c, times = m)
  } else {
    Z_control <- matrix(0, nrow = n, ncol = 1)
    beta_c <- 0
  }
  #### Control varaible combining Intercept ####

  #### Response: y, generation ####
  Y.tru <- Z_control %*% beta_c + Z_ITG %*% beta_C + ifelse(intercept, beta0, 0)
  sd_ratio <- sd( Z_control * beta_c) / sd( Z_ITG %*% beta_C)
  error <- rnorm(n, 0, 1)
  sigma_e <- sd(Y.tru) / (sd(error) * SNR)
  error <- sigma_e*error
  Y.obs <- Y.tru + error
  y_sd <- sd(Y.obs)
  #### Response: y ####


  #### Compositioanal Data output: Z_t.full and Z_t.obs ####


  ## Each time point is with prob obs_spar to be observed, #obs follows bin(n, obs_spar)
  ## Different Subject with varying #observations and observed time points

  if(obs_spar == 1) {
    Z_t.obs <- Z_t.full
  } else {
    Z_t.obs <- lapply(D, function(x, obs_spar) {
      n <- dim(x)[1]
      #lambda <- obs_spar * n
      #n.obs <- rpois(1, lambda)
      #T.obs <- sample(n, size = n.obs)
      T.obs <- replicate(n, sample(c(0,1), 1, prob = c(1 - obs_spar, obs_spar)))
      T.obs <- which(T.obs == 1)
      x.obs <- x[T.obs, ]
      return(x.obs)
    }, obs_spar = obs_spar)

    Z_t.obs <- plyr::ldply(Z_t.obs, data.frame, .id = "Subject_ID")
  }
  #### Compositioanal Data output: Z_t.full and Z_t.obs ####

  #### Output ####
  if(m == 0) {
    Z_control <- NULL
    beta_c <- NULL}

  data <- list(y = Y.obs, Comp = Z_t.obs, Zc = Z_control, intercept = intercept)
  beta <- c(beta_C, beta_c, ifelse(intercept, beta0, 0))
  data.raw <- list(Z_t.full = Z_t.full, Z_ITG = Z_ITG,
                   Y.tru = Y.tru, X = X, W = W.linear)
  basis.info <- cbind(sseq, beta.basis)
  parameter <- list(p = p, n = n, m = m,
                    k = df_beta, Jx = df_W, ns = ns, basis_W = basis_W, basis_beta = basis_beta,
                    rho_X = rho_X, rho_W = rho_W, Corr_X = Corr_X, Corr_W = Corr_W, sigma = sigma,
                    degree_W = degree_W, degree_beta = degree_beta,
                    SNR = SNR,  sigma_e = sigma_e, sd_ratio = sd_ratio, y_sd = y_sd,
                    theta.add = add.on, obs_spar = obs_spar)

  output <- list(data = data, beta = beta, basis.info = basis.info, data.raw = data.raw,
                 parameter = parameter)
  #### Output ####

  return(output)
}

#'
#' Simulation model2 for longitudinal composition data
#'
#' Simulate sparse observation from longitudinal compositional data \eqn{X}.
#'
# @usage
# Model2 <- function(n, p, m = 0, intercept = TRUE,
#                    interval = c(0, 1), ns = 100, obs_spar = 0.6, discrete = FALSE,
#                    SNR = 1, sigma = 2,
#                    rho_X, Corr_X = c("CorrAR", "CorrCS"),
#                    rho_W, Corr_W = c("CorrAR", "CorrCS"),
#                    Nzero_group = 4,
#                    range_beta = c(0.5, 1), range_beta_c = 1, beta_C ,
#                    theta.add = c(1, 2, 5, 6), gamma = 0.5,
#                    basis_W = c("bs", "OBasis", "fourier"), df_W = 5, degree_W = 3,
#                    basis_beta = c("bs", "OBasis", "fourier"),
#                    df_beta = 5, degree_beta = 3,
#                    insert = c("FALSE", "basis"), method = c("trapezoidal", "step"))
#
#'
#' @param n sample size
#'
#' @param p size of compositional predictors falling in \eqn{S^p}
#'
#' @param intercept including intercept or not to generate response variable, default is TRUE
#'
#' @param m size of time-invariant predictors. First \code{ceiling(m/2)} columns are generated by  \code{bin(1,0.5)} independently;
#'          latter \code{(m - ceiling(m/2))} columns are generated \code{norm(0, 1)} independently.
#'
#' @param ns \code{ns} is a scaler specifying length of equally spaced time sequence on domian \code{interval}.
#'
#' @param discrete is logical, specifying whether \eqn{X} is generated at different time points.
#'                 If \code{distrete = TRUE}, generate \eqn{X} on dense sequence created by \code{max(ns_dense = 1000, 5*ns)} and Then for
#'                 each subject, random sample \code{ns} points. recommend \code{ns < 200} when \code{distrete = TRUE}.
#'
#' @param interval a length 2 vector indicating time domain.
#'
#' @param obs_spar a percentage used to get sparse ovbservation. Each time point probability \code{obs_spar} to be observed. It allows different subject with
#'                 different observed time points and size.
#'                 \code{obs_spar * ns > 5} is required.
#'
#' @param SNR signal to noise ratio.
#'
#' @param sigma,rho_X,Corr_X,rho_W,Corr_W linear combination scaler \code{W}, \code{df_W*p}, is generated from from Multivariate Normal Distribution with mean 0's and
#'                                        ovariance matrix = \code{simga^2 * kronecker(Sigma_X, Sigma_W)}.
#'                                        \code{Corr_X} is correlation structure of \code{Sigma_X} with \eqn{\rho} = \code{rho_X},
#'                                        which controls canonical-correlation between groups for W;
#'                                        \code{Corr_W} is correlation structure of \code{Sigma_W} with \eqn{\rho} = \code{rho_W},
#'                                        which controls correlation within groups of W.
#'
#'
#' @param Nzero_group a even scaler. First \code{Nzero_group} compositional predictors are considered having none zero effect, while others are with 0 coefficients.
#'
#' @param range_beta  a sorted vector of length 2 used to generate coefficient matrix \code{B} for compositional predict, which is with demension \code{p*k}.
#'                    For each column of \code{B}, generate \code{Nzero_group/2} from unifom distribution with range \code{range_beta}, and together with their negatives
#'                    are ramdom assigned to the first \code{Nzero_group} rows.
#'
#' @param range_beta_c value of coefficients for beta0 and beta_c (coefficients for time-invariant predictors)
#' @param beta_C vectorized coefficients of coefficient matrix for compositional predictors. Could be missing.
#'
#' @param theta.add logical or numerical. If numerical, indicating which ones of compositional predictors of high level mean.
#'                  If logical, \code{c(1:ceiling(Nzero_group/2), Nzero_group + (1:ceiling(Nzero_group/2)))} are set to with high level mean
#'
#' @param gamma high level mean groups adding log(p * gamma) before convertint into compositional data, otherwise 0.
#'
#' @param basis_W,df_W,degree_W longitudinal compositional data is generated from linear combination of basis \eqn{\Psi(t)}, take exponetial and change into compositional data.
#'                              \itemize{
#'                                 \item \code{basis_W} is the basis function for \eqn{\Psi(t)}  - default is \code{"bs"}.
#'                                        Other choise are \code{"OBasis"} and \code{"fourier"};
#'                                 \item \code{df_W} is the degree of freedom for basis \eqn{\Psi(t)} - default is 10 ;
#'                                 \item \code{degree_W} the is degree for \eqn{\Psi(t)} - default is 3.
#'                               }
#'
#'@param basis_beta,df_beta,degree_beta coefficinet curve is generate by linear combination of basis \eqn{\Phi(t)}.
#'                                      \itemize{
#'                                           \item \code{basis_beta} is the basis function for \eqn{\Phi(t)} - default is \code{"bs"}.
#'                                                  Other choise are \code{"OBasis"} and \code{"fourier"};
#'                                            \item \code{df_beta} is the degree of freedom for basis \eqn{\Phi(t)} - default is 5;
#'                                            \item \code{degree_beta} is the degree for \eqn{\Phi(t)} - default is 3.
#'                                       }
#'
#' @param insert way to interpolation.
#'               \itemize{
#'                  \item \code{"FALSE"} no interpolation.
#'                  \item \code{"basis"} compositional data is considered as step function, imposing basis on un-observed time points for each subject.
#'               }
#'               Default is \code{"FALSE"}.
#'
#' @inheritParams FuncompCGL
#'
#' @return a list
#' \item{data}{a list, a vector \code{y} of response variable, a data frame \code{Comp} of sparse observation of longitudinal compositional data,
#'                     a matrix \code{Zc} of time-invariable predictors, a logtical \code{intercept}.}
#' \item{beta}{a length \code{p*df_beta + m + 1} vector for coefficients}
#' \item{basis.info}{ matrix for basis for beta, combining the first column as time sequence.}
#' \item{data.raw}{ a list, \code{Z_t.full} full observation of longitudinal compositional data,
#'                          \code{Z_ITG} integral for full observation of longitudinal compositional data,
#'                          \code{Y.tru} true response without noise,
#'                          \code{X} longitudinal before converting into compositional data,
#'                          \code{W} matrix of linear combination scalers, \code{n * (df_W * p)}.}
#'
#' \item{parameter}{ a list of parameters.}
#'
#'
#' @examples
#'
#' df_beta = 5
#' p = 20
#' Data <- Model2(n = 50, p = p, m = 2, intercept = TRUE, ns = 50, SNR = 1,
#'               rho_X = 0.1, rho_W = 0.2, df_W = 10, df_beta = df_beta, obs_spar = 0.5)
#' names(Data$data)
#' Data.test <- Model2(n = 50, p = p, m = 2, intercept = TRUE, ns = 50, SNR = 1,
#'                    rho_X = 0.1, rho_W = 0.2, df_W = 10, df_beta = df_beta, obs_spar = 0.5,
#'                    beta_C = Data$beta[1:(p*df_beta)])
#'
#'
#' @export
#'
#' @importFrom MASS mvrnorm
#' @importFrom plyr alply ldply
#' @importFrom utils tail
#'
Model2 <- function(n, p, m = 0, intercept = TRUE,
                   interval = c(0, 1), ns = 100, obs_spar = 0.6, discrete = FALSE,
                   SNR = 1, sigma = 2,
                   rho_X, Corr_X = c("CorrAR", "CorrCS"),
                   rho_W, Corr_W = c("CorrAR", "CorrCS"),
                   Nzero_group = 4,
                   range_beta = c(0.5, 1), range_beta_c = 1,
                   beta_C ,
                   theta.add = c(1, 2, 5, 6), gamma = 0.5,
                   basis_W = c("bs", "OBasis", "fourier"), df_W = 5, degree_W = 3,
                   basis_beta = c("bs", "OBasis", "fourier"), df_beta = 5, degree_beta = 3,
                   insert = c("FALSE", "basis"), method = c("trapezoidal", "step")) {



  #### Warnings ####
  Corr_X <- match.arg(Corr_X)
  Corr_W <- match.arg(Corr_W)
  basis_W <- match.arg(basis_W)
  insert <- match.arg(insert)
  method <- match.arg(method)
  basis_beta <- match.arg(basis_beta)
  this.call <- match.call()

  if(obs_spar > 1 || obs_spar < 0) stop("obs_spar is a percentage")

  if(basis_W %in% c("bs", "OBasis") && df_W < 4) {
    warning("With B-spline basis including intercept DF at least 4")
    df_W <- 4 }
  if(basis_beta %in% c("bs", "OBasis") && df_beta < 4) {
    warning("With B-spline basis including intercept DF at least 4")
    df_beta <- 4 }
  if(basis_W == "fourier" && df_W %% 2 == 0) {
    warning("With Fourier basis, DF needs to be odd")
    df_W <- df_W - 1 }
  if(basis_beta == "fourier" && df_beta %% 2 == 0) {
    warning("With Fourier basis, DF needs to be odd")
    df_beta <- df_beta - 1 }

  #### Warnings ####

  #### Coefficients:  beta_C generation.####
  ## beta_C: df = df_beta, degree = degree_beta
  if(missing(beta_C)) {
    C <- matrix(0, nrow = p, ncol = df_beta)
    for(j in 1:df_beta){
      col_input <- round(runif((Nzero_group/2), min = range_beta[1], max = range_beta[2]),
                         digits = 2)
      col_input <- c(col_input, -col_input)
      col_permuation <- sample(Nzero_group)
      C[1:Nzero_group, j] <- col_input[col_permuation]
    }
    beta_C <- as.vector(t(C))
  }

  #### Coefficients: beta and beta_c generation ####


  #### Mean pattern #####
  add.on <- 0
  if (theta.add && !is.numeric(theta.add)) {
    ## If true, first ceiling(Nzero_group/2) of the None-zero groups are set mean with log(p*gamma)
    ## first ceiling(Nzero_group/2) of the zero groups are set mean with log(p*gamma)
    add.on <- c(1:ceiling(Nzero_group/2), Nzero_group + (1:ceiling(Nzero_group/2)))
  } else if (is.numeric(theta.add)) {
    if(length(theta.add) > p) stop("None zero theta must be smaller than p")
    add.on <- theta.add
  }

  #### Mean pattern  #####


  #### time points and basis functions ####
  ns_dense <- ns
  if(discrete) ns_dense <- max(1000, 5*ns)
  sseq <- round(seq(0, 1, length.out = ns_dense), 10)

  if(ns * obs_spar < 5) stop("Too Sparse")

  W_nknots <- df_W - (degree_W + 1)
  if(W_nknots > 0) {
    W_knots <- ((1:W_nknots) / (W_nknots + 1)) * diff(interval) + interval[1]
  } else {
    W_knots <- NULL
  }

  beta_nknots <- df_beta - (degree_beta + 1)
  if(beta_nknots > 0) {
    beta_knots <- ((1:beta_nknots) / (beta_nknots + 1)) * diff(interval) + interval[1]
  } else {
    beta_knots <- NULL
  }


  beta.basis <- switch(basis_beta,
                       "bs" = bs(x = sseq, df = df_beta, degree = degree_beta,
                                 Boundary.knots = interval, intercept = TRUE),
                       "fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_beta )),
                       "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], beta_knots, interval[2])),
                                                  order = degree_beta + 1),
                                           sseq)

  )

  # W.basis <- switch(basis_W,
  #                   "bs" = bs(x = sseq, df = df_W, degree = degree_W,
  #                             Boundary.knots = interval, intercept = TRUE),
  #
  #                   "fourier" = eval.basis(sseq, basisobj = create.fourier.basis(rangeval = interval, nbasis = df_W )),
  #
  #                   "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], W_knots, interval[2])),
  #                                              order = degree_W + 1),
  #                                       sseq)
  #
  # )

  #### time points and basis functions ####


  #### Correlation matrix: CorrMIX, of linear combination coefficients vector W ####
  ##X.sigma is between group (Compositional variables) correlation for W vector
  ##W.sigma is within group (linear combination coeffcients for each compsitional varaible) correlation for W vector
  ##CovMIX = sigma^2 * kronecker(X.Sigma, W.Sigma,)
  X.Sigma <- do.call(Corr_X, list(dim = p, rho = rho_X))
  ##Now use this for the correlation over time
  W.Sigma <- do.call(Corr_W, list(dim = ns_dense, rho = rho_W))
  CovMIX <- sigma^2 * kronecker(W.Sigma, X.Sigma) #sigma*kronecker(W.Sigma, X.Sigma)
  #### Correlation matrix: CorrMIX ####



  #### linear combination coefficients vector: W generation. Mean variation included ####
  #=================================================================================#
  #== Mean is log(gamma*p)/0 for X_j(t) if basis generated from bs with Intercept ==#
  #== since mean(X_j(t))= W.linear[t, ] %*% theat_j= log(gamma*p)/0 * sum(W.linear[t, ]) ==#
  #== sum(W.linear[t, ])= 1, other basis might not be true ==#
  #=================================================================================#

  mean_curve <- rep(0
                    #log(p*gamma / 2)
                    , times = p * df_W)
  group2 <- matrix(1:(df_W * p), nrow = df_W)
  b <- tail(1:p, floor(p/2))
  mean_curve[as.vector(group2[, add.on])] <- mean_curve[as.vector(group2[, add.on])] + log(p*gamma)

  W.linear <- mvrnorm(n, rep(0, p * ns_dense), CovMIX)
  #W.linear <- mvrnorm(n, mean_curve, CovMIX)
  #### linear combination coefficients vector: W ####


  #### longitinal composition: X.comp_full generation ####
  X <- array(dim = c(n, p, ns_dense), NA)
  dimnames(X)[[2]] <- paste0("X", 1:p)
  X.comp <- X
  I <- diag(p)
  for(s in 1:n) {

    ## X curve follows normal distribution
    #X[, , s] <- t( apply(W.linear, 1, function(x, W.Phi) W.Phi %*% x, W.Phi = W.Phi) ) #?
    X[s, ,] <- matrix(nrow = p, ncol = ns_dense, W.linear[s,],byrow = FALSE)
    #X[, add.on , s] <- X[, add.on , s] + log(gamma * p)
    ## X.comp follows a logistic normal distribution for each time point after exp(x(s))/sum(exp(x(s)))
  }

  for(s in 1:ns) {
    X.comp[, , s] <- exp(X[, , s])
    X.comp[, , s] <- X.comp[, , s] / rowSums(X.comp[, , s])
    #plot(X.comp[1, , s])
    ## Under threshold, measurement could not be detected and recorded as 0.
    ## If threshold=0, all measurement detected.
    ## 0's are replaced by replace value
    ##X.comp[, , s][X.comp[, , s] <= threshold] <- replace
  }

  #### longitinal composition: X.comp_full ####


  #### Integration:Z_ITG ####
  #X.comp_log <- log(X.comp)
  #DD <- alply(X.comp_log, .margins = 1,
  #           function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
  #           sseq = sseq )

  D <- alply(X.comp, .margins = 1,
             function(x, sseq) data.frame(t(rbind(TIME = sseq, x))),
             sseq = sseq )

  if(discrete) {
    D <- lapply(D, function(x, ns, ns_dense) {
      T.obs <- sample(ns_dense, ns)
      T.obs <- sort(T.obs)
      x <- x[T.obs, ]
      return(x)
    }, ns = ns, ns_dense = ns_dense)
  }
  Z_t.full <- ldply(D[1:n], data.frame, .id = "Subject_ID")


  Z_ITG <- sapply(D, function(x, sseq, beta.basis, insert, method){
    x[, -1] <- log(x[, -1])
    ITG(X = x, basis = beta.basis, sseq = sseq, insert = insert, method = method)$integral
  } ,sseq = sseq, beta.basis = beta.basis, insert = insert, method = method)

  Z_ITG <- t(Z_ITG)
  #### Integration:Z_ITG ####

  #### Control varaible combining Intercept: Z_c, generation ####
  ## beta_c and beta0
  beta0 <- range_beta_c

  if(m > 0) {
    Z_control <- matrix(NA, nrow = n, ncol = m)

    ## ceiling(m/2) is generated by norm;
    ## (m - ceiling(m/2)) is generated by binary with prob = 0.5
    Z_control[, (floor(m/2) + 1):m] <- matrix(rnorm(n * length((floor(m/2) + 1):m)),
                                              nrow = n)
    Z_control[, -((floor(m/2) + 1):m)] <- matrix(sample(c(0,1), n * floor(m/2), replace = TRUE, prob = c(0.5, 0.5)),
                                                 nrow = n)
    ## Combine Intercept with control variables
    beta_c <- rep(range_beta_c, times = m)
  } else {
    Z_control <- matrix(0, nrow = n, ncol = 1)
    beta_c <- 0
  }
  #### Control varaible combining Intercept ####

  #### Response: y, generation ####
  Y.tru <- Z_control %*% beta_c + Z_ITG %*% beta_C + ifelse(intercept, beta0, 0)
  sd_ratio <- sd( Z_control * beta_c) / sd( Z_ITG %*% beta_C)
  error <- rnorm(n, 0, 1)
  sigma_e <- sd(Y.tru) / (sd(error) * SNR)
  error <- sigma_e*error
  Y.obs <- Y.tru + error
  y_sd <- sd(Y.obs)
  #### Response: y ####


  #### Compositioanal Data output: Z_t.full and Z_t.obs ####


  ## Each time point is with prob obs_spar to be observed, #obs follows bin(n, obs_spar)
  ## Different Subject with varying #observations and observed time points

  if(obs_spar == 1) {
    Z_t.obs <- Z_t.full
  } else {
    Z_t.obs <- lapply(D, function(x, obs_spar) {
      n <- dim(x)[1]
      #lambda <- obs_spar * n
      #n.obs <- rpois(1, lambda)
      #T.obs <- sample(n, size = n.obs)
      T.obs <- replicate(n, sample(c(0,1), 1, prob = c(1 - obs_spar, obs_spar)))
      T.obs <- which(T.obs == 1)
      x.obs <- x[T.obs, ]
      return(x.obs)
    }, obs_spar = obs_spar)

    Z_t.obs <- plyr::ldply(Z_t.obs, data.frame, .id = "Subject_ID")
  }
  #### Compositioanal Data output: Z_t.full and Z_t.obs ####

  #### Output ####
  if(m == 0) {
    Z_control <- NULL
    beta_c <- NULL}

  data <- list(y = Y.obs, Comp = Z_t.obs, Zc = Z_control, intercept = intercept)
  beta <- c(beta_C, beta_c, ifelse(intercept, beta0, 0))
  data.raw <- list(Z_t.full = Z_t.full, Z_ITG = Z_ITG,
                   Y.tru = Y.tru, X = X, W = W.linear)
  basis.info <- cbind(sseq, beta.basis)
  parameter <- list(p = p, n = n, m = m,
                    k = df_beta, Jx = df_W, ns = ns, basis_W = basis_W, basis_beta = basis_beta,
                    rho_X = rho_X, rho_W = rho_W, Corr_X = Corr_X, Corr_W = Corr_W, sigma = sigma,
                    degree_W = degree_W, degree_beta = degree_beta,
                    SNR = SNR,  sigma_e = sigma_e, sd_ratio = sd_ratio, y_sd = y_sd,
                    theta.add = add.on, obs_spar = obs_spar)

  output <- list(data = data, beta = beta, basis.info = basis.info, data.raw = data.raw,
                 parameter = parameter)
  output$call <- this.call
  #### Output ####

  return(output)
}




#'
#' Simulation for composition data
#'
#' Simulate compositional data.
#'
#' @param n sample size
#'
#' @param p size of compositional predictors falling in \eqn{S^p}
#'
#' @param intercept including intercept or not to generate response variable. Default value is FALSE
#'
#' @param rho a scaler used to generate \code{p*p} autocorrelation matrix
#'
#' @param sigma standard deviation for noise terms, which follows iid norm distribution with mean 0
#'
#' @param gamma,add.on To generate compsotional data, we first generate \code{n*p} matrix \code{X} from multivariate normal distribution and
#'                     than transform it to compostional data.
#'                     \code{gamma} is a scaler and add.on is a index vector. Assign \code{log(p)*gamma} to means of \code{X} for columns add.on ,
#'                     while others with 0's.
#'
#' @param beta coefficients for composition variables
#'
#' @param beta0 coefficient for intercept
#'
#'
#' @details
#' The setup of this simulation follows Variable selection in regression with compositional covariates by WEI LIN, PIXU SHI, RUI FENG AND HONGZHE LI.
#'
#' @examples
#'
#' n = 50
#' p = 30
#' rho = 0.2
#' sigma = 0.5
#' gamma = 0.5
#' add.on = 1:5
#' beta = c(1, -0.8, 0.6, 0, 0, -1.5, -0.5, 1.2)
#' beta = c( beta, rep(0, times = p - length(beta)) )
#' intercept = FALSE
#' Comp_data = comp_simulation(n = n, p = p, rho = rho, sigma = sigma, gamma  = gamma, add.on = add.on,
#'                             beta = beta, intercept = intercept)
#'
#' @export
#'
#' @importFrom MASS mvrnorm
#'


comp_simulation <- function(n, p, rho = 0.2, sigma = 0.5, gamma = 0.5, add.on = 1:5,
                            beta = c( c(1, -0.8, 0.6, 0, 0, -1.5, -0.5, 1.2), rep(0, times = p - 8) ),
                            beta0  = 1, intercept = FALSE) {

  SIGMA <- CorrAR(p, rho)
  theta <- rep(0, times = p)
  theta[add.on] <- log(gamma * p)
  W <- mvrnorm(n, theta, SIGMA)
  X <- exp(W)
  X.comp <- X / rowSums(X)
  Z <- log(X.comp)
  Y.tru <- Z %*% beta + as.integer(intercept)*beta0
  y <- Y.tru + rnorm(n, 0, sigma)
  data <- list(y = y, Z = Z, Zc = NULL, intercept = intercept, beta = beta, X.comp = X.comp)
  return(data)
}