R/sim_MOTTE_data.R

Defines functions create.Z create.B sim_MOTTE_data

Documented in create.B create.Z sim_MOTTE_data

#' Simulate MOTTE formated data
#'
#' @param n.train An integer value, sample size for training data
#' @param n.test An integer value, sample size for testing data
#' @param p An integer value, the dimensionality of covaraites (X)
#' @param q An integer value, the dimensionality of responses (Y)
#' @param ratio An fraction value, the ratio between treatment groups
#' @param cov.mat Covariance matrix for X.b
#' @param trt.f character, the name of treatment effect funciton
#' @param link.f character, the name of treatment effect funciton
#' @param B matrix, the coefficient matrix used in the trt.f
#' @param Z matrix, the coefficient matrix used in the link.f
#'
#' @return A nested list that contain training data and testing data. TODO: add more introdcution to whats in the list
#'
#' @export
#'
#' @importFrom MASS mvrnorm
#' @importFrom stats rbinom
#'
#' @examples
#' set.seed(1)
#'
#' n.train = 400
#' n.test = 1000
#' p = 10
#' q = 3
#' ratio = 0.5
#' cov.mat = diag(p)
#' trt.f = "Polynomial"
#' link.f = "Polynomial"
#' B = create.B(p)
#' Z = create.Z(p,q)
#'
#' # B <- create.B(10)
#' # Z <- create.Z(10, 3)
#'
#' sim.dat <- sim_MOTTE_data( n.train = 500, n.test = 200,
#' p = 10, q = 3, ratio = 0.5,
#' B = B, Z = Z)
#'




# TODO: setup the magnitude for the errors
sim_MOTTE_data <- function(
  n.train = 500, # <-  500
  n.test = 200, # <- 200
  p = 10, #  <- 10
  q = 3, #  <- 3
  ratio = 0.5, # <- 0.5,
  # TODO: set up error prevention for cov.mat, i.e. measuring the dimension
  cov.mat = diag(p),
  # TODO: incorporate the linear and polynormial for both treat.f and link.f in the code
  trt.f = c("Linear", "Polynomial", "Box"),
  link.f = c("Linear", "Polynomial"),
  B ,
  Z
){

  # TODO: extract this as argument for the parameter
  c.x <- sum((1:3)^2)/6
  c.y <- sum((1:3)^2)/6

  # c.x <- 0.5
  # c.y <- 0.5
  trt.f <- trt.f[[1]]
  link.f <- link.f[[1]]

  # TODO: Improve the warning language
  .link.f <- switch(link.f,
                    "Linear" = function(x){x%*%Z/5},
                    "Polynomial" = function(x){(x^2)%*%Z/5},
                    stop("Link function doesn't exist, choose from 'Linear' or 'Polynomial'"))
  .trt.f <- switch(trt.f,
                   "Linear" = function(x, trt){sweep(cbind(1,x), 1, trt, "*")%*%B},
                   # "Polynomial" = function(x, trt){(sweep(cbind(1,x^2), 1, trt, "*"))%*%B},
                    "Polynomial" = function(x, trt){(sweep((cbind(1,x)%*%B) + (cbind(1,x)%*%B)^2, 1, trt, "*"))},
                   "Box" = function(x, trt){
                     .x <- x
                     for (i in 1: nrow(.x)) {
                       if(abs(x[i,1])<1 & abs(x[i,2])<1) .x[i, 1:3] <- 0
                       if(abs(x[i,4])<1 & abs(x[i,5])<1) .x[i, 4:6] <- 0
                       if(abs(x[i,7])<1 & abs(x[i,8])<1) .x[i, 7:9] <- 0
                     }
                     sweep(cbind(1,.x), 1, trt, "*") %*% B
                   },
                   stop("Trt.f doesn't exist, choose from 'Linear' or 'Polynomial' or 'Box'")
  )

  # Simulate the binary treatment assignment for training data
  # Clarification, when trt1, X.e = X.b + X.b%*%B and trt2, X.2 =X.b - X.b%*%B
  Trt.lvls <- c("Trt 1", "Trt 2")
  Trt.train <- factor(Trt.lvls[rbinom(n.train, 1, ratio)+1])

  # Simulate x.b
  X.train.base <- MASS::mvrnorm(n.train,rep(0,p), cov.mat)
  X.train.end <- X.train.base + .trt.f(X.train.base, ifelse(Trt.train=="Trt 1", 1, -1)) + mvrnorm(n.train, rep(0,p), c.x*diag(p))

  Y.train.base <- .link.f(X.train.base) + mvrnorm(n.train, rep(0,q), c.y*diag(q))
  Y.train.end <- .link.f(X.train.end) + mvrnorm(n.train, rep(0,q), c.y*diag(q))


  ####################################
  # Simulate Testing data
  #
  ####################################

  #X.test.base  <- mvrnorm(n.test,rep(0,p),x.sig)
  X.test.base  <- MASS::mvrnorm(n.test,rep(0,p), cov.mat)
  # With/Without treatment X.end
  X.test.trt1.end <-  X.test.base + .trt.f(X.test.base, 1)
  X.test.trt2.end <-  X.test.base + .trt.f(X.test.base, -1)


  # With/Without treatment Y.base
  Y.test.trt1.end <- .link.f(X.test.trt1.end)
  Y.test.trt2.end <- .link.f(X.test.trt2.end)

  # TODO: add return list
  return(
    list(
      train = list(x.b = X.train.base,
                   x.e = X.train.end,
                   trt = Trt.train,
                   y.b = Y.train.base,
                   y.e = Y.train.end),
      test = list(
        x.b = X.test.base,
        y.e.1 = Y.test.trt1.end,
        y.e.2 = Y.test.trt2.end
      )
    )
  )
}

#' Title
#' #TODO: improve the documentation
#' @param p The dimension of the design matrix
#'
#' @return A matrix
#' @export
#'
#' @examples
#' create.B(10)

create.B <- function(p, intercept=T){
  if(p < 9)
    stop("Minimum value for p is 9")
  rbind(ifelse(intercept, 1, 0),
        cbind(
          matrix(
            c(1:3/sqrt(3), rep(0, p-3),
              rep(0,3), 1:3/sqrt(3), rep(0, p-6),
              rep(0,6), 1:3/sqrt(3), rep(0, p-9)),
            nrow = p, ncol = 3),
          matrix(
            c(c(1,-1,-1)*1:3/sqrt(3), rep(0, p-3),
              rep(0,3), c(1,-1,-1)*1:3/sqrt(3), rep(0, p-6),
              rep(0,6), c(1,-1,-1)*1:3/sqrt(3), rep(0, p-9)),
            nrow = p, ncol = 3),
          matrix(
            c(c(1,-1,1)*(1:3)/sqrt(3), rep(0, p-3),
              rep(0,3), c(1,-1,1)*(1:3)/sqrt(3), rep(0, p-6),
              rep(0,6), c(1,-1,1)*(1:3)/sqrt(3), rep(0, p-9)),
            nrow = p, ncol = 3),
          matrix(0,nrow = p, ncol = p-9)
        )
  )
}

#' Title
#' #TODO: improve the documentation
#' @param p The number of rows
#' @param q The number of columns
#'
#' @return A design matrix
#' @export
#'
#' @examples
#' create.Z(10,3)
create.Z <- function(p, q){
  matrix(
    c(1:3/sqrt(7), rep(0, p-3),
      rep(0,3), -1*(1:3)/sqrt(7), rep(0, p-6),
      rep(0,6), c(1,-1,1)*(1:3)/sqrt(7), rep(0, p-9),
      rep(0, p*(q-3))
    ),
    nrow = p, ncol = q
  )
}
boyiguo1/MOTTE.RF documentation built on June 14, 2020, 4:12 p.m.