R/WAPL.R

Defines functions predict.gSAM

Documented in predict.gSAM

#' Group-Sparse Additive Machine
#' A group version of Sparse Additive Machine
#'  @param X, n by p Covariate matrix
#'  @param y, Treatment assigned, length n vector
#'  @param w, instance weight
#'  @param p: integer, total number of basis
#'  @param lambda: A user supplied lambda sequence(ordered in decreasing value). Normally set it to null so the computing algorithm calculate it automatically.
#'  @param nlambda: number of lambda, default is 50
#'  @param lambda.min.ratio: the ration between max lambda and minimal lambda
#'  @param group: group information, should be consective, default is NULL i.e. no group information presents
#'  @param thol: Stopping precision. The default value is 1e-5.
#'  @param mu: Smoothing parameter used in approximate the Hinge Loss. The default value is 0.05.
#'  @param max.ite: The number of maximum iterations. The default value is 1e5
#'  @import gglasso
#'  @import glmnet
#'  @import SAM
#'  @export
#'  @return fitted ITR, an object of "gSAM" class
#'  @examples
#'  train.data <- gSim(N=200, sigma=0, scenario=1)
#'  H <- train.data[[1]]
#'  A <- train.data[[2]]
#'  R2 <- train.data[[3]]
#'  group=rep(1:20, each=3)
#'  tst = gSAM(X=H, y=A, w=R2, p=3, prop=rep(1,200), pentype = "lasso",lambda.min.ratio=0.2, m=7, group= group, plist=c(3:5))
gSAM <- function (X, y, w = NULL, p = 3, lambda = NULL, nlambda = 50, lambda.min.ratio = 0.2, group=NULL,
                  thol = 1e-05, mu = 0.05, max.ite = 1e+05)
{
  gcinfo(FALSE)
  fit = list()

  X = as.matrix(X)
  y = as.vector(y)
  n = nrow(X)
  d = ncol(X)

  m = d * p
  if (is.null(w)) {
    w = rep(1, n)
  }
  np = sum(y == 1)
  nn = sum(y == -1)
  if ((np + nn) != n) {
    cat("Please check the labels. (Must be coded in 1 and -1)")
    fit = "Please check the labels."
    return(fit)
  }
  if (np > nn)
    a0 = 1 - nn/np * mu
  else a0 = np/nn * mu - 1

  X.min = apply(X, 2, min)
  X.max = apply(X, 2, max)
  X.ran = X.max - X.min
  X.min.rep = matrix(rep(X.min, n), nrow = n, byrow = T)
  X.ran.rep = matrix(rep(X.ran, n), nrow = n, byrow = T)
  X = (X - X.min.rep)/X.ran.rep
  fit$X.min = X.min
  fit$X.ran = X.ran
  Z = matrix(0, n, m)
  fit$nkots = matrix(0, p - 1, d)
  fit$Boundary.knots = matrix(0, 2, d)
  for (j in 1:d) {
    tmp = (j - 1) * p + c(1:p)
    tmp0 = ns(X[, j], df = p)
    Z[, tmp] = tmp0
    fit$nkots[, j] = attr(tmp0, "knots")
    fit$Boundary.knots[, j] = attr(tmp0, "Boundary.knots")
  }

  if(!is.null(group)){
    tb = table(group)
  }else{tb= NULL}

  if (is.null(group) || length(tb)==d){
    ##suppose no group info.
    print("Consider No Group Effect")
    Z = cbind(matrix(rep(y, m), n, m) * Z, y)
    if (is.null(lambda)) {
      u = cbind((rep(1, n) - a0 * y)/mu, rep(0, n), rep(1, n))
      u = apply(u, 1, median)
      if (is.null(nlambda))
        nlambda = 50
      lambda_max = max(sqrt(colSums(matrix(t(Z[, 1:(p * d)]) %*%  u, p, d)^2)))
      lambda = exp(seq(log(1), log(lambda.min.ratio), length = nlambda)) *  lambda_max

    }
    else nlambda = length(lambda)
    L0 = norm(Z, "f")^2/mu
    out = .C("grpSVM", Z = as.double(Z), w = as.double(w), lambda = as.double(lambda),
             nnlambda = as.integer(nlambda), LL0 = as.double(L0),
             nn = as.integer(n), dd = as.integer(d), pp = as.integer(p),
             aa0 = as.double(a0), xx = as.double(matrix(0, m + 1,
                                                        nlambda)), mmu = as.double(mu), mmax_ite = as.integer(max.ite),
             tthol = as.double(thol), aalpha = as.double(0.5), df = as.double(rep(0,
                                                                                  nlambda)), func_norm = as.double(matrix(0, d, nlambda)),
             package = "SAM")

    fit$w = matrix(out$xx, ncol = nlambda)
  } else{
    if (length(group) != d){
      cat("Please check the group sizes (Must be same as the number of x)")
      fit = "Please check the group lables."
      return(fit)
    }
    print("Consider Group Effect")
    maxsize <- max(tb)
    ngroup <- length(tb)

    Z <- reformX(z=Z, group=group, maxsize=maxsize, ngroup=ngroup, d, p)
    Z <- cbind(matrix(rep(y, ngroup*maxsize*p), n,  ngroup*maxsize*p) * Z, y)
    if (is.null(lambda)) {
      u = cbind((rep(1, n) - a0 * y)/mu, rep(0, n), rep(1, n))
      u = apply(u, 1, median)
      if (is.null(nlambda))
        nlambda = 50
      lambda_max = max(sqrt(colSums(matrix(t(Z[, 1:( ngroup*maxsize*p)]) %*%  u, maxsize*p, ngroup)^2)))
      lambda = exp(seq(log(1), log(lambda.min.ratio), length = nlambda)) * lambda_max
    }
    else nlambda = length(lambda)
    L0 = norm(Z, "f")^2/mu

    out =.C("grpSVM", Z = as.double(Z), w = as.double(w), lambda = as.double(lambda),
            nnlambda = as.integer(nlambda), LL0 = as.double(L0),
            nn = as.integer(n), dd = as.integer(ngroup), pp = as.integer(maxsize * p),
            aa0 = as.double(a0), xx = as.double(matrix(0,  ngroup* maxsize * p + 1,
                                                       nlambda)), mmu = as.double(mu), mmax_ite = as.integer(max.ite),
            tthol = as.double(thol), aalpha = as.double(0.5), df = as.double(rep(0, nlambda)), func_norm = as.double(matrix(0, d, nlambda)),
            package = "SAM")
    fit$w = DeformCoef(newW=matrix(out$xx, ncol = nlambda), group=group,
                       maxsize=maxsize, ngroup=ngroup, d=d, p=p)
  }
  fit$p = p
  fit$lambda = out$lambda
  fit$df = out$df
  fit$func_norm = matrix(out$func_norm, ncol = nlambda)
  fit$group = group
  rm(out, X, y, Z, X.min.rep, X.ran.rep)
  class(fit) = "gSAM"
  return(fit)
}


#'predict function for gSAM object
#' @param object: model fitted using gSAM function
#' @param newdata: n by p characteristics matrix
#' @return  a list contians the contrast function and the estimation optimal treatment
predict.gSAM <- function(object, newdata, thol = 0, ...){
  gcinfo(FALSE)
  out = list()
  nt = nrow(newdata)
  d = ncol(newdata)
  X.min.rep = matrix(rep(object$X.min,nt),nrow=nt,byrow=T)
  X.ran.rep = matrix(rep(object$X.ran,nt),nrow=nt,byrow=T)
  group = object$group
  p=object$p

  newdata = (newdata-X.min.rep)/X.ran.rep
  newdata = pmax(newdata,0)
  newdata = pmin(newdata,1)

  #m = object$p*d
  Zt = matrix(0, nt, p*d)
  for(j in 1:d){
    tmp = (j-1)*p + c(1:p)
    Zt[,tmp] = ns(newdata[,j],df=object$p,knots=object$knots[,j],Boundary.knots=object$Boundary.knots[,j])
  }

  out$values = cbind(Zt,rep(1,nt))%*% as.matrix(object$w)
  out$labels = sign(out$values-0)

  rm(Zt,newdata)

  return(out)
}
sambiostat/WAPL documentation built on May 26, 2020, 12:17 a.m.