R/mdm_ica.R

#' Independent Component Analysis via Mutual Dependence Measures
#'
#' \code{mdm_ica} performs independent component analysis by minimizing mutual dependence measures 
#' of all univariate components in \code{X}.
#'
#' @param X A matrix or data frame, where rows represent samples, and columns represent components.
#' @param num_lhs The number of points generated by Latin hypercube sampling.
#'   If omitted, an adaptive number is used.
#' @param type The type of mutual dependence measures, including
#' \itemize{
#'   \item \code{asym}: asymmetric measure \eqn{\mathcal{R}_n} based on distance covariance 
#'     \eqn{\mathcal{V}_n};
#'   \item \code{sym}: symmetric measure \eqn{\mathcal{S}_n} based on distance covariance 
#'     \eqn{\mathcal{V}_n};
#'   \item \code{comp}: simplified complete measure \eqn{\mathcal{Q}_n^\star} based on 
#'     incomplete V-statistics;
#'   \item \code{dhsic}: d-variable Hilbert--Schmidt independence criterion dHSIC\eqn{_n} based on 
#'     Hilbert--Schmidt independence criterion HSIC\eqn{_n}. 
#' }
#' @param num_bo The number of points evaluated by Bayesian optimization.
#' @param kernel The kernel of the underlying Gaussian process in Bayesian optimization, including
#' \itemize{
#'   \item \code{exp}: squared exponential kernel;
#'   \item \code{mat}: Matern 5/2 kernel.
#' }
#' @param algo The algorithm of optimization, including
#' \itemize{
#'   \item \code{def}: deflation algorithm, where the components are extracted one at a time;
#'   \item \code{par}: parallel algorithm, where the components are extracted simultaneously.
#' }
#'      
#' @return \code{mdm_ica} returns a list including the following components:
#' \item{theta}{The rotation angles of the estimated unmixing matrix.}
#' \item{W}{The estimated unmixing matrix.}
#' \item{obj}{The objective value of the estimated independence components.}
#' \item{S}{The estimated independence components.}
#'
#' @references Jin, Z., and Matteson, D. S. (2017).
#'   Generalizing Distance Covariance to Measure and Test Multivariate Mutual Dependence.
#'   arXiv preprint arXiv:1709.02532.
#'   \url{https://arxiv.org/abs/1709.02532}.
#' @references Pfister, N., et al. (2018).
#'   Kernel-based tests for joint independence.
#'   Journal of the Royal Statistical Society: Series B (Statistical Methodology), 80(1), 5-31.
#'   \url{http://dx.doi.org/10.1111/rssb.12235}.
#' 
#' @importFrom energy dcov
#' @importFrom MDMeasure mdm
#' @importFrom dHSIC dhsic
#' @importFrom rBayesianOptimization BayesianOptimization
#' 
#' @include functions.R
#'
#' @export
#'
#' @examples
#' # X is a 10 x 3 matrix with 10 samples and 3 components
#' X <- matrix(rnorm(10 * 3), 10, 3)
#'
#' # deflation algorithm
#' mdm_ica(X, type = "asym", algo = "def")
#' # parallel algorithm
#' mdm_ica(X, type = "asym", algo = "par")
#' 
#' \dontrun{
#' # bayesian optimization with exponential kernel
#' mdm_ica(X, type = "sym", num_bo = 1, kernel = "exp", algo = "par")
#' # bayesian optimization with matern kernel
#' mdm_ica(X, type = "comp", num_bo = 1, kernel = "mat", algo = "par")
#' }

mdm_ica <- function(X, num_lhs = NULL, type = "comp", num_bo = NULL, kernel = "exp", algo = "par") 
{
  X <- as.matrix(X)
  num_obs <- nrow(X)
  num_comp <- ncol(X)

  X <- scale(X, center = TRUE, scale = FALSE)
  H <- sqrt_inv(cov(X))
  Z <- X %*% t(H)
  
  if (is.null(num_lhs)) {
    num_lhs <- 10 * num_comp
  }
      
  theta_comb <- latin_hc_samp(num_comp, num_lhs)
  theta_list <- theta_comb$l
  theta_mat <- theta_comb$m
  W_list <- lapply(theta_list, theta_to_W)
  W_list <- lapply(W_list, t)
  S_list <- lapply(W_list, FUN = function(W) Z %*% W)

  if (type == "asym") {
    obj_list <- unlist(lapply(S_list, asym_obj))
    if (is.null(num_bo)) {
      theta_init <- theta_list[[which(obj_list == min(obj_list))[1]]]
    } else {
      theta_init <- bayes_opt(Z, theta_eva = theta_mat, obj_eva = obj_list, type = type, 
        num_bo = num_bo, kernel = kernel)$theta
    }
  } else if (type == "sym") {
    obj_list <- unlist(lapply(S_list, sym_obj))
    if (is.null(num_bo)) {
      theta_init <- theta_list[[which(obj_list == min(obj_list))[1]]]
    } else {
      theta_init <- bayes_opt(Z, theta_eva = theta_mat, obj_eva = obj_list, type = type, 
        num_bo = num_bo, kernel = kernel)$theta
    }
  } else if (type == "comp") {
    obj_list <- unlist(lapply(S_list, comp_obj))
    if (is.null(num_bo)) {
      theta_init <- theta_list[[which(obj_list == min(obj_list))[1]]]
    } else {
      theta_init <- bayes_opt(Z, theta_eva = theta_mat, obj_eva = obj_list, type = type, 
        num_bo = num_bo, kernel = kernel)$theta
    }
  } else if (type == "dhsic") {
    obj_list <- unlist(lapply(S_list, dhsic_obj))
    if (is.null(num_bo)) {
      theta_init <- theta_list[[which(obj_list == min(obj_list))[1]]]
    } else {
      theta_init <- bayes_opt(Z, theta_eva = theta_mat, obj_eva = obj_list, type = type, 
        num_bo = num_bo, kernel = kernel)$theta
    }
  } else {
    stop("Invalid type. Read ?mdm_ica for proper syntax.")
  }

  out <- mdm_ica_opt(Z, theta_init = theta_init, type = type, algo = algo)
  
  return(out)
}

# input Z should be prewhitened
mdm_ica_opt <- function(Z, theta_init = NULL, type, algo) {
  d <- dim(Z)[2]
  p <- d * (d - 1) / 2
  
  if (is.null(theta_init)) {
    theta_init <- numeric(p)
  }

  if (p != length(theta_init)) {
    stop("theta_init must have length d * (d - 1) / 2.")  
  }
  
  if (algo == "def") {
    index <- seq(d - 1, 1)
    cum_index <- c(0, cumsum(index))
    theta_hat <- theta_init  
    obj <- 0
    for (i in 1:(d - 1)) {
      l <- cum_index[i] + 1
      u <- cum_index[i + 1]
      theta_hat[l:u] <- rep(NA, index[i])
      p_init <- theta_init[l:u] 
      fun <- mdm_ica_def_obj(Z = Z, index = i, theta = theta_hat, type = type)
      opt <- nlm(f = fun, p = p_init)
      obj <- obj + opt$minimum
      theta_hat[l:u] <- opt$estimate
    }
  } else if (algo == "par") {
    fun <- mdm_ica_par_obj(Z = Z, type = type)
    opt <- nlm(f = fun, p = theta_init)
    obj <- opt$minimum
    theta_hat <- opt$estimate
  } else {
    stop("Invalid algo. Read ?mdm_ica for proper syntax.")
  }

  W_hat <- theta_to_W(theta_hat)
  S_hat <- Z %*% t(W_hat)

  return(list(theta = theta_hat, W = W_hat, obj = obj, S = S_hat))
}

mdm_ica_def_obj <- function(Z, index, theta, type) {
  if (type == "asym") {
    function(p) {
      theta[is.na(theta)] <- p
      d <- dim(Z)[2]
      W <- theta_to_W(theta)
      S <- Z %*% t(W)
      # (1) full method
      # asym_obj(S)
      # (2) cumu method
      # val <- 0
      # for (j in index:(d - 1)) {
      #   val <- val + dcov(S[, j], S[, (j + 1):d])^2
      # }
      # val
      # (3) curr method
      dcov(S[, index], S[, (index + 1):d])^2
    }
  } else {
    stop("algo = \"def\" is only for type = \"asym\".")
  }
}

mdm_ica_par_obj <- function(Z, type) {
  if (type == "asym") {
    function(p) {
      W <- theta_to_W(p)
      S <- Z %*% t(W) 
      asym_obj(S)
    }
  } else if (type == "sym") {
    function(p) {
      W <- theta_to_W(p)
      S <- Z %*% t(W)
      sym_obj(S)
    }
  } else if (type == "comp") {
    function(p) {
      W <- theta_to_W(p)
      S <- Z %*% t(W)
      comp_obj(S)
    }
  } else if (type == "dhsic") {
    function(p) {
      W <- theta_to_W(p)
      S <- Z %*% t(W)
      dhsic_obj(S)
    }
  } else {
    stop("Invalid type. Read ?mdm_ica for proper syntax.")
  }
}

bayes_opt <- function(Z, theta_eva, obj_eva, type, num_bo, kernel) {
  d <- dim(Z)[2]
  p <- d * (d - 1) / 2
  
  eval(parse(text = paste(
    "bayes_opt_obj <- function(Z) {
    function(", paste(paste("p", 1:p, sep = ""), collapse = ", "), ") {
    theta <- c(", paste(paste("p", 1:p, sep = ""), collapse = ", "), ")
    sizeZ <- dim(Z)
    n <- sizeZ[1]
    d <- sizeZ[2]
    W <- theta_to_W(theta)
    SH <- (Z %*% t(W))
    list(", 
    if (type == "asym") {
      "Score = -asym_obj(SH), Pred = 0)\n"
    } else if (type == "sym") {
      "Score = -sym_obj(SH), Pred = 0)\n"
    } else if (type == "comp") {
      "Score = -comp_obj(SH), Pred = 0)\n"
    } else if (type == "dhsic") {
      "Score = -dhsic_obj(SH), Pred = 0)\n"
    } else {
      stop("Invalid type. Read ?mdm_ica for proper syntax.")
    },
    "}\n", 
    "}",
    sep = "")))
  
  obj <- bayes_opt_obj(Z = Z)
  
  init <- data.frame(p = theta_eva, Value = -obj_eva)
  colnames(init) <- c(paste("p", 1:p, sep = ""), "Value")

  if (p >= d) {
    eval(parse(text = paste("bounds <- list(", 
                            paste(c(paste("p", 1:(d-1), " = c(0, 2 * pi)", sep = ""), 
                                    paste("p", d:p, " = c(0, pi)", sep = "")), collapse = ","),
                            ")", sep = "")))
  } else {
    eval(parse(text = paste("bounds <- list(", 
                            paste(paste("p", 1:(d-1), " = c(0, 2 * pi)", sep = ""), collapse = ","),
                            ")", sep = "")))
  }
  
  if (kernel == "exp") {
    out <- BayesianOptimization(obj,
                                bounds = bounds,
                                init_grid_dt = init, n_iter = num_bo,
                                acq = "ei", eps = 0, verbose = FALSE,
                                kernel = list(type = "exponential", power = 2))
    
  } else if (kernel == "mat") {
    out <- BayesianOptimization(obj,
                                bounds = bounds,
                                init_grid_dt = init, n_iter = num_bo,
                                acq = "ei", eps = 0, verbose = FALSE,
                                kernel = list(type = "matern", nu = 5/2))
  } else {
    stop("Invalid kernel. Read ?mdm_ica for proper syntax.")
  }
  
  obj <- -out$Best_Value
  theta.hat <- out$Best_Par
  W.hat <- theta_to_W(theta.hat)

  return(list(theta = theta.hat, W = W.hat, obj = obj))
}
zejin/MDMICA documentation built on May 14, 2019, 10:36 p.m.