Nothing
#' Multimodal independent component analysis
#'
#' The input datasets are assumed to be two matrices sharing the column space.
#' MICA decomposes the matrices simutanously
#' and extracts the components that maximizes the mutual information
#' between the components.
#' @param X A matrix sharing the column space with Y (??? x N)
#' @param Y A matrix sharing the column space with X (??? x N)
#' @param J The rank parameter to decompose the matrices
#' @param eta A learning rate parameter of stochastic gradient descent
#' @param verbose Verbose option
#' @param mu A learning rate parameter of stochastic gradient descent
#' @param gamma_ts Weighting factor for dependence on independence
#' @return A list containing the result of the decomposition
#' @examples
#' X <- array(runif(10*20), dim=c(10,20))
#' Y <- array(runif(15*20), dim=c(15,20))
#' J <- 20
#' out <- MICA(X, Y, J=J)
#' @importFrom stats rnorm
#' @export
MICA <- function(X, Y, J, eta=1000*1e-4, verbose=FALSE,
mu=50.0*1e-4, gamma_ts=1.0){
###########
# gamma_ts example:
# gamma_ts <- 0.2 / (c(1:nSample) + 1) # used in simulation study of MICA paper
# gamma_ts <- rep(0, nSample) # only independence is evaluated.
# gamma_ts <- rep(1, nSample) # only dependence is evaluated.
######################################
# Argument Check
######################################
.checkMICA(X, Y, J, eta, verbose)
######################################
# Initialization (e.g. CCA)
######################################
int <- .initMICA(X, Y, J)
A <- int$A
B <- int$B
X <- int$XScaled
Y <- int$YScaled
nSample <- nrow(X)
N <- J # Match the notation for MICA paper.
ABChange <- 1
# Time varying learning rate for online estimation of statistics
if (length(mu) < 2){
# if constant mu is given, convert it to timeseries.
mu <- rep(mu, nSample)
}
# Weighting factor for dependence on independence
if (length(gamma_ts) < 2){
# if constant gamma_ts is given, convert it to timeseries.
gamma_ts <- rep(gamma_ts, nSample)
}
# None of the diagonal elements must be 1.
# because rho_bar_i is computed using the term R[i,i] / (1 - R[i,i]^2).
R <- matrix(0, ncol = N, nrow = N)
# initialize \beta_i^{k, l}
# definition: \beta_i^{k, l} == beta_i_k_l__zero_origin[i, k+1, l+1]
beta_i_k_l__zero_origin <- array(data = 1, dim = c(N, 5, 5))
for (i in 1:N){
for (k in 0:4){
for (l in 0:4){
# use joint cumulants of random normal variable
beta_i_k_l__zero_origin[i, k+1, l+1] <- mean(
rnorm(1000)^k * rnorm(1000)^l) - .beta_zero_k_l(k, l)
}
}
}
######################################
# Iteration
######################################
# use online algorithm
for (iter in 1:nSample){
u <- as.numeric(A %*% as.numeric(X[iter, ])) # size = (N, )
v <- as.numeric(B %*% as.numeric(Y[iter, ])) # size = (N, )
r <- rep(0, N) # size = (N, )
s <- rep(0, N) # size = (N, )
# Make u and v to uncorrelated variables r and s.
for (i in 1:N){
rho_i <- R[i, i]
c_plus_i <- 1 / 2 * ((1 + rho_i)^(-1/2) + (1 - rho_i)^(-1/2))
c_minus_i <- 1 / 2 * ((1 + rho_i)^(-1/2) - (1 - rho_i)^(-1/2))
r[i] <- c_plus_i * u[i] + c_minus_i * v[i]
s[i] <- c_minus_i * u[i] + c_plus_i * v[i]
}
# Online estimation of statistic beta_i^{k,l}
for (i in 1:N){
for (k in 0:4){
for (l in 0:4){
beta_i_k_l__zero_origin[i, k+1, l+1] <-
beta_i_k_l__zero_origin[i, k+1, l+1] -
mu[iter] * (
beta_i_k_l__zero_origin[i, k+1, l+1] -
r[i]^k * s[i]^l + .beta_zero_k_l(k, l))
}
}
}
# Online estimation of statistic R
for (i in 1:N){
for (j in 1:N){
R[i, j] <- R[i, j] - mu[iter] * (R[i, j] - u[i]*v[j])
}
}
# .gradA() means (-1) * ∂ε_tot/∂A A^TA
A_new <- A - eta * .gradA(u, v, A, B, N, J, beta_i_k_l__zero_origin, R, r, s, gamma_ts[iter])
B_new <- B - eta * .gradB(u, v, A, B, N, J, beta_i_k_l__zero_origin, R, r, s, gamma_ts[iter])
# Orthgonalize
A_new <- .orthgonalize(A_new)
B_new <- .orthgonalize(B_new)
# Record change of A and B
# ABChange[iter] <- max(max(abs(A_new - A)), max(abs(B_new - B)))
ABChange[iter] <- (mean(abs(A_new - A)) + mean(abs(B_new - B))) / 2
# Verbose
if (verbose){
cat(paste0(iter,
" |max(max(abs(A_new - A)), max(abs(B_new - B)))| = ",
ABChange[iter], "\n"))
}
# Update
A <- A_new
B <- B_new
}
# Estimate original source using last A and B.
# Note that this method is difficult in an online setting.
U <- X %*% t(A)
V <- Y %*% t(B)
# Output
list(U=U, V=V, A=A, B=B, J=J, eta=eta, num.iter=iter,
verbose=verbose, ABChange=ABChange)
}
.checkMICA <- function(X, Y, J, eta, verbose){
stopifnot(is.matrix(X))
stopifnot(is.matrix(Y))
stopifnot(is.numeric(J))
stopifnot(length(J) == 1)
stopifnot(is.numeric(eta))
stopifnot(length(eta) == 1)
stopifnot(min(ncol(X), ncol(Y)) >= J)
stopifnot(is.logical(verbose))
}
.initMICA <- function(X, Y, J){
XScaled <- scale(X)
YScaled <- scale(Y)
A <- diag(J)
B <- diag(J)
list(A=A, B=B, XScaled=XScaled, YScaled=YScaled)
}
# factorial function
.fact <- function(x){
ifelse (x != floor(x) | x < 0, NA, ifelse(x == 0 , 1, prod(1:x)))
}
# phi() function are used in Section: 6 in
# Yang, Howard Hua, and Shun-ichi Amari.
# "Adaptive online learning algorithms for blind separation:
# maximum entropy and minimum mutual information." Neural computation 9.7 (1997): 1457-1482.
.phi <- function(y){2 * tanh(y)}
# phi <- function(y){ y^3 }
# phi <- function(y){ 3 / 4 * y^11 + 15 / 4 * y^9 - 14 / 3 * y^7 - 29 / 4 * y^5 + 29 / 4 * y^3 }
.beta_zero_k_l <- function(k, l){
if ((k == 4) || (l == 4)){
return(3)
} else if ((k == 2) && (l == 2)){
return(1)
} else {
return(0)
}
}
.der_H_beta_i_der_beta_i_k_l <- function(i, k, l, beta_i_k_l__zero_origin){
if ((k==3) && (l==0)){
return(- 1 / (2 * .fact(3) ) * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==2) && (l==1)){
return(- 1 / (2 * .fact(3) ) * 3 * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==1) && (l==2)){
return(- 1 / (2 * .fact(3) ) * 3 * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==0) && (l==3)){
return(- 1 / (2 * .fact(3) ) * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==4) && (l==0)){
return(- 1 / (2 * .fact(4)) * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==3) && (l==1)){
return(- 1 / (2 * .fact(4)) * 4 * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==2) && (l==2)){
return(- 1 / (2 * .fact(4)) * 6 * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==1) && (l==3)){
return(- 1 / (2 * .fact(4)) * 4 * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else if ((k==0) && (l==4)){
return(- 1 / (2 * .fact(4)) * 2 * .beta_i_k_l(i, k, l, beta_i_k_l__zero_origin))
} else {
stop("given term is not included in this approximation.")
}
}
.beta_i_k_l <- function(i, k, l, beta_i_k_l__zero_origin){
beta_i_k_l__zero_origin[i, k+1, l+1]
}
.der_riksil_der_risiT <- function(k, l, r_i, s_i){
if (k > 0){
der_riksil_der_ri <- k * r_i^(k-1) * s_i^l
} else {
der_riksil_der_ri <- 0
}
if (l > 0){
der_riksil_der_si <- l * s_i^(l-1) * r_i^k
} else {
der_riksil_der_si <- 0
}
return(c(der_riksil_der_ri, der_riksil_der_si))
}
.gradA <- function(u, v, A, B, N, J, beta_i_k_l__zero_origin, R, r, s, gamma_t){
Phi <- rep(0, N) # size = (N, )
Xi <- rep(0, N) # size = (N, )
Psi <- rep(0, N) # size = (N, )
Rho_bar <- rep(0, J) # size = (N, )
for (i in 1:N){
rho_i <- R[i, i]
c_plus_i <- 1 / 2 * ((1 + rho_i)^(-1/2) + (1 - rho_i)^(-1/2))
c_minus_i <- 1 / 2 * ((1 + rho_i)^(-1/2) - (1 - rho_i)^(-1/2))
f_i_g_i <- c(0, 0)
for (k in 0:5){
for (l in 0:5){
# Only all subscript sets of k and l contained in equation (12) are considered.
if ((k + l <= 2) || (k + l > 4)) next
f_i_g_i <- f_i_g_i +
.der_H_beta_i_der_beta_i_k_l(i, k, l, beta_i_k_l__zero_origin) *
.der_riksil_der_risiT(k, l, r[i], s[i])
}
}
f_i <- as.numeric(f_i_g_i[1])
g_i <- as.numeric(f_i_g_i[2])
xi_i <- c_plus_i * f_i + c_minus_i * g_i
d_plus_i <- - 1 / 4 * ((1 + rho_i)^(-3/2) + (1 - rho_i)^(-3/2))
d_minus_i <- - 1 / 4 * ((1 + rho_i)^(-3/2) - (1 - rho_i)^(-3/2))
psi_i <- (d_plus_i * u[i] + d_minus_i * v[i]) * f_i + (d_minus_i * u[i] + d_plus_i * v[i]) * g_i
rho_bar_i <- rho_i / (1-rho_i^2)
# store them.
Phi[i] <- .phi(u[i])
Xi[i] <- xi_i
Psi[i] <- psi_i
Rho_bar[i] <- rho_bar_i
}
der_A_der_t_div_minus_eta_t <-
(diag(N) + (1 - gamma_t) * Phi %*% t(u) - gamma_t * Xi %*% t(u) +
gamma_t * diag(Rho_bar - Psi) %*% t(R)) %*% A
return(- der_A_der_t_div_minus_eta_t)
}
.gradB <- function(u, v, A, B, N, J, beta_i_k_l__zero_origin, R, r, s, gamma_t){
.gradA(v, u, B, A, N, J,
aperm(beta_i_k_l__zero_origin,
c(1,3,2)), t(R), s, r, gamma_t)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.