kliep.projection <- function(alpha, mean.X.de, X.nu, ss.mean.X.de) {
## projection onto the constraint <mean.X.de, alpha> = 1:
## <x,a.new> = <x,a> + <x,x>*(1 - <x,a>)/<x,x> = <x,a> + (1 - <x,a>) = 1
alpha <- alpha + ((1 - sum(mean.X.de * alpha)) / ss.mean.X.de) * mean.X.de
alpha <- pmax(0, alpha)
alpha <- alpha / sum(mean.X.de * alpha)
X.nu.alpha <- X.nu %*% alpha
score <- mean(log(X.nu.alpha))
list(alpha = alpha, X.nu.alpha = X.nu.alpha, score = score)
}
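## A minimal numeric sketch (all data below are synthetic and the names are
## chosen here for illustration) showing that kliep.projection returns a
## nonnegative alpha satisfying the constraint sum(mean.X.de * alpha) = 1;
## wrapped in if (FALSE) so it does not run when this file is sourced:
if (FALSE) {
  set.seed(1)
  X.de <- matrix(runif(20 * 3), 20, 3) # 20 denominator samples, 3 basis fcts
  X.nu <- matrix(runif(30 * 3), 30, 3) # 30 numerator samples
  mean.X.de <- colMeans(X.de)
  kp <- kliep.projection(rep(1, 3), mean.X.de, X.nu, sum(mean.X.de^2))
  stopifnot(all(kp$alpha >= 0), abs(sum(mean.X.de * kp$alpha) - 1) < 1e-12)
}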
kliep.learning.proj <- function(X.de, X.nu, lambda, max.iteration = 100,
eps.list = 10^seq(3, -3, -1)) {
## compare the algorithm in Sugiyama's density-ratio book, p. 58
## mean.X.de is psi_bar_de = 1/n.de sum_i (psi_1(x_i), psi_2(x_i), ...)' over denominator samples
## X.nu = psi_nu = [psi_1(x_1), psi_2(x_1), ...; psi_1(x_2), psi_2(x_2), ...]
## ie samples down rows, basis functions along columns
mean.X.de <- apply(X.de, 2, mean)
nc <- ncol(X.nu)
ss.mean.X.de <- sum(mean.X.de^2) # psi_bar_de' psi_bar_de
alpha <- rep(1, nc)
kp <- kliep.projection(alpha, mean.X.de, X.nu, ss.mean.X.de)
score <- kp$score
alpha <- kp$alpha
X.nu.alpha <- kp$X.nu.alpha
for (eps in eps.list) {
for (i in 1:max.iteration) {
## max 1/n.nu sum_i log(t(nu_i) %*% alpha) s.t. t(mean.X.de) %*% alpha = 1, alpha >= 0
## gradient: d/dalpha log(t(nu_i) %*% alpha) = nu_i / (t(nu_i) %*% alpha)
## rows in t(X.nu) are basis fcts
alpha.new <- alpha + eps * t(X.nu) %*% (1 / X.nu.alpha)
kp <- kliep.projection(alpha.new, mean.X.de, X.nu, ss.mean.X.de)
if (kp$score <= score) break
score <- kp$score
alpha <- kp$alpha
X.nu.alpha <- kp$X.nu.alpha
}
}
list(alpha = alpha, score = score)
}
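## A quick usage sketch of kliep.learning.proj on synthetic design matrices
## (made-up data; lambda is accepted for interface compatibility but unused
## by this learner, so 0 is passed):
if (FALSE) {
  set.seed(2)
  X.de <- matrix(runif(100 * 5), 100, 5)
  X.nu <- matrix(runif(200 * 5), 200, 5)
  fit <- kliep.learning.proj(X.de, X.nu, lambda = 0)
  fit$score # mean log-ratio over numerator samples (the maximized objective)
}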
kliep.learning <- function(X.de, X.nu, lambda, max.iteration = 100,
eps.list = 10^seq(3, -3, -1)) {
## compare the algorithm in Sugiyama's density-ratio book, p. 58
## mean.X.de is psi_bar_de = 1/n.de sum_i (psi_1(x_i), psi_2(x_i), ...)' over denominator samples
## X.nu = psi_nu = [psi_1(x_1), psi_2(x_1), ...; psi_1(x_2), psi_2(x_2), ...]
## ie samples down rows, basis functions along columns
mean.X.de <- apply(X.de, 2, mean)
ss.mean.X.de <- sum(mean.X.de^2) # psi_bar_de' psi_bar_de
alpha <- rep(100, ncol(X.nu)) # crude start; the first projection below rescales it
score <- -Inf
for (eps in eps.list) {
for (i in 1:max.iteration) {
## max 1/n.nu sum_i log(t(nu_i) %*% alpha) s.t. t(mean.X.de) %*% alpha = 1, alpha >= 0
## gradient: d/dalpha log(t(nu_i) %*% alpha) = nu_i / (t(nu_i) %*% alpha)
## rows in t(X.nu) are basis fcts
alpha.new <- alpha + eps * t(X.nu) %*% (1 / (X.nu %*% alpha))
## projection step: <x,a.new> = <x,a> + <x,x>*(1 - <x,a>)/<x,x> = <x,a> + (1 - <x,a>) = 1
alpha.new <- alpha.new + ((1 - sum(mean.X.de * alpha.new)) / ss.mean.X.de) * mean.X.de
alpha.new <- pmax(0, alpha.new)
alpha.new <- alpha.new / sum(mean.X.de * alpha.new)
score.new <- mean(log(X.nu %*% alpha.new))
if (score.new <= score) break
score <- score.new
alpha <- alpha.new
}
}
list(alpha = alpha, score = score)
}
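## kliep.learning inlines the projection that kliep.learning.proj delegates
## to kliep.projection; both run projected gradient ascent on the same
## objective. A sketch (synthetic data) checking that they reach similar
## scores; not identical, since their starting points differ:
if (FALSE) {
  set.seed(3)
  X.de <- matrix(runif(100 * 5), 100, 5)
  X.nu <- matrix(runif(200 * 5), 200, 5)
  s1 <- kliep.learning.proj(X.de, X.nu, lambda = 0)$score
  s2 <- kliep.learning(X.de, X.nu, lambda = 0)$score
  c(s1, s2)
}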
kliep <- function(x.de, x.nu, x.grid, sigma.chosen = 0, is.adaptive = FALSE,
neigh.rank = 5, kernel.low = 0.5, kernel.high = 2, b = 50, fold = 6) {
## Notes: sigma.chosen should be close to the kernel bandwidth of x.de
## b must not be too large (risk of ill-conditioned matrices); around 50 works
## matlab code from http://www.ms.k.u-tokyo.ac.jp/software.html#KLIEP
##
## Kullback-Leibler importance estimation procedure (with cross validation)
##
## Estimating ratio of probability densities
## \frac{ p_{nu}(x) }{ p_{de}(x) }
## from samples
## { xde_i | xde_i\in R^{d} }_{i=1}^{n_{de}}
## drawn independently from p_{de}(x) and samples
## { xnu_i | xnu_i\in R^{d} }_{i=1}^{n_{nu}}
## drawn independently from p_{nu}(x).
##
## Usage (original Matlab interface):
## [wh_x_de,wh_x_re]=KLIEP(x_de,x_nu,x_re,sigma_chosen,b)
##
## Input:
## x_de: d by n_de sample matrix corresponding to `denominator' (iid from density p_de)
## x_nu: d by n_nu sample matrix corresponding to `numerator' (iid from density p_nu)
## x_re: (OPTIONAL) d by n_re reference input matrix
## sigma_chosen: (OPTIONAL) positive scalar representing Gaussian kernel width;
## if omitted (or zero), it is automatically chosen by cross validation
## b: (OPTIONAL) positive integer representing the number of kernels (default: 100 in the Matlab code, 50 in this R port);
##
## Output:
## wh_x_de: estimates of density ratio w=p_nu/p_de at x_de
## wh_x_re: estimates of density ratio w=p_nu/p_de at x_re (if x_re is provided)
##
## (c) Masashi Sugiyama, Department of Computer Science, Tokyo Institute of Technology, Japan.
## sugi@cs.titech.ac.jp, http://sugiyama-www.cs.titech.ac.jp/~sugi/software/KLIEP/
fit.dr(x.de, x.nu, x.grid,
lambda = 0, sigma.chosen = sigma.chosen,
is.adaptive = is.adaptive, neigh.rank = neigh.rank,
kernel.low = kernel.low, kernel.high = kernel.high,
b = b, fold = fold, learning.fct = kliep.learning
)
}
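## A hedged usage sketch: kliep() delegates to fit.dr and kernel.Gaussian,
## which are defined elsewhere in this package, so this runs only with the
## full package loaded; the data and sigma below are made up:
if (FALSE) {
  set.seed(4)
  x.de <- rnorm(500)           # denominator sample, p.de = N(0, 1)
  x.nu <- rnorm(500, 0.5, 0.8) # numerator sample, p.nu = N(0.5, 0.8^2)
  x.grid <- seq(-3, 3, length.out = 200)
  fit <- kliep(x.de, x.nu, x.grid, sigma.chosen = 0.5, b = 50)
  ## fit carries the density-ratio estimates p.nu/p.de on x.grid
}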
loglin.kliep.learning <- function(mean.X.nu, X.de, max.iteration = 100,
eps.list = 10^seq(3, -3, -1)) {
## compare the log-linear model algorithm in Sugiyama's density-ratio book, p. 60
## mean.X.nu is psi_bar_nu = 1/n.nu sum_i (psi_1(x_i), psi_2(x_i), ...)' over numerator samples
## X.de = psi_de = [psi_1(x_1), psi_2(x_1), ...; psi_1(x_2), psi_2(x_2), ...]
## ie samples down rows, basis functions along columns
n.basis <- ncol(X.de) # one coefficient per basis function
alpha <- rep(0.1, n.basis)
lg <- loglin.grad(alpha, mean.X.nu, X.de)
score <- lg$score
grad <- lg$grad
for (eps in eps.list) {
for (i in 1:max.iteration) {
alpha.new <- alpha + eps * grad
lg <- loglin.grad(alpha = alpha.new, mean.X.nu, X.de)
if (lg$score <= score) break
alpha <- alpha.new
score <- lg$score
grad <- lg$grad
}
}
list(alpha = alpha, score = score)
}
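## A minimal sketch of fitting the log-linear model directly on hand-built
## polynomial design matrices (all data below are made up):
if (FALSE) {
  set.seed(5)
  x.de <- rnorm(200)
  x.nu <- rnorm(300, 0.3)
  X.de <- cbind(1, x.de, x.de^2) # samples down rows, basis fcts along columns
  X.nu <- cbind(1, x.nu, x.nu^2)
  fit <- loglin.kliep.learning(colMeans(X.nu), X.de)
  fit$alpha # coefficients of exp(psi(x) %*% alpha), normalized in loglin.ratio
}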
loglin.kliep <- function(x.de, x.nu, x.grid, sigma.chosen = 0, b = 100) {
if (!is.matrix(x.de)) x.de <- matrix(x.de, nrow = 1)
if (!is.matrix(x.nu)) x.nu <- matrix(x.nu, nrow = 1)
if (!is.matrix(x.grid)) x.grid <- matrix(x.grid, nrow = 1)
if (nrow(x.de) != nrow(x.nu)) {
stop("x.de, x.nu unequal dimension")
}
n.nu <- ncol(x.nu)
## Choosing Gaussian kernel center `x_ce', at most b
rand.index <- sample(1:n.nu)
b <- min(b, n.nu)
x.ce <- x.nu[, rand.index[1:b], drop = FALSE]
## assumes sigma.chosen is given (no cross-validation for sigma in this function)
X.de <- kernel.Gaussian(x = x.de, c = x.ce, sigma.chosen)
X.de <- cbind(X.de, 1, t(x.de), t(x.de^2), t(x.de^4))
## X.de <- cbind(X.de,lin.down(x.de,median(x.de)),lin.up(x.de,median(x.de)))
X.nu <- kernel.Gaussian(x.nu, x.ce, sigma.chosen)
X.nu <- cbind(X.nu, 1, t(x.nu), t(x.nu^2), t(x.nu^4)) # basis must match X.de and X.grid (x^4, not x^3)
# X.nu <- cbind(X.nu,lin.down(x.nu,median(x.de)),lin.up(x.nu,median(x.de)))
mean.X.nu <- apply(X.nu, 2, mean)
alpha <- loglin.kliep.learning(mean.X.nu, X.de)$alpha
y.grid <- NULL # returned as NULL when no x.grid is supplied
if (!is.null(x.grid)) {
X.grid <- kernel.Gaussian(x.grid, x.ce, sigma.chosen)
X.grid <- cbind(X.grid, 1, t(x.grid), t(x.grid^2), t(x.grid^4))
# X.grid <- cbind(X.grid,lin.down(x.grid,median(x.de)),lin.up(x.grid,median(x.de)))
y.grid <- loglin.ratio(alpha, X.grid, X.de)
}
list(alpha = alpha, y.grid = y.grid)
}
loglin.grad <- function(alpha, mean.X.nu, X.de) {
n.de <- nrow(X.de)
alpha <- as.numeric(alpha)
exp.de <- exp(X.de %*% alpha)
score <- sum(mean.X.nu * alpha) - log(mean(exp.de))
r.de <- exp.de / mean(exp.de) ## mean so that 1/n.de (sum_i r(x.de.i)) = 1
## sanity check: sum(r.de) / n.de equals 1 by construction
## zeta is vector for each base fct in col i:
## 1/n.de sum_samples_j base_i(de_j) * r.de.j
zeta <- t(r.de) %*% X.de / n.de
list(grad = as.numeric(mean.X.nu - zeta), score = score)
}
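## A finite-difference check (synthetic inputs) that the analytic gradient
## returned by loglin.grad matches a numerical derivative of its score:
if (FALSE) {
  set.seed(6)
  X.de <- matrix(rnorm(50 * 3), 50, 3)
  mean.X.nu <- rnorm(3)
  alpha <- rnorm(3, sd = 0.1)
  g <- loglin.grad(alpha, mean.X.nu, X.de)$grad
  h <- 1e-6
  g.num <- sapply(1:3, function(j) {
    e <- replace(rep(0, 3), j, h)
    (loglin.grad(alpha + e, mean.X.nu, X.de)$score -
     loglin.grad(alpha - e, mean.X.nu, X.de)$score) / (2 * h)
  })
  max(abs(g - g.num)) # should be of order 1e-8 or smaller
}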
loglin.ratio <- function(alpha, X.grid, X.de) {
alpha <- as.numeric(alpha)
exp.grid <- exp(X.grid %*% alpha)
exp.de <- exp(X.de %*% alpha)
r.grid <- exp.grid / mean(exp.de) ## mean so that 1/n.de (sum_i r(x.de.i)) = 1
r.grid
}
reduce.base <- function(kl, cutoff = 1e-5) {
inds <- which(abs(kl$alpha) > cutoff)
kl$alpha <- kl$alpha[inds]
kl$c.dists <- kl$c.dists[inds]
kl$x.ce <- kl$x.ce[, inds, drop = FALSE] # keep matrix shape even if one kernel remains
kl
}
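## A tiny sketch of reduce.base on a hand-built fit object; the field names
## are the ones reduce.base indexes, and the values here are made up:
if (FALSE) {
  kl <- list(alpha = c(0.3, 1e-8, 0.7),
             c.dists = c(1.1, 2.2, 3.3),
             x.ce = matrix(1:6, nrow = 2))
  reduce.base(kl) # drops the second kernel, whose weight is below cutoff
}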