# Hello, world!
#
# This is an example function named 'hello'
# which prints 'Hello, world!'.
#
# You can learn more about package authoring with RStudio at:
#
# http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
# Build and Reload Package: 'Ctrl + Shift + B'
# Check Package: 'Ctrl + Shift + E'
# Test Package: 'Ctrl + Shift + T'
##------self-defined function-----------------------
#-- generate sketch matrix
sketchfun <- function(m, n, type){
library(Matrix)
Praw <- Diagonal(n, x = 1)
ind <- sample(n, m)
R <- Diagonal(n, 1/2 - rbinom(n, 1, 0.5))
set.seed(1)
H <- Praw[sample(n,n),]
S <- switch(type,
subGaussian = matrix(rnorm(n*m),m,n),
ROS = sqrt(n/m)*Praw[ind,]%*% H %*% R,
subSampling = sqrt(n/m)*Praw[ind,])
return(S)
}
#-- main function to conduct fast random KPCA.
sKPCA_nc <- function(trdata, d, m=max(d+1,ceiling(sqrt(nrow(trdata))) ), Stype='subGaussian', kern=rbfdot(sigma=0.5), seed=2){
# # nc indicates non-centeralized
require(kernlab)
n <- nrow(trdata)
K <- kernelMatrix(kern, as.matrix(trdata))
set.seed(seed) # reproducible
S <- sketchfun(m,n, Stype)
Kt <- S %*% K %*% t(S)
eigv <- eigen(Kt, symmetric = T)
alpha_tilde <- eigv$vectors[,1:d]%*% diag(1/sqrt(eigv$values[1:d]))
alpha <- t(S) %*% alpha_tilde
# norm(cbind(alpha[,1]),'f')
alpha_rotate <- K %*% alpha # rotation for principal component scores.
res <- list()
res$alpha <- alpha
res$Kia <- qr.solve(t(alpha)%*% alpha_rotate )
res$kern <- kern
res$data <- trdata
res$alpha_rotate <- alpha_rotate
res$prop_var <- sum(eigv$values[1:d]) / sum(eigv$values)
class(res) <- c('sKPCA', 'sKPCA_nc')
return(res)
}
sKPCA_nc2 <- function(trdata,d, m=max(d+1,ceiling(sqrt(nrow(trdata))) ), Stype='subGaussian', kern=rbfdot(sigma=0.5), seed=2){
# # nc indicates non-centeralized
require(kernlab)
n <- nrow(trdata)
K <- kernelMatrix(kern, as.matrix(trdata))
set.seed(seed) # reproducible
S <- sketchfun(m,n, Stype)
SK <- S %*% K
SKS <- SK%*%t(S)
svdSKS <- svd(SKS)
Z <- t(SK)%*% svdSKS$u %*% diag(1/sqrt(svdSKS$d))
svdz <- svd(Z)
alpha <- svdz$u[,1:d]
# alpha[1:10,]
alpha_rotate <- K %*% alpha # rotation for principal component scores.
res <- list()
res$alpha <- alpha
res$alpha_rotate <- alpha_rotate
res$Kia <- qr.solve(t(alpha)%*% alpha_rotate )
res$kern <- kern
res$data <- trdata
res$prop_var <- sum(svdz$d[1:d]) / sum(svdz$d)
class(res) <- c('sKPCA', 'sKPCA_nc')
return(res)
}
#-- main function to conduct fast random centralized KPCA.
sKPCA_c <- function(trdata, d, m=max(d+1,ceiling(sqrt(nrow(trdata))) ), Stype='subGaussian', kern=rbfdot(sigma=0.5), seed=2){
require(kernlab)
n <- nrow(trdata)
K <- kernelMatrix(kern, as.matrix(trdata))
Krow <- apply(K,2, mean)
Ksum <- mean(Krow)
K <- K - matrix(Krow, n,n, byrow=T) - matrix(Krow, n, n) + Ksum
set.seed(seed) # reproducible
S <- sketchfun(m,n, Stype)
Kt <- S %*% K %*% t(S) /n
eigv <- eigen(Kt, symmetric = T)
alpha_tilde <- eigv$vectors[,1:d]%*% diag(1/sqrt(eigv$values[1:d]))
alpha <- t(S) %*% alpha_tilde
# norm(cbind(alpha[,1]),'f')
alpha_rotate <- K %*% alpha # rotation for principal component scores.
res <- list()
res$alpha <- alpha
res$alpha_rotate <- alpha_rotate
res$prop_var <- sum(eigv$values[1:d]) / sum(eigv$values)
res$Ktia <- qr.solve(t(alpha)%*%K%*%alpha)
res$Krow <- Krow
res$Ksum <- Ksum
res$kern <- kern
res$data <- trdata
class(res) <- c('sKPCA', 'sKPCA_c')
return(res)
}
sKPCA_c2 <- function(trdata, d, m=max(d+1,ceiling(sqrt(nrow(trdata))) ), Stype='subGaussian', kern=rbfdot(sigma=0.5), seed=2){
require(kernlab)
n <- nrow(trdata)
K <- kernelMatrix(kern, as.matrix(trdata))
Krow <- apply(K,2, mean)
Ksum <- mean(Krow)
K <- K - matrix(Krow, n,n, byrow=T) - matrix(Krow, n, n) + Ksum
set.seed(seed) # reproducible
S <- sketchfun(m,n, Stype)
S <- sketchfun(m,n, Stype)
SK <- S %*% K/n
SKS <- SK%*%t(S)
svdSKS <- svd(SKS)
Z <- t(SK)%*% svdSKS$u %*% diag(1/sqrt(svdSKS$d))
svdz <- svd(Z)
alpha <- svdz$u[,1:d] %*% diag(1/svdz$d[1:d])
# alpha[1:10,]
alpha_rotate <- K %*% alpha # rotation for principal component scores.
res <- list()
res$alpha <- alpha
res$alpha_rotate <- alpha_rotate
res$Ktia <- qr.solve(t(alpha)%*%K%*%alpha)
res$Krow <- Krow
res$Ksum <- Ksum
res$kern <- kern
res$data <- trdata
res$prop_var <- sum(svdz$d[1:d]) / sum(svdz$d)
class(res) <- c('sKPCA', 'sKPCA_c')
return(res)
}
sKPCA <- function(trdata,d, m=max(d+1,ceiling(sqrt(nrow(trdata))) ), Stype='subGaussian', kern=rbfdot(sigma=0.5), seed=2, fast_version=0, center=T){
if(center){
if(fast_version){
res <- sKPCA_c(trdata, d, m, Stype, kern, seed)
}else{
res <- sKPCA_c2(trdata, d, m, Stype, kern, seed)
}
res$center <- TRUE
}else{
if(fast_version){
res <- sKPCA_nc(trdata, d, m, Stype, kern, seed)
}else{
res <- sKPCA_nc2(trdata, d, m, Stype, kern, seed)
}
res$center <- FALSE
}
return(res)
}
# original KPCA
oKPCA <- function(trdata, d, kern=rbfdot(sigma=0.5), center=TRUE){
if(center){
res <- oKPCA_c(trdata, d, kern=kern)
res$center <- TRUE
}else{
res <- oKPCA_nc(trdata, d, kern=kern)
res$center <- FALSE
}
return(res)
}
# original KPCA
oKPCA_nc <- function(trdata, d, kern=rbfdot(sigma=0.5)){
require(kernlab)
n <- nrow(trdata)
K <- kernelMatrix(kern, as.matrix(trdata))
eigv <- eigen(K, symmetric = T)
alpha <- eigv$vectors[,1:d]
# alpha[1:10,]
alpha_rotate <- K %*% alpha # rotation for principal component scores.
res <- list()
res$alpha <- alpha
res$alpha_rotate <- alpha_rotate
res$Kia <- qr.solve(t(alpha)%*%K%*%alpha)
res$data <- trdata
res$kern <- kern
res$prop_var <- sum(eigv$values[1:d]) / sum(eigv$values)
class(res) <- 'oKPCA'
return(res)
}
# original KPCA
oKPCA_c <- function(trdata, d, kern=rbfdot(sigma=0.5)){
require(kernlab)
n <- nrow(trdata)
K <- kernelMatrix(kern, as.matrix(trdata))
Krow <- apply(K,2, mean)
Ksum <- mean(Krow)
Kt <- K - matrix(Krow, n,n, byrow=T) - matrix(Krow, n, n) + Ksum
eigv <- eigen(Kt/n, symmetric = T)
alpha <- eigv$vectors[,1:d]
alpha <- alpha %*% diag(1/sqrt(eigv$values[1:d]))
alpha_rotate <- Kt %*% alpha # rotation for principal component scores.
res <- list()
res$alpha <- alpha
res$alpha_rotate <- alpha_rotate
res$Ktia <- qr.solve(t(alpha)%*%K%*%alpha)
res$Krow <- Krow
res$Ksum <- Ksum
res$kern <- kern
res$data <- trdata
res$prop_var <- sum(eigv$values[1:d]) / sum(eigv$values)
class(res) <- 'oKPCA'
return(res)
}
rec_predict <- function(obj, newdata=NULL){
if((!inherits(obj, 'sKPCA')) && (!inherits(obj, 'oKPCA')))
stop('obj must be sKPCA or oKPCA class!')
if(obj$center){
re <- recerr_c(tsdata,obj$data,obj$alpha, obj$Ktia, obj$Krow, obj$Ksum,obj$kern)
attr(re, 'class') <- 'centralized'
}else{
re <- recerr_nc(tsdata,obj$data,obj$alpha, obj$Kia, obj$kern)
attr(re, 'class') <- 'noncentralized'
}
return(re)
}
recerr_c <- function(x,data,alpha, Ktia, Krow, Ksum, kern){ # nc indicates non-centeralized
# x <- tsdata; data <- obj$data; alpha <- obj$alpha; Ktia <- obj$Ktia; kern <- obj$kern
if(!inherits(x,'matrix')) x <- as.matrix(x)
if(!inherits(data,'matrix')) data <- as.matrix(data)
kmat <- kernelMatrix(kern, data,x)
nr <- nrow(kmat); nc <- ncol(kmat)
Ktzz <- diag(kernelMatrix(kern, x,x)) - 2*apply(kmat, 2, mean) + Ksum
Ktz <- kmat - matrix(Krow, nr, nc) - matrix(apply(kmat, 2, mean), nr, nc, byrow=T) + Ksum
res <- Ktzz - diag(t(Ktz)%*%alpha %*% Ktia %*%t(alpha)%*% Ktz)
return(as.vector(res))
}
rec_predict_c<- function(obj, tsdata){
if((!inherits(obj, 'sKPCA')) && (!inherits(obj, 'oKPCA')))
stop('obj must be sKPCA or oKPCA class!')
re <- recerr_c(tsdata,obj$data,obj$alpha, obj$Ktia, obj$Krow, obj$Ksum,obj$kern)
return(re)
}
rec_predict_nc <- function(obj, tsdata){
if((!inherits(obj, 'sKPCA')) && (!inherits(obj, 'oKPCA')))
stop('obj must be sKPCA or oKPCA class!')
re <- recerr_nc(tsdata,obj$data,obj$alpha, obj$Kia, obj$kern)
return(re)
}
#-- assist to evaluate reconstruction error
recerr_nc <- function(x,data,alpha, Kia, kern){ # nc indicates non-centeralized
# x <- tsdata
# data <- trdata
if(!inherits(x,'matrix')) x <- as.matrix(x)
if(!inherits(data,'matrix')) data <- as.matrix(data)
kmat <- kernelMatrix(kern, data,x)
f2 <- t(kmat)%*%alpha%*%Kia%*%t(alpha)%*%kmat
res <- diag(kernelMatrix(kern, x,x)) - diag(f2)
return(as.vector(res))
}
#--
vec2difmat <- function(x){
nx <- length(x)
matrix(x, nrow=nx, ncol=nx) - matrix(x, nrow=nx, ncol=nx, byrow=T)
}
sigmaselect <- function(X){
p <- ncol(X)
n <- nrow(X)
res <- array(0, c(n,n, p))
for(j in 1:p){
res[,,j] <- vec2difmat(X[,j])
}
return(sqrt(sum(res^2))/n^2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.