# R/functions.R


# Perform the kernel PCA projection step from an existing kernel matrix.
computeProjectionFromKernel <- function(kernel, dims=2, eigentype=c("basic", "irlba")) {
	eigentype <- match.arg(eigentype)
	# "irlba" relies on the irlba package for a truncated decomposition
	eigenfunc <- switch(eigentype, basic=function(x) eigen(x, symmetric=TRUE), irlba=function(x) irlba(x, nu=dims, nv=0, adjust=dims))
	nelts <- dim(kernel)[1]
	if (dims > nelts) stop("dims cannot be greater than the kernel size (number of rows)")
	# double-center the kernel (Gram) matrix
	ones <- matrix(1/nelts, nrow=nelts, ncol=nelts)
	kernel <- kernel - ones %*% kernel - kernel %*% ones + ones %*% kernel %*% ones
	eigendecomp <- eigenfunc(kernel)
	members <- switch(eigentype, basic=c("values", "vectors"), irlba=c("d", "u"))
	# restrict processing to the dimensions actually used (i.e., dims)
	values <- eigendecomp[[members[1]]][1:dims]
	vectors <- eigendecomp[[members[2]]][,1:dims,drop=FALSE]
	# normalize eigenvectors according to (12.81) in Bishop
	vectors <- sweep(vectors, 2, sqrt(values*nelts), "/")
	# then compute projections according to (12.82), in matrix form
	projection <- kernel %*% vectors # transposed version of Bishop's description
	return(projection)
}
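
# Usage sketch (illustrative, not part of the package API): project data from a
# plain linear kernel built with tcrossprod; any positive semi-definite
# similarity matrix can be used the same way. Wrapped in if (FALSE) so that
# sourcing this file does not execute it.
if (FALSE) {
	X <- scale(as.matrix(iris[, 1:4]))
	K <- tcrossprod(X)                      # linear kernel: K = X %*% t(X)
	proj <- computeProjectionFromKernel(K, dims=2)
	plot(proj, col=as.integer(iris$Species)) # 2-D kernel PCA projection
}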




computeKernel <- function(data, type=c("gaussian", "pgaussian")) {
	type <- match.arg(type)
	d <- dim(data)[2]
	# first, standardize the data
	means <- apply(data[,1:d], 2, mean)
	sds <- apply(data[,1:d], 2, sd)
	sds[sds==0] <- 1 # guard against zero-variance (constant) columns
	data[,1:d] <- sweep(data[,1:d], 2, means, "-")
	data[,1:d] <- sweep(data[,1:d], 2, sds, "/")

	# compute the p and sigma parameters used below by the p-gaussian kernel
	# (see Francois2005 for details)
	# NB: D_N = 5% quantile and D_F = 95% quantile (necessary for p to remain positive);
	# compared to the paper, the expressions for sigma appear to have been swapped
	# (D_N paired with 0.95 and vice versa) => add as a footnote
	Ksim <- dist(data[,1:d]) # all unique non-trivial pairwise distances (n(n-1)/2 of them)
	if (type == "pgaussian") {
		quants <- quantile(Ksim, probs=c(0.05, 0.95))
		p <- log(log(0.05)/log(0.95)) / log(quants[2] / quants[1])
		sigma <- quants[2] / (-log(0.05))^(1/p)	
	} else if (type == "gaussian") {
		p <- 2
		sigma <- max(Ksim)
	} else {
		stop("unsupported kernel type")
	}

	# expand the dist object to a full square matrix
	Ksim <- as.matrix(Ksim)

	# turn distances into similarity values
	if (type %in% c("gaussian", "pgaussian")) {
		Ksim <- exp((-Ksim^p) / sigma^p)
	}

	return(Ksim)

}
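
# Usage sketch (illustrative): a p-gaussian kernel on the iris measurements,
# with the 5%/95% quantile heuristic from Francois2005 applied internally.
if (FALSE) {
	K <- computeKernel(as.matrix(iris[, 1:4]), type="pgaussian")
	range(K)   # similarities lie in (0, 1], with 1 on the diagonal
}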


# build an empty linkinfo object sized after mat (typically the kernel matrix)
linkinfo <- function(mat) {
	res <- list()
	n <- dim(mat)[1]
	# binary matrices are used for the representation: they match the kernel
	# representation easily, and make sense since these are binary relations
	res$link <- matrix(FALSE, nrow=n, ncol=n)
	res$notlink <- matrix(FALSE, nrow=n, ncol=n)
	class(res) <- "linkinfo"
	return(res)
}

length.linkinfo <- function(x, ...) {
	return(dim(x$link)[1])
}

add <- function(object, ...) {
	UseMethod("add")
}

add.linkinfo <- function(object, inda, indb, type=c("link", "notlink"), ...) {
	type <- match.arg(type)
	n <- length(object)
	# inda and indb may be vectors: couples are processed element-wise
	
	# are all indices within the range of the linkinfo object?
	if (any(inda < 1) || any(inda > n) || any(indb < 1) || any(indb > n)) {
		stop("inda and indb must be in the range of the linkinfo object")
	}
		
	if (length(inda) != length(indb)) {
		stop("inda and indb must have same length")
	}
	
	if (length(inda) == 0) {
		stop("at least one couple must be specified")
	}
	
	if (any(inda == indb)) {
		stop("self-couples (inda == indb) are not allowed")
	}
	
	# shape as matrix for indexing
	inds <- cbind(inda, indb)
	revinds <- cbind(indb, inda)
	
	# do any of the couples already exist?
	if (any(object$link[inds]) || any(object$notlink[inds])) {
		stop("[inda, indb] contains at least one already registered couple")
	}
	
	# register the couple
	if (type == "link") {
		object$link[inds] <- object$link[revinds] <- TRUE
	} else {
		object$notlink[inds] <- object$notlink[revinds] <- TRUE
	}
	
	return(object)
}
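
# Usage sketch: build a linkinfo object for 5 elements and register one
# must-link and one cannot-link couple (symmetry is handled internally).
if (FALSE) {
	li <- linkinfo(matrix(0, nrow=5, ncol=5))
	li <- add(li, 1, 2, type="link")
	li <- add(li, 2, 3, type="notlink")
	li$link[1, 2]   # TRUE
	li$link[2, 1]   # TRUE (symmetric)
}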

# Consistency rules:
# - if 1 is linked to 2, and 2 is linked to 3, then 1 must be linked to 3
# - if 1 is linked to 2, and 2 is not linked to 3, then 1 must not be linked to 3
# - no other constraint: if 1 is not linked to 2, and 2 is not linked to 3, nothing can
#   be concluded about 1 and 3; they may well be linked, in which case rule 2 still holds.
# In relational terms: the link relation L is transitive, and composing with the
# not-link relation U must respect (U o L)(1,3) -> U(1,3).
check <- function(object, ...) {
	UseMethod("check")		
}


check.linkinfo <- function(object, ...) {
	templink <- transform(object)
	
	# consistency holds iff the closed relations do not overlap
	return(!any(templink$link & templink$notlink))
}

transform.linkinfo <- function(`_data`, ...) {
	# transitivity rule 1: all elements linked to a common element are linked together
	trans1 <- function(vec) {
		vec <- which(vec) # convert to indices
		if (length(vec) > 1) {
			inds <- cbind(rep(vec, times=length(vec)), rep(vec, each=length(vec))) # convert to index matrix
			templink$link[inds] <<- TRUE
		}
	}
	
	# transitivity rule 2: elements linked to an element are not linked to the
	# elements that element is not linked to
	trans2 <- function(vecN, vecL) {
		if ((length(vecN) > 0) && (length(vecL) > 0)) {
			#vec <- c(vecN, vecL) # no: asymmetry must be respected
			inds <- cbind(rep(vecL, times=length(vecN)), rep(vecN, each=length(vecL)))
			inds <- rbind(inds, cbind(rep(vecN, each=length(vecL)), rep(vecL, times=length(vecN))))
			templink$notlink[inds] <<- TRUE
		}
	}
	
	# for mapply: get, for each row, the column indices that are TRUE
	indexes <- function(vec) {
		return(which(vec))
	}
	
	# copy original linkinfo
	templink <- `_data`
	
	# trans1: the return value is ignored; the templink copy is modified as a side effect
	apply(`_data`$link, 1, trans1)
	diag(templink$link) <- FALSE # reset the diagonal before trans2 (side effect kept for a simpler algorithm)
	
	# trans2: first extract the index lists, then same remark as for trans1
	indsL <- lapply(seq(nrow(`_data`$link)), function(i) indexes(`_data`$link[i,]))
	indsN <- lapply(seq(nrow(`_data`$notlink)), function(i) indexes(`_data`$notlink[i,]))
	mapply(trans2, vecN=indsN, vecL=indsL) 
	
	# modifications to the notlink set may influence trans2 itself: a second pass
	# would be needed for a fully stable solution, e.g.:
	#diag(templink$notlink) <- FALSE
	#indsN <- lapply(seq(nrow(templink$notlink)), function(i) indexes(templink$notlink[i,]))
	#mapply(trans2, vecN=indsN, vecL=indsL)
	
	diag(templink$notlink) <- FALSE # reset diagonal
	
	return(templink)
}
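
# Sketch of the closure: linking 1-2 and 2-3 while declaring 2-4 a cannot-link
# should propagate 1-3 as a link and 1-4, 3-4 as cannot-links.
if (FALSE) {
	li <- linkinfo(matrix(0, nrow=4, ncol=4))
	li <- add(li, 1, 2, type="link")
	li <- add(li, 2, 3, type="link")
	li <- add(li, 2, 4, type="notlink")
	cl <- transform(li)
	cl$link[1, 3]      # TRUE (transitivity of L)
	cl$notlink[1, 4]   # TRUE ((U o L)(1,4) -> U(1,4))
	check(li)          # TRUE: the constraint set is consistent
}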



del <- function(object, ...) {
	UseMethod("del")		
}


del.linkinfo <- function(object, inda, indb, ...) {
	n <- length(object)
	
	# are all indices within the range of the linkinfo object?
	if (any(inda < 1) || any(inda > n) || any(indb < 1) || any(indb > n)) {
		stop("inda and indb must be in the range of the linkinfo object")
	}
	}
	
	if (length(inda) != length(indb)) {
		stop("inda and indb must have same length")
	}

	if (length(inda) == 0) {
		stop("at least one couple must be specified")
	}
	
	if (any(inda == indb)) {
		stop("self-couples (inda == indb) are not allowed")
	}

	# shape as matrix for indexing
	inds <- cbind(inda, indb)
	revinds <- cbind(indb, inda)

	# remove the couples from both relations
	object$link[inds] <- object$link[revinds] <- FALSE
	object$notlink[inds] <- object$notlink[revinds] <- FALSE
	
	return(object)
}


transformKernel <- function(kern, linkinfo, type=c("none", "simple", "extended"), linkfun=function(x) x^(1/6), 
							notlinkfun=function(x) {1 - (1-x)^(1/6)}) {
	# Transform the kernel using a linkinfo object.
	# "none"     : leave similarities unchanged (the constraints are still checked)
	# "simple"   : transform only the similarities covered by the linking functions
	# "extended" : attach every element absent from the U and L relations to its
	#              closest (most similar, 1-NN) constrained neighbour by adding a
	#              "link" couple with it; transitivity does the rest of the job.
	# The linking functions may be parametrized away from the defaults.
	type <- match.arg(type)
	relatedElts <- numeric(0)
	unrelatedElts <- numeric(0)
	
	# for debug purposes
	#savelink <- linkinfo

	indexes <- function(vec) {
		vec <- which(vec)
		relatedElts <<- c(relatedElts, vec) # modifies relatedElts in the enclosing scope
		return(vec)
	}
	
	maxRelated <- function(ind) {
		maxind <- which.max(kern[ind,relatedElts])
		maxind <- relatedElts[maxind] # cast back to original index
		linkinfo <<- add(linkinfo, ind, maxind, type="link") # direct modification of linkinfo
		return(maxind)
	}

	
	# specific if "extended" : find those elements which are not in any relation,
	# and "attach" them to their closest element that is in a relation.
	if (type == "extended") {
		diag(kern) <- 0 # computation based on max similarity : exclude self.
		apply(linkinfo$link, 1, indexes)
		apply(linkinfo$notlink, 1, indexes)
		relatedElts <- unique(relatedElts)
		unrelatedElts <- 1:length(linkinfo)
		if (length(relatedElts) > 0) {
			unrelatedElts <- unrelatedElts[-relatedElts]
			apply(as.array(unrelatedElts), 1, maxRelated)
		}
		diag(kern) <- 1 # reestablish correct kernel
	}
	
	if (!check(linkinfo)) {
		#browser() # debug purposes
		stop("linkinfo is not consistent")
	}
	
	# apply transitivity relations
	templink <- transform(linkinfo)
	
	#browser()
	
	# transform only if appropriate
	if (type != "none") {
		kern[templink$link] <- linkfun(kern[templink$link])
		kern[templink$notlink] <- notlinkfun(kern[templink$notlink])
	}
	return(kern)
}
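
# Usage sketch tying the pieces together on toy data (illustrative: default
# linking functions, a couple of arbitrary constraints).
if (FALSE) {
	set.seed(42)
	X <- matrix(rnorm(40), nrow=20)
	K <- computeKernel(X, type="gaussian")
	li <- linkinfo(K)
	li <- add(li, 1, 2, type="link")
	li <- add(li, 3, 4, type="notlink")
	K2 <- transformKernel(K, li, type="extended")
	proj <- computeProjectionFromKernel(K2, dims=2)
}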




distortions <- function(origdata, projdata) {
	Reg <- 10^(-3) * dim(origdata)[1] # Laplace-style regularization term (see the note below)
	# standardize both data sets
	origmeans <- apply(origdata, 2, mean)
	projmeans <- apply(projdata, 2, mean)
	origsds <- apply(origdata, 2, sd)
	projsds <- apply(projdata, 2, sd)
	origsds[origsds==0] <- 1 # guard against zero-variance (constant) columns
	projsds[projsds==0] <- 1
	origdata <- sweep(origdata, 2, origmeans, "-")
	projdata <- sweep(projdata, 2, projmeans, "-")
	origdata <- sweep(origdata, 2, origsds, "/")
	projdata <- sweep(projdata, 2, projsds, "/")
	
	# compute distance matrices
	origmat <- as.matrix(dist(origdata))
	projmat <- as.matrix(dist(projdata))
	# normalize in [0,1]
	origmat <- origmat / max(origmat)
	projmat <- projmat / max(projmat)
	
	# compute D^+ and D^- matrices
	posdiffs <- negdiffs <- diffs <- origmat - projmat
	posdiffs[posdiffs<0] <- 0
	negdiffs[negdiffs>0] <- 0
	negdiffs <- -negdiffs
	
	# mu aggregates, and results
	mucompress <- apply(posdiffs, 1, sum) # sum over all columns for each row element
	mustretch <- apply(negdiffs, 1, sum)
	
	# If all distortions are almost 0, the min-max normalization could still produce
	# significantly non-zero values: hence the Laplace-style regularization term Reg
	# in the denominator. For summed pairwise distances, n * 10^(-3) seems a
	# reasonable value. (Check against Michael's paper; perhaps normalize with some
	# maximal/minimal pairwise distance instead.)
	rescompress <- (mucompress - min(mucompress)) / (max(mucompress) - min(mucompress) + Reg)
	resstretch <- (mustretch - min(mustretch)) / (max(mustretch) - min(mustretch) + Reg)
	return(list(compress=rescompress, stretch=resstretch))
}
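
# Usage sketch: per-point compression/stretch between the original space and a
# 2-D kernel PCA projection (illustrative data).
if (FALSE) {
	X <- as.matrix(iris[, 1:4])
	K <- computeKernel(X)
	proj <- computeProjectionFromKernel(K, dims=2)
	d <- distortions(X, proj)
	str(d)   # list with per-point $compress and $stretch values in [0, 1]
}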
