Nothing
#' Krein Support Vector Machine
#'
#' Solves a kernelized Support Vector Machine in the case where the kernel used may not be positive semidefinite.
#'
#' @param kernelmat the kernel matrix computed for all observations
#' @param ... additional parameters, see \code{\link{krein.svm.default}} for more details on how to use this function
#' @author Tullia Padellini, Francesco Palini, David Meyer. The included C++ library LIBSVM is authored by Chih-Chung Chang and Chih-Jen Lin)
#' @details This function implements the Krein Support Vector Machine solver as defined by Loosli et al. (2015).
#' The implementation of the solver is a modified version of the popular C++ library `LIBSVM`, while the connection to `R`
#' heavily relies on the `R`-package \pkg{e1701}.
#' @examples
#' \donttest{
#' library(TDA)
#' set.seed(123)
#' foo.data = list()
#' for(i in 1:20){
#' foo = circleUnif(100)
#' foo.data[[i]] = ripsDiag(foo, 1,1)$diagram}
#' for(i in 21:40){
#' foo = cbind(runif(100), runif(100))
#' foo.data[[i]] = ripsDiag(foo, 1,1)$diagram
#' }
#' GSWkernel = gaus.kernel(foo.data, h =1, dimension = 1, q = 2)
#' GGKclass = krein.svm(kernelmat = GSWkernel, y = rep(c(1,2), c(20,20)))
#' }
#' @return An object of class \code{krein.svm} containing the fitted model, including:
#' \describe{
#' \item{\code{SV}}{a matrix containing the Support Vectors}
#' \item{\code{index}}{index of the resulting support vectors in the data matrix}
#' \item{\code{coefs}}{a matrix containing corresponding coefficients times the training labels}
#' \item{\code{rho}}{value of the (negative) intercept}
#' }
#' @references
#' \insertRef{loosli2015learning}{kernelTDA}
#'
#' \insertRef{chang2011libsvm}{kernelTDA}
#'
#' \insertRef{dimitriadou2008misc}{kernelTDA}
#' @export
krein.svm <-
function (kernelmat, ...)
UseMethod ("krein.svm")
krein_svm <-
function (kernelmat = NULL,
y = NULL,
cost = 1,
n_classes = 2,
class.weights = NULL,
cross = 0,
probability = FALSE,
fitted = TRUE,
subset,
t_levels =NULL,
...)
{
scale = FALSE
sparse = FALSE
shrinking = FALSE
na.action = na.omit
degree = 0
gamma = 1
coef0 = 0
nu = 0.5
cachesize = 40
tolerance = 0.001
epsilon = 0.1
x = kernelmat
if(!is.factor(y)) y = as.factor(y)
yorig <- y
if(inherits(x, "Matrix")) {
loadNamespace("SparseM")
loadNamespace("Matrix")
x <- as(x, "matrix.csr")
}
if (sparse <- inherits(x, "matrix.csr"))
loadNamespace("SparseM")
## NULL parameters?
if(is.null(cost)) stop(sQuote("cost"), " must not be NULL!")
if(is.null(tolerance)) stop(sQuote("tolerance"), " must not be NULL!")
x.scale <- y.scale <- NULL
type = 0
kernel = 4
if (nrow(x) != ncol(x)) stop("kernel matrix must be squared")
if(kernel == 4) x = cbind(1:nrow(x), x)
xhold <- if (fitted) kernelmat else NULL
nac <- attr(x, "na.action")
## further parameter checks
nr <- nrow(x)
if (cross > nr)
stop(sQuote("cross"), " cannot exceed the number of observations!")
ytmp <- y
attributes(ytmp) <- NULL
if (!is.vector(ytmp) && !is.factor(y) && type != 2)
stop("y must be a vector or a factor.")
if (type != 2 && length(y) != nr)
stop("x and y don't match.")
if (cachesize < 0.1)
cachesize <- 0.1
if(is.factor(y)){
lev = levels(y)
class.weights = rep(1, length(lev))
names(class.weights) <- lev
weightlabels <- match (names(class.weights), lev)
}
if(is.numeric(y)){
lev <- 1:nr
weightlabels <- 1:nr
}
## in case of classification: transform factors into integers
if (type == 2) # one class classification --> set dummy
y <- rep(1, nr)
else
if (is.factor(y)) {
lev <- levels(y)
y <- as.integer(y)
} else {
if (type < 3) {
if(any(as.integer(y) != y))
stop("dependent variable has to be of factor or integer type for classification mode.")
y <- as.factor(y)
lev <- levels(y)
y <- as.integer(y)
} else lev <- unique(y)
}
if (type < 3 && !is.null(class.weights)) {
if (is.character(class.weights) && class.weights == "inverse")
class.weights <- 1 / table(y)
if (is.null(names(class.weights)))
stop("Weights have to be specified along with their according level names !")
weightlabels <- match (names(class.weights), lev)
if (any(is.na(weightlabels)))
stop("At least one level name is missing or misspelled.")
}
if(is.null(class.weights)){
if(is.factor(y)){
class.weights <- rep(1, length(levels(y)))
names(class.weights) <- levels(y)
weightlabels <- match (names(class.weights), levels(y))
} else {
class.weights <- rep(1, nr)
names(class.weights) <- y
}
}
nclass <- 2
if (type < 2) nclass <- length(lev)
if (is.null(type)) stop("type argument must not be NULL!")
if (is.null(kernel)) stop("kernel argument must not be NULL!")
if (is.null(degree)) stop("degree argument must not be NULL!")
if (is.null(gamma)) stop("gamma argument must not be NULL!")
if (is.null(coef0)) stop("coef0 seed argument must not be NULL!")
if (is.null(cost)) stop("cost argument must not be NULL!")
if (is.null(nu)) stop("nu argument must not be NULL!")
if (is.null(cachesize)) stop("cachesize argument must not be NULL!")
if (is.null(tolerance)) stop("tolerance argument must not be NULL!")
if (is.null(epsilon)) stop("epsilon argument must not be NULL!")
if (is.null(shrinking)) stop("shrinking argument must not be NULL!")
if (is.null(cross)) stop("cross argument must not be NULL!")
if (is.null(sparse)) stop("sparse argument must not be NULL!")
if (is.null(probability)) stop("probability argument must not be NULL!")
cret <- svmtrain_R(
## data
if (sparse) x@ra else t(x),
as.integer (nr), as.integer(ncol(x)),
as.double (y),
## sparse index info
as.integer (if (sparse) x@ia else 0),
as.integer (if (sparse) x@ja else 0),
## parameters
as.integer (type),
as.integer (kernel),
as.integer (degree),
as.double (gamma),
as.double (coef0),
as.double (cost),
as.double (nu),
as.integer (weightlabels),
as.double (class.weights),
as.integer (length (class.weights)),
as.double (cachesize),
as.double (tolerance),
as.double (epsilon),
as.integer (shrinking),
as.integer (cross),
as.integer (sparse),
as.integer (probability),
## results
nclasses = integer (1),
nr = integer (1), # nr of support vectors
index = integer (nr),
labels = integer (nclass),
nSV = integer (nclass),
rho = double (nclass * (nclass - 1) / 2),
coefs = double (nr * (nclass - 1)),
sigma = double (1),
probA = double (nclass * (nclass - 1) / 2),
probB = double (nclass * (nclass - 1) / 2),
cresults = double (cross),
ctotal1 = double (1),
ctotal2 = double (1)
)
if (cret$error != "")
stop(paste(cret$error, "!", sep=""))
cret$index <- cret$index[1:cret$nr]
ret <- list (
call = match.call(),
type = type,
kernel = kernel,
cost = cost,
degree = degree,
gamma = gamma,
coef0 = coef0,
nu = nu,
epsilon = epsilon,
sparse = sparse,
scaled = scale,
x.scale = x.scale,
y.scale = y.scale,
nclassTOT= n_classes,
levelsTOT= t_levels,
nclasses = cret$nclasses,
#number of classes
# complab = compl_labels, #complete labels
levels = lev,
tot.nSV = cret$nr, #total number of sv
nSV = cret$nSV[1:cret$nclasses], #number of SV in diff. classes
labels = cret$labels[1:cret$nclasses], #labels of the SVs.
SV = if (sparse) SparseM::t(SparseM::t(x[cret$index]))
else t(t(x[cret$index,,drop = FALSE])), #copy of SV
index = cret$index, #indexes of sv in x
##constants in decision functions
rho = cret$rho[1:(cret$nclasses * (cret$nclasses - 1) / 2)],
##probabilites
compprob = probability,
probA = if (!probability) NULL else
cret$probA[1:(cret$nclasses * (cret$nclasses - 1) / 2)],
probB = if (!probability) NULL else
cret$probB[1:(cret$nclasses * (cret$nclasses - 1) / 2)],
sigma = if (probability) cret$sigma else NULL,
##coefficiants of sv
coefs = if (cret$nr == 0) NULL else
t(matrix(cret$coefs[1:((cret$nclasses - 1) * cret$nr)],
nrow = cret$nclasses - 1,
byrow = TRUE)),
na.action = nac
)
## cross-validation-results
# if (cross > 0){
# suppressWarnings(vgr<-split(sample(1:m,m),1:cross))
#
# ret$accuracies <- cret$cresults;
# ret$tot.accuracy <- cret$ctotal1;
# }
class (ret) <- "krein.svm"
ret
}
#' Krein Support Vector Machine
#'
#' Solves a kernelized Support Vector Machine in the case where the kernel used may not be positive semidefinite.
#'
#' @param kernelmat the kernel matrix computed for all observations
#' @param y a vector of labels
#' @param cost cost of violating the constraint
#' @param class.weights a named vector of weights for the different classes, used for asymmetric class sizes. Not all factor levels have to be supplied (default weight: 1). All components have to be named. Specifying "inverse" will choose the weights inversely proportional to the class distribution.
#' @param cross number of fold in a k-fold cross validation
#' @param probability logical indicating whether the model should allow for probability predictions (default: \code{FALSE}).
#' @param fitted logical indicating whether the fitted values should be computed and included in the model or not (default: \code{TRUE})
#' @param subset an index vector specifying the cases to be used in the training sample. (NOTE: If given, this argument must be named.)
#' @param ... additional parameters
#' @author Tullia Padellini, Francesco Palini, David Meyer. The included C++ library LIBSVM is authored by Chih-Chung Chang and Chih-Jen Lin)
#' @details This function implements the Krein Support Vector Machine solver as defined by Loosli et al. (2015).
#' The implementation of the solver is a modified version of the popular C++ library `LIBSVM`, while the connection to `R`
#' heavily relies on the `R`-package \pkg{e1701}.
#' @examples
#' ## DO NOT RUN:
#' # library(TDA)
#' # set.seed(123)
#' # foo.data = list()
#' # for(i in 1:20){
#' # foo = circleUnif(100)
#' # foo.data[[i]] = ripsDiag(foo, 1,1)$diagram}
#' # for(i in 21:40){
#' # foo = cbind(runif(100), runif(100))
#' # foo.data[[i]] = ripsDiag(foo, 1,1)$diagram
#' # }
#' # GSWkernel = gaus.kernel(foo.data, h =1, dimension = 1, q = 2)
#' # GGKclass = krein.svm(kernelmat = GSWkernel, y = rep(c(1,2), c(20,20)))
#' @return An object of class \code{krein.svm} containing the fitted model, including:
#' \describe{
#' \item{\code{SV}}{a matrix containing the Support Vectors}
#' \item{\code{index}}{index of the resulting support vectors in the data matrix}
#' \item{\code{coefs}}{a matrix containing corresponding coefficients times the training labels}
#' \item{\code{rho}}{value of the (negative) intercept}
#' }
#' @references
#' \insertRef{loosli2015learning}{kernelTDA}
#'
#' \insertRef{chang2011libsvm}{kernelTDA}
#'
#' \insertRef{dimitriadou2008misc}{kernelTDA}
#' @export
krein.svm.default <-
function (kernelmat = NULL,
y = NULL,
cost = 1,
class.weights = NULL,
cross = 0,
probability = FALSE,
fitted = TRUE,
subset,
...)
{
x = kernelmat
y <- as.factor(y)
y.p <- as.integer(y)
nclass <- length(unique(y.p))
indexes <- lapply(sort(unique(y.p)), function(kk) which(y.p == kk))
type <- 0
if(fitted) xhold <- x
ret <- list()
p <- 0
for (i in 1:(nclass-1)) {
jj <- i+1
for(j in jj:nclass) {
p <- p+1
yd <- y[c(indexes[[i]],indexes[[j]])]
yd <- droplevels(yd)
K <- x[c(indexes[[i]],indexes[[j]]), c(indexes[[i]],indexes[[j]]),drop=FALSE]
pret <- krein_svm(kernelmat = K,
y = yd,
n_classes = nclass,
cost = cost,
class.weights = NULL,
cross = 0,
probability = FALSE,
fitted = fitted,
t_levels = levels(y),
...)
ret[[p]] <- pret
}
}
class (ret) <- "krein.svm"
ret$indexes = indexes
if (fitted) {
ret$fitted <- predict(ret, newdata = xhold,
decision.values = FALSE)
}
if(cross > 1){
suppressWarnings(vgr<-split(sample(1:nrow(x),nrow(x)),1:cross))
cross.fun = function(idx){
cross.fit = krein.svm.default(kernelmat = x[-idx, -idx],
y[-idx], cost = cost,
class.weights = class.weights,...)
cross.predict = predict(cross.fit, x[idx, -idx, drop=FALSE])
return(mean(cross.predict!=y[idx]))
}
predcross = lapply(vgr, cross.fun)
ret$misclassifications = unlist(predcross)
ret$tot.misclassification = mean(unlist(predcross))
}
ret
}
predict_krein_svm <-
function (object, newdata,
decision.values = FALSE,
probability = FALSE,
...,
na.action = na.omit)
{
# if (missing(newdata))
# return(fitted(object))
#
newdata = cbind(1:nrow(newdata), newdata)
#
if (object$tot.nSV < 1)
stop("Model is empty!")
if(inherits(newdata, "Matrix")) {
loadNamespace("SparseM")
loadNamespace("Matrix")
newdata <- as(newdata, "matrix.csr")
}
if(inherits(newdata, "simple_triplet_matrix")) {
loadNamespace("SparseM")
ind <- order(newdata$i, newdata$j)
newdata <- new("matrix.csr",
ra = newdata$v[ind],
ja = newdata$j[ind],
ia = as.integer(cumsum(c(1, tabulate(newdata$i[ind])))),
dimension = c(newdata$nrow, newdata$ncol))
}
sparse <- inherits(newdata, "matrix.csr")
if (object$sparse || sparse)
loadNamespace("SparseM")
act <- NULL
if ((is.vector(newdata) && is.atomic(newdata)))
newdata <- t(t(newdata))
if (sparse)
newdata <- SparseM::t(SparseM::t(newdata))
preprocessed <- !is.null(attr(newdata, "na.action"))
rowns <- if (!is.null(rownames(newdata)))
rownames(newdata)
else
1:nrow(newdata)
if (!object$sparse) {
if (inherits(object, "krein.svm.formula")) {
if(is.null(colnames(newdata)))
colnames(newdata) <- colnames(object$SV)
newdata <- na.action(newdata)
act <- attr(newdata, "na.action")
newdata <- model.matrix(delete.response(terms(object)),
as.data.frame(newdata))
} else {
newdata <- na.action(as.matrix(newdata))
act <- attr(newdata, "na.action")
}
}
if (!is.null(act) && !preprocessed)
rowns <- rowns[-act]
if (any(object$scaled))
newdata[,object$scaled] <-
scale(newdata[,object$scaled, drop = FALSE],
center = object$x.scale$"scaled:center",
scale = object$x.scale$"scaled:scale"
)
ret <- svmpredict_R(
as.integer (decision.values),
as.integer (probability),
## model
if (object$sparse) object$SV@ra else t(object$SV),
as.integer (nrow(object$SV)), as.integer(ncol(object$SV)),
as.integer (if (object$sparse) object$SV@ia else 0),
as.integer (if (object$sparse) object$SV@ja else 0),
as.double (as.vector(object$coefs)),
as.double (object$rho),
as.integer (object$compprob),
as.double (if (object$compprob) object$probA else 0),
as.double (if (object$compprob) object$probB else 0),
as.integer (object$nclasses),
as.integer (object$tot.nSV),
as.integer (object$labels),
as.integer (object$nSV),
as.integer (object$sparse),
## parameter
as.integer (object$type),
as.integer (object$kernel),
as.integer (object$degree),
as.double (object$gamma),
as.double (object$coef0),
## test matrix
if (sparse) newdata@ra else t(newdata),
as.integer (nrow(newdata)),
as.integer (if (sparse) newdata@ia else 0),
as.integer (if (sparse) newdata@ja else 0),
as.integer (sparse),
## decision-values
ret = double(nrow(newdata)),
dec = double(nrow(newdata) * object$nclasses * (object$nclasses - 1) / 2),
prob = double(nrow(newdata) * object$nclasses)
)
ret2 <- if (is.character(object$levels)) # classification: return factors
factor (object$levels[ret$ret], levels = object$levels)
else if (object$type == 2) # one-class-classification: return TRUE/FALSE
ret$ret == 1
else if (any(object$scaled) && !is.null(object$y.scale)) # return raw values, possibly scaled back
ret$ret * object$y.scale$"scaled:scale" + object$y.scale$"scaled:center"
else
ret$ret
names(ret2) <- rowns
ret2 <- napredict(act, ret2)
if (decision.values) {
colns = c()
for (i in 1:(object$nclasses - 1))
for (j in (i + 1):object$nclasses)
colns <- c(colns,
paste(object$levels[object$labels[i]],
"/", object$levels[object$labels[j]],
sep = ""))
attr(ret2, "decision.values") <-
napredict(act,
matrix(ret$dec, nrow = nrow(newdata), byrow = TRUE,
dimnames = list(rowns, colns)
)
)
}
if (probability && object$type < 2) {
if (!object$compprob)
warning("SVM has not been trained using `probability = TRUE`, probabilities not available for predictions.")
else
attr(ret2, "probabilities") <-
napredict(act,
matrix(ret$prob, nrow = nrow(newdata), byrow = TRUE,
dimnames = list(rowns, object$levels[object$labels])
)
)
}
ret2
}
#' @export
predict.krein.svm <-
function (object, newdata,
decision.values = FALSE,
probability = FALSE,
...,
na.action = na.omit)
{
if (missing(newdata)){
return(fitted(object))
}
newnrows = nrow(newdata)
indexes <- object$indexes
nclass <- object[[1]]$nclassTOT
ylev <- object[[1]]$levelsTOT
predres <- 1:newnrows
votematrix <- matrix(0, nclass,newnrows)
p <- 0
for(i in 1:(nclass-1)){
jj <- i+1
for(j in jj:nclass){
p <- p+1
retp <- predict_krein_svm(object[[p]], newdata = newdata[, c(indexes[[i]],indexes[[j]]),drop=FALSE], decision.values = decision.values, probability = probability, ...)
# if(missing(newdata)) {
# retp <- predict_krein_svm(object[[p]], decision.values = decision.values, probability = probability, ...)
# } else {
# retp <- predict_krein_svm(object[[p]], newdata = newdata, decision.values = decision.values, probability = probability, ...)
# }
votematrix[i,retp==ylev[i]] <- votematrix[i,retp==ylev[i]] + 1
votematrix[j,retp==ylev[j]] <- votematrix[j,retp==ylev[j]] + 1
}
}
predres <- apply(votematrix, 2, which.max)
# return(votematrix)
return(factor(ylev[predres], levels = ylev))
}
#' @export
print.krein.svm <-
function (x, ...)
{
#cat("\nCall:", deparse(x[[1]]$call, 0.8 * getOption("width")), "\n", sep="\n")
cat(" krein SVM: C-classification with precomputed kernel matrix\n")
# cat(" cost: ", unlist(lapply(x, function(jj) jj$cost)), "\n")
# cat("\nNumber of Support Vectors: ", unlist(lapply(x, function(jj) jj$tot.nSV)))
# cat("\n\n")
}
#' @export
summary.krein.svm <-
function(object, ...)
structure(object, class="summary.krein.svm")
#' @export
print.summary.krein.svm <-
function (x, ...)
{
print.krein.svm(x)
# cat(" (", unlist(lapply(x, function(jj) jj$nSV)), ")\n\n")
cat("\nNumber of Classes: ", x[[1]]$nclassTOT, "\n\n")
cat("Levels:", if(is.numeric(x[[1]]$levelsTOT)) "(as integer)", "\n", x[[1]]$levelsTOT, "\n\n")
if ("tot.misclassification" %in% names(x)) {
cat(length (x$misclassifications), "-fold cross-validation on training data:\n\n", sep="")
cat("Total Misclassification:", x$tot.misclassification, "\n")
cat("Single Misclassifications:\n", x$misclassifications, "\n\n")
}
cat("\n\n")
}
#' @export
scale.data.frame <-
function(x, center = TRUE, scale = TRUE)
{
i <- sapply(x, is.numeric)
if (ncol(x[, i, drop = FALSE])) {
x[, i] <- tmp <- scale.default(x[, i, drop = FALSE], na.omit(center), na.omit(scale))
if(center || !is.logical(center))
attr(x, "scaled:center")[i] <- attr(tmp, "scaled:center")
if(scale || !is.logical(scale))
attr(x, "scaled:scale")[i] <- attr(tmp, "scaled:scale")
}
x
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.