R/orclass.R
In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

Documented in orclass.default orclass.formula predict.orclass print.orclass

################################
### definition of orclass method:
################################

orclass <- function (x, ...) 
    UseMethod("orclass")

###################################
### main orclass.default - function:
###################################

orclass.default <- function(x, grouping, k, l, k0, a = 0.5, prior = NULL, inner.loops = 1, predict.train = "nearest", verbose = TRUE, ...){
  # x:            data input matrix
  # grouping      vector of class labels
  # k:            number of clusters
  # l:            final subspace dimension
  # k0:           number of initial clusters
  # a:            reduction rate for subspace dimension over the iterations
  # prior         class prior probabilities
  # inner.loops:  number of iterations for each pair of 'number of clusters' and 'subspace diension' during the iteration process
  # predict.train: "nearest" / "fuzzywts" (type) if prediction of training data is desired
  # verbose:      logical specifying whether iteration process should be printed

  # compute (relative) class frequencies and priors
  relfreqs  <- table(grouping)
  relpriors <- relfreqs / sum(relfreqs)
  if (is.null(prior)) prior <- relpriors
  if(length(prior) != length(relfreqs)) stop("Length of prior does not match number of classes in grouping.")
  if(any(prior == 0)) stop("Any priors of 0 or empty classes!")
  
  # compute orclus model
  orclus.res <- orclus(x=x, k=k, l=l, k0=k0, a = a, inner.loops=inner.loops, verbose=verbose)# ,...)

  ### compute cluster.posteriors
  xtab <- table(orclus.res$cluster, grouping)
  
  for(i in 1: length(prior)) xtab[,i] <- xtab[,i] * prior[i] / relpriors[i]
  cluster.posteriors <- xtab / rowSums(xtab) 
  
  # compute cluster frequencies
  cluster.freqs <- table(orclus.res$cluster)
  rel.clusfreqs <- cluster.freqs/sum(cluster.freqs) 
  # prior correction of cluster frequencies
  prior.corrected.cluster.freqs <- rowSums(xtab)
  rel.pricorr.clusfreqs <- prior.corrected.cluster.freqs/sum(prior.corrected.cluster.freqs)
  
  ### compute criteria of discriminatory power
  # compute train.error
  train.error <- sum(apply(cluster.posteriors, 1, function(z) return(1-max(z))) *  rel.pricorr.clusfreqs)
  # compute entropy 
  cl.entros <- apply(cluster.posteriors, 1, function(z) return( -sum(log2(z[which(z !=0)]) * z[which(z !=0)]) ))
  entro <- sum(cl.entros * rel.pricorr.clusfreqs)
  # compute gini
  gini <- sum((1-rowSums(cluster.posteriors^2)) *  rel.pricorr.clusfreqs)
  # compute chisq
  chisq <- chisq.test(xtab)$p.value # note: xtab is already prior corrected
  
  # summarize criteria in one vector
  sumcrit <- c(train.error = train.error, entropy = entro, gini = gini, chisquare.p = chisq)
  
  # call
  cl <- match.call()
  cl[[1]] <- as.name("orclass")
  
  # create result.list
  orclass.res <- list(orclus.res = orclus.res, cluster.posteriors = cluster.posteriors 
                      , cluster.priors = rel.pricorr.clusfreqs, purity = sumcrit 
                      , prior = prior, predict.train = NULL, orclass.call = cl
                      )
  
  # create result-object of class orclus
  class(orclass.res) <- "orclass"
  
  # training prediction, if specified
  if (predict.train == "nearest") orclass.res$predict.train <- predict(orclass.res, x, type = "nearest")
  if (predict.train == "fuzzywts") orclass.res$predict.train <- predict(orclass.res, x, type = "fuzzywts")
  
  return(orclass.res)   
  }


##################################
### orclass formula interface
##################################

orclass.formula <- function(formula, data = NULL, ...)
{
  m <- match.call(expand.dots = FALSE)
  if (is.matrix(eval.parent(m$data))) 
    m$data <- as.data.frame(data)
  m$... <- NULL
  m[[1]] <- as.name("model.frame")
  m <- eval.parent(m)
  Terms <- attr(m, "terms")
  grouping <- model.response(m)
  x <- model.matrix(Terms, m)
  #x <- model.frame(Terms, m)
  xvars <- as.character(attr(Terms, "variables"))[-1]
  if ((yvar <- attr(Terms, "response")) > 0) 
    xvars <- xvars[-yvar]
  xlev <- if (length(xvars) > 0) {
    xlev <- lapply(m[xvars], levels)
    xlev[!sapply(xlev, is.null)]
  }
  xint <- match("(Intercept)", colnames(x), nomatch = 0)
  if (xint > 0) 
    x <- x[, -xint, drop = FALSE]
  res <- orclass(x, grouping, ...)
  res$terms <- Terms
  res$contrasts <- attr(x, "contrasts")
  res$xlevels <- xlev
  attr(res, "na.message") <- attr(m, "na.message")
  if (!is.null(attr(m, "na.action"))) 
    res$na.action <- attr(m, "na.action")
  res
}



########################
### predict function
########################

predict.orclass <- function(object, newdata, type = "nearest", ...){
  # several error checks 
  if (class(object) != "orclass") stop("Object must be of class orclass!")
  if(is.data.frame(newdata)) newdata <- as.matrix(newdata)
  if(!is.matrix(newdata)) stop("Newdata must be of type matrix or data frame!")
  if(dim(object$orclus.res$centers)[2] != dim(newdata)[2]) stop("Newdata must be of same dimension as the explanatory input variables data set!")
  
  predicted.clusters <- predict(object$orclus.res, newdata)
  
  # assign posteriors
  if (type == "nearest"){ 
    posteriors <- object$cluster.posteriors[predicted.clusters$cluster,]
    }
  
  if (type == "fuzzywts"){
     # compute cluster membership values (for m=2) [coloumn wise]
     funx <- function(x) return(1/sum((x[1] * x[2:length(x)])^2))
     i <- 1
     dummy <- cbind(predicted.clusters$distances[,i], 1/predicted.clusters$distances)
     memberships.i <- apply(dummy, 1, funx)
     memberships <- memberships.i
     for(i in 2:ncol(predicted.clusters$distances)){
       dummy <- cbind(predicted.clusters$distances[,i], 1/predicted.clusters$distances)
       memberships.i <- apply(dummy, 1, funx)
       memberships <- cbind(memberships, memberships.i)       
      }
    # weight clusterwise posteriors with cluster memberships
    posteriors <- memberships %*% object$cluster.posteriors
    } 
  
  # assign classes
  rownames(posteriors) <- rownames(newdata)
  # without randomization (use first in case of multiple maxima): classes <- colnames(posteriors)[apply(posteriors, 1, which.max)]
  # randomization in case of several maxima as which.max returns first maximum
  class.obs <- function(z){res <- which(z == max(z)); if (length(res)>1) res <- sample(res,1); return(res)} 
  classes <- colnames(posteriors)[apply(posteriors, 1, class.obs)]
  
  # create results object
  res <- list()
  res$class       <- classes
  res$posterior   <- posteriors
  res$cluster     <- predicted.clusters$cluster
  res$distances   <- predicted.clusters$distances
  return(res)
  }



########################
### print function
########################

print.orclass <- function(x, ...){
  cat("\nOrclass results\n\n")
  cat("Call:\n")
  print(x$orclass.call)

  cat("\nDimension of subspaces:", x$orclus.res$subspace.dimension, "\n")
  cat("\nNumber of subgroups   :", nrow(x$orclus.res$center), "\n")
  
  cat("\nCluster priors        :", x$cluster.priors, "\n")
  cat("\nClass priors          :", x$prior, "\n")
  cat("\nCluster posteriors    :\n")
  print(x$cluster.posteriors)
  
  cat("\nCluster sparsity coefficient: ", x$orclus.res$sparsity.coefficient, "\n")
  cat("\nCluster purity :\n")
  print(x$purity)
  cat("\n\n")
  
  invisible(x)
  }

Any scripts or data that you put into this service are public.

orclus documentation built on May 2, 2019, 10:23 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

orclus
Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

R/orclass.R
In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

Defines functions orclass.default orclass.formula predict.orclass print.orclass

Documented in orclass.default orclass.formula predict.orclass print.orclass

Try the orclus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

orclus Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

R/orclass.R In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

Defines functions orclass.default orclass.formula predict.orclass print.orclass

Documented in orclass.default orclass.formula predict.orclass print.orclass

Try the orclus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

orclus
Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

R/orclass.R
In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation