R/orclus.R
In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

Documented in orclus.default predict.orclus print.orclus

### subfunction for final computation of within cluster projected energies and sparsity coefficient
# compute projected new subspace and projected energy merge of two clusters
compute.final.projen <- function(i, x, act.clustering, allpts = FALSE){
  if (!allpts) { 
    clusteri    <- x[which(act.clustering$cluster == i),] # cluster i
    centpro     <- t(apply(clusteri %*% act.clustering$subspaces[[i]], 1, function(z, zen) return(z-zen), zen = t(act.clustering$centers[i,])%*%act.clustering$subspaces[[i]])) # cluster in subspace shifted by its mean 
    projen      <- sum(apply(centpro, 1, function(z) return(sum(z^2)))) / nrow(centpro)  # projected energy: average distances from center in subspace
    }
  if (allpts) { 
    cntr        <- apply(x, 2, mean)
    centpro     <- t(apply(x %*% act.clustering$subspaces[[i]], 1, function(z, zen) return(z-zen), zen = t(cntr)%*%act.clustering$subspaces[[i]])) # cluster in subspace shifted by its mean 
    projen      <- sum(apply(centpro, 1, function(z) return(sum(z^2)))) / nrow(centpro)  # projected energy: average distances from center in subspace
    }    
  return(projen)
  }


################################
### definition of orclus method:
################################

orclus <- function (x, ...) 
    UseMethod("orclus")


###################################
### main orclus.default - function:
###################################

orclus.default <- function(x, k, l, k0, a = 0.5, inner.loops = 1, verbose = TRUE, ...){
  # x:            data input matrix
  # k:            number of clusters
  # l:            final subspace dimension
  # k0:           number of initial clusters
  # a:            reduction rate for subspace dimension over the iterations
  # inner.loops:  number of iterations for each pair of 'number of clusters' and 'subspace diension' during the iteration process
  # verbose:      logical specifying whether iteration process should be printed
  
  if (is.data.frame(x)) x <- as.matrix(x)
  
  # original dimensions
  d <- ncol(x)
  n <- nrow(x)
  
  # some preliminary error checks
  if (d < l) stop("Subspace dimension should be specified lower than the input dimension!")
  if (d == l){warning("Specified subspace dimension equal to original original dimension. kmeans() applied instead.")
              return(kmeans(x, centers = k))}
  if (d == 1) stop("For dimension of one no subspaces can be computed!")
  if (l < 1) stop("Subspace dimension l should be at least 1!")
  if (n == l) stop("No clustering is necessary if data consists of only one observation!")
  if (n < k*l) stop("Data must consist of more than k*l observations to compute dimensionality reduction for the clusters!")
  if (n < k) stop("Ther can't be more clusters than objects!")
  if (k == 1) stop("Clustering is not meaningful for only one cluster! For dimensionality reduction use principal component analysis.")
  if (!is.numeric(x)) stop("All input variables must be numeric!")
  if (a > 1 | a < 0) stop("Parameter a must be from the interval (0,1)!")
  if (k0 <= k) stop("For the initial number of clusters k0 > k is supposed!")
  
  # reduction factor for the number of clusters
  b <- exp(-log(d/l)*log(1/a)/log(k0/k))
  niters <- 1 + ceiling(log(k/k0, a)) 
  
  # initialization 
  act.d <- d  # current dimension and number of clusters
  act.k <- k0
  kc    <- k0 # initialization of new number of clusters and subspace dimension for the first iteration to k and l 
  lc    <- d

  act.clustering <- list()
  
  # begin iteration 
  for(iter in 1:niters){ 
    if (iter == 1 | act.d > l | act.k > k) {
       for(ilp in 1:inner.loops){ 
         if (verbose & ilp == 1) cat("iteration         :",iter,"\n")
              
         #1) assign clusters:
         if (iter == 1 & ilp == 1) {     
            if (verbose) cat("Initialization with", act.k, "clusters \n")
            clusinit                <- kmeans(x, max(k,round(kc)))
            act.clustering$cluster  <- clusinit$cluster
            act.clustering$centers  <- clusinit$centers
            act.clustering$size     <- clusinit$size 
            remove(clusinit)
            }
         if (iter > 1){
            clusupdate <- reassign(x, act.clustering)
            act.clustering$cluster   <- clusupdate$cluster
            act.clustering$centers   <- clusupdate$centers
            act.clustering$size      <- clusupdate$size
            act.clustering$subspaces <- clusupdate$subspaces
            remove(clusupdate)         
            }
             
         #if (verbose & ilp == 1) cat("current ratio (dimension) :", lc, "\n") 
         
         #2) find new subspaces
         act.clustering$subspaces <- findvectors(x, act.clustering, dimen = max(l,round(lc)))
  
         if(is.vector(act.clustering$subspaces[[1]])) act.d <- 1
         if(is.matrix(act.clustering$subspaces[[1]])) act.d <- dim(act.clustering$subspaces[[1]])[2]
         if (verbose & ilp == 1) cat("Actual Subspace dimension :", act.d, "\n") 
         #if (verbose) cat("Loop                      :", ilp,"\n")
         } 
        
       #3) set new dimension and cluster number for next iteration
       knew <- kc * a
       lnew <- lc * b
       
       #4) merge clusters
       #if (verbose) cat("current ratio (cluster)   :", knew, "\n") 
       act.clustering <- clmerge(x, act.clustering, max(k, round(knew)))     

       act.k <- length(table(act.clustering$cluster))
       kc    <- knew
       lc    <- lnew
       if (verbose) cat("New number of clusters    :", act.k, "\n\n")
       }
  }
  # final reassignment of the clusters
  if (verbose) cat("Final reassigment...\n")   
  clusupdate <- reassign(x, act.clustering)
  act.clustering$cluster   <- clusupdate$cluster
  act.clustering$centers   <- clusupdate$centers
  act.clustering$size      <- clusupdate$size
  act.clustering$subspaces <- clusupdate$subspaces
  remove(clusupdate) 
  
  # check if due to removal of empty clusters the final cluster size is lower than the prespecified k
  if (length(act.clustering$size) < k){ 
    k <- length(act.clustering$size)
    warning("Due to empty classed after merge less than k classes result!")
    cat("New cluster number due to empty classes:", k, "\n\n")
    }
        
  # compute projected energy per cluster and ad it to result list ...this corresponds to withinss from kmeans
  within.projens <- sapply(1:k, compute.final.projen, x = x, act.clustering = act.clustering, allpts = FALSE)

  # compute sparsity coefficient and add it to result list 
  projens.wholedata     <- sapply(1:k, compute.final.projen, x = x, act.clustering = act.clustering, allpts = TRUE)
  sparsity.coefficient  <- mean(within.projens / projens.wholedata)

  act.clustering$subspace.dimension   <- l
  act.clustering$within.projens       <- within.projens
  act.clustering$sparsity.coefficient <- sparsity.coefficient
  act.clustering$orclus.call <- match.call()
  act.clustering$orclus.call[[1]] <- as.name("orclus")
  
  # create result-object of class orclus
  class(act.clustering) <- "orclus"
  return(act.clustering)   
  }



########################
### predict function
########################

predict.orclus <- function(object, newdata, ...){
  # several error checks 
  if (class(object) != "orclus") stop("Object must be of class oclus!")
  if(is.data.frame(newdata)) newdata <- as.matrix(newdata)
  if(!is.matrix(newdata)) stop("Newdata must be of type matrix or data frame!")
  if(dim(object$centers)[2] != dim(newdata)[2]) stop("Newdata must be of same dimension as the explanatory input variables data set!")
  
  clids         <- 1:length(table(object$cluster))

  # compute distances of the observations to each clusterin the correspondin subspaces
  comp.dists         <- function(obs, clids, object) {
    dists   <- sapply(clids, compute.dist, obs = obs, act.clustering = object)
    return(dists)
    }
  distances <- t(apply(newdata, 1, comp.dists, clids = clids, object = object))
  colnames(distances) <- clids

  # assign clusters with minimal distance   
  assign.cluster    <- function(distances.i) {
    cl <- which.min(distances.i)
    if (length(cl) > 1) cl <- sample(cl, 1) # random sampling in case of non-uniqueness 
    return(cl)
    }
  cls <- apply(distances, 1, assign.cluster)
    
  # create results object
  res <- list()
  res$distances <- distances
  res$cluster   <- cls
  return(res)
  }


########################
### print function
########################

print.orclus <- function(x, ...){
  cat("\nresult from arbitrary ORiented CLUStering\n\n")
  cat("Call:\n")
  print(x$orclus.call)

  cat("\nDimension of subspaces:", x$subspace.dimension, "\n")
  
  cat("\nCluster assignment result: \n")
  print(x$cluster)
  cat("\n")

  cat("\nCorresponding subspaces of the clusters: \n")
  print(x$subspaces)
  cat("\n")

  cat("\nCluster sparsity coefficient: \n")
  print(x$sparsity.coefficient)
  cat("\n\n")
                                                                                    
  invisible(x)
  }

Any scripts or data that you put into this service are public.

orclus documentation built on May 2, 2019, 10:23 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

orclus
Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

R/orclus.R
In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

Defines functions compute.final.projen orclus.default predict.orclus print.orclus

Documented in orclus.default predict.orclus print.orclus

Try the orclus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

orclus Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

R/orclus.R In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

Defines functions compute.final.projen orclus.default predict.orclus print.orclus

Documented in orclus.default predict.orclus print.orclus

Try the orclus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

orclus
Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation

R/orclus.R
In orclus: Subspace Clustering Based on Arbitrarily Oriented Projected Cluster Generation