R/cluspath.R
In condvis2: Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny

Documented in centroidPath claraPath kmeansPath medoid medoidPath pamPath

#' Constructs  tours of data space based on centers of clusters
#' 
#' @name clusPath
#' @param data A dataframe
#' @param length Path length, defaults to 10
#' @param reorder If TRUE  uses DendSer to reorder the path \code{\link[DendSer]{dser}}
#' @param conditionvars A vector of variable names. The returned tour is for this subset of variables.
#' @param maxn (pamPath only) For datasets with more than maxn rows, use maxn randomly selected rows.
#' @param cl A vector specifying cluster membership for rows of data.
#' @param ... ignored
#' @return A dataframe with the path
#' @importFrom cluster  daisy pam clara
#' @importFrom DendSer  dser
#' @examples
#' kmeansPath(mtcars,length=4)
#' pamPath(mtcars,length=4)
#' claraPath(mtcars,length=4)
#' medoidPath(mtcars,cl=rep(1:3, length.out=nrow(mtcars)))

NULL


#' @describeIn clusPath Constructs a tour of data space following  length k-means centroids
#' @export
kmeansPath<- function(data, length=10, reorder=TRUE,conditionvars=NULL,...){
  if (length(conditionvars)==0) conditionvars <- NULL
  if (length > nrow(data)) {
    warning("Pick length <= nrows")
    return(NULL)
  }
  if (!is.null(conditionvars)) data <- data[,conditionvars,drop=FALSE]
  n <- nrow(data)
  nfac <-sapply(data, is.factor)
  # if (sum(nfac) == n){
  #   print("cannot calculate kmeans path")
  #   return(NULL)
  # }

  if (sum(nfac)==0) datan <- data else datan <- data[,!nfac,drop=FALSE]

  if (sum(nfac) < ncol(data)){
  x <- scale(datan)

  means <- attr(x, "scaled:center")
  sds <- attr(x, "scaled:scale")
  } else {
    x <- NULL
    datan <- NULL
  }

  if (sum(nfac) >0){
  facs <- names(data)[nfac]
  dummylist<- vector("list", length(facs))

  for (i in seq(along=facs)){
    z <- data[[facs[i]]]
    levs <- levels(z)
    res <- matrix(0, nrow=length(z),ncol=length(levs))
    res[cbind(seq(along=z),match(z, levs))]<- 1
    dummylist[[i]] <- res
  }
  dummy <- do.call(cbind,dummylist)
    x <- cbind(x, dummy)
  }
  
  clustering <- kmeans(x, centers = length)
  centers <- clustering$centers
 
  if (reorder & nrow(centers)> 2){
    d <- dist(centers)
    o <- DendSer::dser(d)
    centers <- centers[o,,  drop = FALSE]
  }
  if (sum(nfac) < ncol(data)){
  centersn <- centers[,1:ncol(datan), drop=FALSE]
  centersn <- data.frame(sapply(seq(along=means), function(i) centersn[,i]*sds[i]+ means[i]))
  names(centersn)<- names(datan)
  result <- centersn
  } else result <- NULL

   # if (sum(nfac)>0){
   #   s <- sample(nrow(data),length)
   #   rpath<- data[s, nfac]
   #
   #   centers <- cbind(centers, rpath)
   # }

  if (sum(nfac) >0){
    if (sum(nfac) == ncol(data))
      centersf <- centers else
        centersf <- centers[,-(1:ncol(datan))]
    w <- cumsum(c(1,sapply(dummylist, ncol)))
     facc <- lapply(seq(along=w[-1]), function(i) {
      z <- centersf[, w[i]: (w[i+1]-1)]
      factor(levels(data[[facs[i]]])[apply(z,1, which.max)])
    })
    facc<- data.frame(facc)
    names(facc)<- facs
    if (!is.null(result)) result <- cbind(result, facc) else result <- facc
  }
  result
}

#' @describeIn clusPath  Constructs a tour of data space following  length \code{\link[cluster]{pam}} medoids
#' @export
pamPath<- function(data, length=10, reorder=TRUE,conditionvars=NULL,maxn=4000,...){
  if (is.numeric(maxn) && nrow(data) > maxn){
    data <- data[sample(nrow(data), maxn),]
  }
  if (length(conditionvars)==0) conditionvars <- NULL
  if (length > nrow(data)) {
    warning("Pick length <= nrows")
    return(NULL)
  }
  if (!is.null(conditionvars)) data <- data[,conditionvars,drop=FALSE]
  if (nrow(data) >= 5000) {
    print("Calculating Kmed path...")
    do.swap <- FALSE}
  else do.swap <- TRUE
  
  d <- cluster::daisy(data,stand=TRUE, warnType=FALSE)
  clustering <- cluster::pam(d, k = length,pamonce=5, do.swap=do.swap,
                             keep.diss=FALSE, keep.data=FALSE)
  centers <- data[clustering$medoids, ,drop=F]
 
  if (reorder){
    d <- cluster::daisy(centers,stand=TRUE, warnType=FALSE)
    o <- DendSer::dser(d)
    centers <- centers[o,,drop=F]
  }
  if (nrow(data) >= 5000) print("Kmed path calculated")
  centers
}

#' @describeIn clusPath  Constructs a tour of data space following  length \code{\link[cluster]{clara}} medoids
#' @export
claraPath<- function(data, length=10, reorder=TRUE,conditionvars=NULL,...){
  if (length(conditionvars)==0) conditionvars <- NULL
  # data must be numeric
  if (length > nrow(data)) {
    warning("Pick length <= nrows")
    return(NULL)
  }

  if (!is.null(conditionvars)) data <- data[,conditionvars,drop=FALSE]
  nnum <-sapply(data, is.numeric)
  if (!all(nnum)) {
    warning("All variables must be numeric for clara")
    return(NULL)
  }
  clustering <- cluster::clara(data, k = length, stand=TRUE)
  centers <- clustering$medoids
  
  if (reorder){
    d <- dist(scale(centers))
    o <- DendSer::dser(d)
    centers <- centers[o,,drop=F]
  }
  data.frame(centers)
}

#' @describeIn clusPath Returns a path visiting cluster medoids
#' @export

medoidPath<- function(data, cl,reorder=FALSE){
  clu <- unique(cl)
  centers <- data[1:length(clu),]
  for (i in clu){
    r <- which(cl==i)
    m <-medoid(data[r,])
    centers[i,] <-as.list(m)
  }
  if (reorder){
    d <- cluster::daisy(centers,stand=TRUE,warnType=FALSE)
    o <- DendSer::dser(d)
    centers <- centers[o,,drop=F]
  }
  centers
}
# medoidPath<- function(data, cl,reorder=FALSE){
#   clu <- unique(cl)
#   d <- as.matrix(cluster::daisy(data, stand=TRUE))
#   rows <- sapply(clu, function(i){ r <- which(cl==i)
#   m <- which.min(colMeans(d[r,r,drop=FALSE]))
#   r[m]
#   })
#   centers <- data[rows,,drop=FALSE]
#   if (reorder){
#     d <- cluster::daisy(centers,stand=TRUE)
#     o <- DendSer::dser(d)
#     centers <- centers[o,,drop=F]
#   }
#   centers
# }


#' @describeIn clusPath Returns a path visiting cluster centroids
#' @export

centroidPath<- function(data, cl,reorder=FALSE){
  centers <- aggregate(data,list(cl),mean)[,-1]
  if (reorder){
    d <- cluster::daisy(centers,stand=TRUE, warnType=FALSE)
    o <- DendSer::dser(d)
    centers <- centers[o,,drop=F]
  }
  centers
}


#' Finds medoid of data
#' 
#' @param data A dataframe
#' @param maxn For datasets with more than maxn rows, use maxn randomly selected rows.
#' @return A dataframe with one row, which is the medoid of the data, based on (standardised) daisy dist
#' @export
#' @examples
#' medoid(mtcars)
#' 

medoid<- function(data, maxn = 4000) {
  if (is.numeric(maxn) && nrow(data) > maxn){
    data <- data[sample(nrow(data), maxn),]
  }
  ctype <- which(sapply(data, function(v) !is.numeric(v) & ! is.factor(v)))
  for (ct in ctype) data[[ct]] <- as.factor(data[[ct]])
  
  d <- as.matrix(cluster::daisy(data,stand=TRUE, warnType=FALSE))
  w <- which.min(colMeans(d))
  data[w, ,drop=F]
}

Any scripts or data that you put into this service are public.

condvis2 documentation built on Sept. 14, 2022, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

condvis2
Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny

R/cluspath.R
In condvis2: Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny

Defines functions medoid centroidPath medoidPath claraPath pamPath kmeansPath

Documented in centroidPath claraPath kmeansPath medoid medoidPath pamPath

Try the condvis2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

condvis2 Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny

R/cluspath.R In condvis2: Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny

Defines functions medoid centroidPath medoidPath claraPath pamPath kmeansPath

Documented in centroidPath claraPath kmeansPath medoid medoidPath pamPath

Try the condvis2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

condvis2
Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny

R/cluspath.R
In condvis2: Interactive Conditional Visualization for Supervised and Unsupervised Models in Shiny