R/plot.R

#'Plotting Subspace Clusterings
#'
#'@description
#'Plotting for Subspace clusterings as generated by the package \emph{subspace}.
#'
#'
#'
#'Generates a 2d-scatterplot with interactive controls to select the dimensions that should be plotted.\cr
#'            This visualization is created using the ggvis package and is therefore also compatible with shiny.
#'
#'
#'           
#'@param x an S3-Object of type \emph{subspace_clustering} as generated by any of the functions of the \emph{subspace} package
#'@param data The original data matrix on which the clustering was performed.
#'@param color_by a parameter indicating how a point that is in multiple clusters should be colored. 
#'  If "mix" is selected, the point will be colored as a mixture of the colors of both of the clusters that the point is in.
#'  If "any" is selected, a random color is selected from the colors of all the clusters that the point is in.
#'@param standardcolors a vector of strings representing HTML-Colors that will be used to color the points by cluster assignment. 
#'Noise will be colored with the last color in the vector.
#'@param tooltip_on decides if tooltips should be shown on "hover" or on "click"
#'@param ... this is passed on to ggvis::layer_points and can be used to change, for example the size of the points
#'
#'@note When passing ellipsis parameters, the ":=" syntax from ggvis may get in your way, but you can work around this by
#'      manually creating a props object as seen in the example.
#'      
#'      
#'@examples
#' #Load the example dataset for this package
#' data("subspace_dataset")
#' #Load the true clustering for this dataset
#' path_to_clustering <- paste(path.package("subspace"),"/extdata/subspace_dataset.true",sep="")
#' clustering <- clustering_from_file(file_path=path_to_clustering)
#' #also generate a clustering with one of the algorithms
#' clustering2 <- CLIQUE(subspace_dataset,tau=0.2)
#' 
#' #now plot the generated clustering
#' plot(clustering2,subspace_dataset)
#' #plot the true clustering with small points
#' plot(clustering,subspace_dataset,size=0.1)
#' 
#' #Now plot the points with a different shape. 
#' #This requires the workaround that was discussed in "Notes"
#' p <- ggvis::prop(property="shape",x="cross")
#' plot(clustering,subspace_dataset,props=p)
#' 
#' 
#'@return a ggvis object. If the return value is not used, a plot will be shown, but the returned plot can also be altered using ggvis
#'@aliases plot
#'@method plot subspace_clustering
#'@export
#'@import ggvis
plot.subspace_clustering <- function(x,
                                     data,
                                     color_by="mix",
                                     standardcolors=c("#1F77B4","#FF7F0E","#2CA02C","#D62728","#9467BD","#8C564B","#E377C2","#7F7F7F","#BCBD22","#17BECF","#000000"),
                                     tooltip_on="hover",
                                     ...
                                     ) {
  #These two lines are just boilerplate code to silence some NOTEs that R CMD
  #check produces because it is nor familiar with some of the peculiarities of
  #ggvis
  key <- NULL
  fill <- NULL
  
  coloringvector <- to_coloring_vector(clustering=x,len=nrow(data),color_by=color_by,standardcolors=standardcolors)
  
  
  #add the clustering assignment to the data frame so that the points can be colored in.
  newdata <- cbind(data,coloringvector)
  #We would also like to make an id-column, so that we can display the row number of a point in the
  #Tooltip, but we should make sure the name "id" for a column is not taken:
  id_name <- "id"
  while(id_name %in% names(newdata)) {
    id_name <- paste(sample(as.character(0:20)),collapse="")
  }
  newdata[[id_name]] <- 1:nrow(newdata)
  
  #Make interactive controls to select which dimensions of the data should be plotted
  first_select  <- ggvis::input_select(names(data),map=as.name,label="Select X axis")
  second_select <- ggvis::input_select(names(data),selected=names(data)[2],map=as.name,label="Select Y Axis")
    
  #Create the plot. The "%>%" operator is a kind of pipe, provided originally by the magrittr package
  newdata %>%
  ggvis::ggvis(first_select,second_select,key:=as.name(id_name)) %>%
  ggvis::layer_points(fill:=~coloringvector,...) %>%
  ggvis::add_tooltip(html=function(lis){
    #Show a tooltip for each point with information about the data point, including
    #its row number and its cluster assignment, separated by newlines.
    nl <- "<br>"
    clusters <- which(sapply(x,function(cluster){lis[[1]]%in%cluster$objects}))
    paste("Row Number:",lis[[1]],nl,
          "Clusters:",paste(clusters,collapse=", "),nl,
          "X:", lis[[2]],nl,
          "Y:",lis[[3]])  
  },on=tooltip_on) %>%
  ggvis::add_axis("x",title="X") %>%
  ggvis::add_axis("y",title="Y")
}

to_coloring_vector <- function(clustering,len,color_by,standardcolors) {
  if(!"subspace_clustering"%in%class(clustering)) {
    print("object passed to 'to_coloring_vector' is not a subspace_clustering and therefore no coloring vector
          could be created.")
    return(NULL)
  }
  if(color_by=="mix") {
    coloringvector <- list_assignments_by_object(clustering,len=len)
    coloringvector <- lapply(coloringvector,function(numbers){sapply(numbers,function(num){get_standard_color_for_number(num,standardcolors)})})
    coloringvector <- sapply(coloringvector,mix)
  } 
  else if(color_by=="any") {
    coloringvector <- list_assignments_by_object(clustering,len=len)
    coloringvector <- sapply(coloringvector,function(vec){vec[1]})
    coloringvector <- sapply(coloringvector,function(num){get_standard_color_for_number(num,standardcolors)})
  } else if (color_by=="number") {
    coloringvector <- list_assignments_by_object(clustering,len=len)
    coloringvector <- sapply(coloringvector,function(vec){vec[1]})
  } else {
    print("coloring method")
    print(color_by)
    print("is not implemented")
    return(NULL)
  }
  return(coloringvector)
}

list_assignments_by_object <- function(clustering,len) {
  #Res becomes a list where res[[n]] is vector of the numbers of the clusters
  #To which the i-th object is assigned. 
  res <- lapply(1:len,function(number){
    which(sapply(clustering,function(elem){number %in% elem$objects}))
  })
  #Outlier objects (those not assigned to any clusters) get assigned to a special 'cluster 0'
  res[sapply(res,function(vec){length(vec)==0})] <- 0
  return(res)
}

#Gets the HTML String for the num-th standard color
get_standard_color_for_number <- function(num,standardcolors) {
  if(num %in% 1:length(standardcolors)){
    return(standardcolors[num])
  } else if (num > length(standardcolors)) {
    return(get_standard_color_for_number(num-length(standardcolors),standardcolors))
  } else {
    return(get_standard_color_for_number(num+length(standardcolors),standardcolors))
  }
}

#These two functions implement the color mixing that is necessary for coloring points correctly.
#mix takes a vector of HTML Colors and mixes them together, weighting each color equally, e.g. mixing equal
#parts red blue and green always gives you grey and the result does not depend on the order of the vector.
mix <- function(vec) {
  if(length(vec)==1){
    return(vec)
  } else {
    colorspace::hex(list_reduce_wt(f=function(a,b,wt){colorspace::mixcolor(alpha=wt,a,b)},lis=lapply(vec,colorspace::hex2RGB)))
  }
}
#Utility function for the mix function.
list_reduce_wt <- function(lis,f) {
  cur <- lis[[1]]
  for (i in 2:length(lis)) {
    cur <- f(cur,lis[[i]],wt=1/i)
  }
  return(cur)
}

#This Function used to be in the package during development but is now deprecated
# overview <- function(clustering,data) {
#   coloring_vector <- to_coloring_vector(clustering,nrow(data),color_by="number")
#   plot(data,col=coloring_vector)
# }

# boilerplate code to prevent R CMD check from complaining about the absence of
# a global function definition
`:=` <- function(x, value) {
  stop("You are getting this error message due to a bug in the package 'subspace'. Please contact its maintainer.", call. = FALSE)
}

Try the subspace package in your browser

Any scripts or data that you put into this service are public.

subspace documentation built on May 2, 2019, 11:11 a.m.