R/Metrics_by_cluster.R

#
# Confusion_by_class: Returns the percentage of mixture between class Cx and the other classes.
#
# Confusion_by_Cluster_Label: Returns the percentage of mixture between class Cx and the other
# classes, computed from Cluster_label; the number of samples equals that of the original sample set.
#
# Confusion_by_Neuron_Label: Returns the percentage of mixture between class Cx and the other classes,
# computed from the Neuron_Label of T iterations, so the number of samples corresponds to
# (number of samples x T).

# confusion_by_cluster: for every original class, computes the percentage of its
# samples that ended up under each cluster label, and summarises the
# class-by-cluster table with caret::confusionMatrix().
#
# Arguments:
#   info_sample_cluster.tb  table containing (at least) the columns id_sample,
#                           original_label and cluster_label.
#
# Returns (invisibly, as the original implementation did) a list of class
# "SITSSA" with two elements:
#   confusion_by_cluster     tibble with the columns id_class (row index of the
#                            class in the table), original_class, cluster and
#                            mixture_percentage; within each class the rows are
#                            sorted by decreasing percentage and zero entries
#                            are dropped.
#   summary_confusionMatrix  the object returned by caret::confusionMatrix().
confusion_by_cluster <- function(info_sample_cluster.tb) {
  # Keep one row per distinct (id, original label, cluster label) combination.
  samples.tb <- unique(
    dplyr::select(info_sample_cluster.tb, id_sample, original_label, cluster_label)
  )

  # Rows are the original classes, columns are the cluster labels.
  confusion.matrix <- table(samples.tb$original_label, samples.tb$cluster_label)
  class_names <- rownames(confusion.matrix)

  # A class that never appears as a cluster label has no column in the table.
  # Add an all-zero column for each such class and put the columns in the same
  # order as the rows, so the table passed to caret::confusionMatrix() lists
  # the same classes on both dimensions.
  no_cluster <- setdiff(class_names, colnames(confusion.matrix))
  if (length(no_cluster) > 0) {
    padding <- matrix(
      0,
      nrow = nrow(confusion.matrix),
      ncol = length(no_cluster),
      dimnames = list(class_names, no_cluster)
    )
    padded <- cbind(unclass(confusion.matrix), padding)
    # Classes first (row order), then any cluster label that is not a class.
    column_order <- c(
      class_names,
      setdiff(colnames(confusion.matrix), class_names)
    )
    confusion.matrix <- as.table(padded[, column_order, drop = FALSE])
  }

  # Take the names from the table itself: subsetting a single-column table
  # drops them, so they cannot be recovered from the extracted row.
  cluster_names <- colnames(confusion.matrix)
  class_totals <- rowSums(confusion.matrix)

  mix_by_class <- lapply(seq_along(class_names), function(d) {
    percentage <- as.numeric(confusion.matrix[d, ]) / class_totals[[d]] * 100

    # which() discards both the zero entries and any NA/NaN percentage.
    keep <- which(percentage > 0)
    # order() on the negated values sorts decreasingly and leaves ties in
    # their original (column) order.
    keep <- keep[order(-percentage[keep])]

    tibble::tibble(
      id_class = rep(d, length(keep)),
      original_class = rep(class_names[d], length(keep)),
      cluster = cluster_names[keep],
      mixture_percentage = percentage[keep]
    )
  })
  Mix_class <- dplyr::bind_rows(mix_by_class)

  info_confusion_matrix <- caret::confusionMatrix(confusion.matrix)

  confusion <- structure(
    list(
      confusion_by_cluster = Mix_class,
      summary_confusionMatrix = info_confusion_matrix
    ),
    class = "SITSSA"
  )

  invisible(confusion)
}

# plot_confusion_by_class: draws a dodged bar chart with one group of bars per
# original class and one bar per label inside each group.
#
# Arguments:
#   Mix_class  table with the columns Original_Class (x axis),
#              Percentage_Class (bar height) and Neuron_Label (bar fill).
#   class      "All" (default) to plot every class, or the name(s) of the
#              class(es) to keep; rows whose Original_Class is not listed are
#              removed before plotting.
#
# Returns the ggplot object, with the manual fill palette applied.
plot_confusion_by_class <- function(Mix_class, class = "All") {
  if (!identical(class, "All")) {
    # %in% never yields NA, so plain logical subsetting is safe here and does
    # not depend on dplyr being attached.
    keep <- Mix_class$Original_Class %in% class
    Mix_class <- Mix_class[keep, , drop = FALSE]
  }

  fill_palette <- c(
    "#BDB76B",
    "#EEE8AA",
    "#EEDD82",
    "#B0E2FF",
    "#5CACEE",
    "#008B00",
    "#4682B4",
    "#98FB98",
    "#D2691E",
    "#F5DEB3",
    "#F4A460",
    "#FFFF00"
  )

  # A manual scale fails at render time when it has fewer colours than labels;
  # interpolate additional colours in that case instead of erroring.
  labels <- Mix_class$Neuron_Label
  n_labels <- length(unique(labels[!is.na(labels)]))
  if (n_labels > length(fill_palette)) {
    fill_palette <- grDevices::colorRampPalette(fill_palette)(n_labels)
  }

  # The scale is part of the returned object: the previous code added it to
  # the plot but threw that result away, so the palette was never applied.
  # NOTE(review): a manual fill scale requires a discrete Neuron_Label
  # (character/factor) - confirm against callers.
  p <- ggplot2::ggplot(
    Mix_class,
    ggplot2::aes(
      x = Original_Class,
      y = Percentage_Class,
      fill = Neuron_Label
    )
  ) +
    ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge()) +
    ggplot2::theme_minimal() +
    ggplot2::scale_fill_manual(values = fill_palette)

  p
}
lorenalves/SITSSA documentation built on May 20, 2019, 11:59 a.m.