# R/samples_info_table.R

# Defines functions: info_samples

# From Info_Sample_t we can calculate how many times a sample was associated
# with each label. Therefore, the table Info_Samples_Cluster is created.
#
# id_sample: Identifier of each sample.
#
# Original_Label: Original label of each sample, as given in the sample set.
#
# Neuron_Label: Label of the neuron to which the sample was allocated.
#
# Frequency: Number of times a sample was associated with a label x.
#
# Percentage: Percentage of times a sample was associated with a label x.

# -----------------------------------------------------------------------
# --------------------------- INFO SAMPLES ------------------------------
# -----------------------------------------------------------------------
# Summarise, for every sample, how often it was assigned to each neuron label.
#
# Args:
#   samples_iteration.tb: data frame or tibble with one row per assignment of
#     a sample to a neuron. It must contain the columns `id_sample`,
#     `original_label` and `neuron_label`.
#
# Returns:
#   A tibble with one row per (id_sample, neuron_label) pair that occurred at
#   least once, ordered by id_sample and then by decreasing frequency, with
#   the columns:
#     id_sample      - sample identifier, as found in the input
#     original_label - original label of the sample (character)
#     neuron_label   - neuron label the sample was assigned to (character)
#     frequency      - number of times the sample got that neuron label
#     percentage     - frequency as a percentage of the sample's counted
#                      assignments
#     cluster_label  - the sample's most frequent neuron label
#   An input without rows yields a zero-row tibble with these columns.
#
# Errors:
#   Stops when a required column is missing, when `id_sample` contains NA, or
#   when one sample carries more than one `original_label`.
info_samples <- function(samples_iteration.tb) {
  required_cols <- c("id_sample", "original_label", "neuron_label")
  missing_cols <- setdiff(required_cols, names(samples_iteration.tb))
  if (length(missing_cols) > 0) {
    stop(
      "samples_iteration.tb is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  ids <- samples_iteration.tb$id_sample
  original_labels <- samples_iteration.tb$original_label
  neuron_labels <- samples_iteration.tb$neuron_label

  if (anyNA(ids)) {
    stop("samples_iteration.tb$id_sample must not contain NA", call. = FALSE)
  }

  empty_result <- tibble::tibble(
    id_sample = integer(0),
    original_label = character(0),
    neuron_label = character(0),
    frequency = integer(0),
    percentage = numeric(0),
    cluster_label = character(0)
  )

  # Group row positions by the ids that are actually present (in ascending
  # order) instead of walking 1:max(id), which breaks on empty input and on
  # gaps in the id sequence.
  rows_by_sample <- split(seq_along(ids), ids)

  # Builds the summary rows of one sample; NULL when nothing can be counted.
  summarise_sample <- function(rows) {
    if (length(rows) == 0L) {
      return(NULL)
    }

    # table() ignores NA labels; sort() keeps ties in table (level) order.
    label_counts <- sort(table(neuron_labels[rows]), decreasing = TRUE)
    freq <- as.integer(label_counts)
    # Unused factor levels show up with a zero count and are not reported.
    observed <- freq > 0L
    if (!any(observed)) {
      return(NULL)
    }

    sample_original <- as.character(unique(original_labels[rows]))
    if (length(sample_original) != 1L) {
      stop(
        "sample ", ids[rows[1L]], " has more than one original_label",
        call. = FALSE
      )
    }

    counted_labels <- names(label_counts)[observed]
    pct <- as.numeric(prop.table(label_counts) * 100)[observed]

    tibble::tibble(
      id_sample = ids[rows[1L]],
      original_label = sample_original,
      neuron_label = counted_labels,
      frequency = freq[observed],
      percentage = pct,
      # Counts are sorted decreasingly, so the first label is the majority.
      cluster_label = counted_labels[1L]
    )
  }

  per_sample <- lapply(unname(rows_by_sample), summarise_sample)
  per_sample <- Filter(Negate(is.null), per_sample)
  if (length(per_sample) == 0L) {
    return(empty_result)
  }

  # Bind once at the end rather than growing a tibble inside the loop.
  dplyr::bind_rows(per_sample)
}
# lorenalves/SITSSA documentation built on May 20, 2019, 11:59 a.m.