Onassis.R
In Onassis: OnASSIs Ontology Annotation and Semantic SImilarity software

## ----imports, echo=FALSE,eval=TRUE, message=FALSE, warning=FALSE--------------
library(Onassis)
library(DT)
library(gplots)
library(org.Hs.eg.db)
library(kableExtra)
library(Rtsne)
library(dendextend)
library(clusteval)
library(ggplot2)
library(ggfortify)


## ----installing_onassis, echo=TRUE, eval=FALSE--------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("Onassis")

## ----load_onasssis, echo=TRUE, eval=TRUE--------------------------------------
library(Onassis)

## ----connectTodb, echo=TRUE,eval=FALSE----------------------------------------
#  
#  require('GEOmetadb')
#  
#  ## Running this function might take some time if the database (6.8GB) has to be downloaded.
#  geo_con <- connectToGEODB(download=TRUE)
#  
#  #Showing the experiment types available in GEO
#  experiments <- experiment_types(geo_con)
#  
#  #Showing the organism types available in GEO
#  species <- organism_types(geo_con)
#  
#  #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array
#  expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570')
#  
#  #Retrieving the metadata associated to experiment type "Methylation profiling by high througput sequencing"
#  meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens')

## ----experimentTypesshow, echo=FALSE, eval=TRUE-------------------------------
experiments <- readRDS(system.file('extdata', 'vignette_data', 'experiment_types.rds', package='Onassis'))

knitr::kable(as.data.frame(experiments[1:10]), col.names = c('experiments')) %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "300px", height = "200px")

## ----speciesShow, echo=FALSE,eval=TRUE----------------------------------------
species <- readRDS(system.file('extdata', 'vignette_data', 'organisms.rds', package='Onassis'))
knitr::kable(as.data.frame(species[1:10]), col.names=c('species')) %>%
              kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "300px", height = "200px")

## ----loadgeoMetadata, echo=TRUE, eval=TRUE------------------------------------
meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package='Onassis'))

## ----printmeta, echo=FALSE,eval=TRUE------------------------------------------

methylation_tmp <- meth_metadata
methylation_tmp$experiment_summary <- sapply(methylation_tmp$experiment_summary, function(x) substr(x, 1, 50))
knitr::kable(methylation_tmp[1:10,], 
              caption = 'GEOmetadb metadata for Methylation profiling by high throughput sequencing (only the first 10 entries are shown).') %>%
              kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "300px")


## ----connectSRA, echo=TRUE,eval=FALSE-----------------------------------------
#  # Optional download of SRAdb and connection to the corresponding sqlite file
#  require(SRAdb)
#  sqliteFileName <- '/pathto/SRAdb.sqlite'
#  sra_con <- dbConnect(SQLite(), sqliteFileName)
#  
#  # Query for the ChIP-Seq experiments contained in GEO for human samples
#  library_strategy <- 'ChIP-Seq' #ChIP-Seq data
#  library_source='GENOMIC'
#  taxon_id=9606 #Human samples
#  center_name='GEO' #Data from GEO
#  
#  # Query to the sample table
#  samples_query <- paste0("select sample_accession, description, sample_attribute, sample_url_link from sample where taxon_id='", taxon_id, "' and sample_accession IS NOT NULL", " and center_name='", center_name, "'"  )
#  
#  samples_df <- dbGetQuery(sra_con, samples_query)
#  samples <- unique(as.character(as.vector(samples_df[, 1])))
#  
#  experiment_query <- paste0("select experiment_accession, center_name, title, sample_accession, sample_name, experiment_alias, library_strategy, library_layout, experiment_url_link, experiment_attribute from experiment where library_strategy='", library_strategy, "'" , " and library_source ='", library_source,"' ", " and center_name='", center_name, "'" )
#  experiment_df <- dbGetQuery(sra_con, experiment_query)
#  
#  #Merging the columns from the sample and the experiment table
#  experiment_df <- merge(experiment_df, samples_df, by = "sample_accession")
#  
#  # Replacing the '_' character with white spaces
#  experiment_df$sample_name <- sapply(experiment_df$sample_name, function(value) {gsub("_", " ", value)})
#  experiment_df$experiment_alias <- sapply(experiment_df$experiment_alias, function(value) {gsub("_", " ", value)})
#  
#  sra_chip_seq <- experiment_df

## ----readCHIP, echo=TRUE, eval=TRUE-------------------------------------------
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds',  package='Onassis'))

## ----printchromatinIP, echo=FALSE,eval=TRUE-----------------------------------

knitr::kable(head(sra_chip_seq, 10), rownames=FALSE, 
  caption = 'Metadata of ChIP-seq human samples obtained from SRAdb (first 10 entries)') %>%
              kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "300px")


## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE------------
# If a Conceptmapper dictionary is already available the dictType CMDICT can be specified and the corresponding file loaded
sample_dict <- CMdictionary(inputFileOrDb=system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT')

#Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs
obo <- system.file('extdata', 'sample.cs.obo', package='OnassisJavaLibs')

sample_dict <- CMdictionary(inputFileOrDb=obo, outputDir=getwd(), synonymType='ALL')

# Creation of a dictionary for human genes/proteins. This requires org.Hs.eg.db to be installed
require(org.Hs.eg.db)
targets <- CMdictionary(dictType='TARGET', inputFileOrDb = 'org.Hs.eg.db', synonymType='EXACT')

## ----settingOptions, echo=TRUE,eval=TRUE--------------------------------------

#Creating a CMoptions object and showing hte default parameters 
opts <- CMoptions()  
show(opts)

## ----listCombinations, echo=TRUE, eval=TRUE-----------------------------------
combinations <- listCMOptions()

## ----setsynonymtype, echo=TRUE, eval=TRUE-------------------------------------
myopts <- CMoptions(SynonymType='EXACT_ONLY')
myopts

## ----changeparameter, echo=TRUE, eval=TRUE------------------------------------
#Changing the SearchStrategy parameter
SearchStrategy(myopts) <- 'SKIP_ANY_MATCH_ALLOW_OVERLAP'
myopts

## ----EntityFinder, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds',  package='Onassis'))
chipseq_dict_annot <- EntityFinder(sra_chip_seq[1:50, c('experiment_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary=sample_dict, options=myopts)

## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE--------------------

knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE, 
  caption = 'Annotating the metadata of DNA methylation sequencing experiments with a dictionary including CL (Cell line) and UBERON ontologies') %>% kable_styling() %>%
  scroll_box(width = "80%", height = "400px")

## ----filtering_out_terms, echo=TRUE, eval=TRUE, message=FALSE-----------------
chipseq_dict_annot <- filterTerms(chipseq_dict_annot, c('cell', 'tissue'))

## ----showchipresults_filtered, echo=FALSE, eval=TRUE, message=FALSE-----------

knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE, 
  caption = 'Filtered Annotations') %>% kable_styling() %>%
  scroll_box(width = "80%", height = "400px")

## ----annotateGenes, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
#Finding the TARGET entities
target_entities <- EntityFinder(input=sra_chip_seq[1:50, c('experiment_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], options = myopts, dictionary=targets)

## ----printKable, echo=FALSE, eval=TRUE----------------------------------------
knitr::kable(head(target_entities, 20), 
  caption = 'Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----similarity, echo=TRUE, eval=TRUE, message=FALSE--------------------------
#Instantiating the Similarity
similarities <- listSimilarities()

## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE------------------

#Retrieving URLS of concepts
found_terms <- as.character(unique(chipseq_dict_annot$term_url))

# Creating a dataframe with all possible couples of terms and adding a column to store the similarity
pairwise_results <- t(combn(found_terms, 2))
pairwise_results <- cbind(pairwise_results, rep(0, nrow(pairwise_results)))

# Similarity computation for each couple of terms
for(i in 1:nrow(pairwise_results)){
  pairwise_results[i, 3] <- Similarity(obo, pairwise_results[i,1], pairwise_results[i, 2])
}
colnames(pairwise_results) <- c('term1', 'term2', 'value')  

# Adding the term names from the annotation table to the comparison results 
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term2', by.y='term_url')
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term2_name'
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term1', by.y='term_url')
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term1_name'
pairwise_results <- unique(pairwise_results)
# Reordering the columns
pairwise_results <- pairwise_results[, c('term1', 'term1_name', 'term2', 'term2_name', "value")]

## ----showSim, echo=FALSE, eval=TRUE-------------------------------------------
knitr::kable(pairwise_results,  
  caption = 'Pairwise similarities of cell type terms annotating the ChIP-seq metadata') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE------------------

oboprefix <- 'http://purl.obolibrary.org/obo/'
Similarity(obo, paste0(oboprefix, c('CL_0000055', 'CL_0000066')) , paste0(oboprefix, c('CL_0000542', 'CL_0000236')))

## ----groupwise_measures_2, echo=TRUE, eval=TRUE, message=FALSE----------------
Similarity(obo, paste0(oboprefix, c('CL_0000055', 'CL_0000236' ,'CL_0000236')), paste0(oboprefix, c('CL_0000542', 'CL_0000066')))

## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE, fig.height=14, fig.width=16----

# Extracting all the couples of samples 
annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id)))

samples_couples <- t(combn(annotated_samples, 2))

# Computing the samples semantic similarity 
similarity_results <- apply(samples_couples, 1, function(couple_of_samples){
  Similarity(obo, couple_of_samples[1], couple_of_samples[2], chipseq_dict_annot)
})


#Creating a matrix to store the results of the similarity between samples
similarity_matrix <- matrix(0, nrow=length(annotated_samples), ncol=length(annotated_samples))
rownames(similarity_matrix) <- colnames(similarity_matrix) <- annotated_samples

# Filling the matrix with similarity values 
similarity_matrix[lower.tri(similarity_matrix, diag=FALSE)] <- similarity_results
similarity_matrix <- t(similarity_matrix)
similarity_matrix[lower.tri(similarity_matrix, diag=FALSE)] <- similarity_results 
# Setting the diagonal to 1
diag(similarity_matrix) <- 1

# Pasting the annotations to the sample identifiers
samples_legend <- aggregate(term_name ~ sample_id, chipseq_dict_annot, function(aggregation) paste(unique(aggregation), collapse=',' ))
rownames(similarity_matrix) <- paste0(rownames(similarity_matrix), ' (', samples_legend[match(rownames(similarity_matrix), samples_legend$sample_id), c('term_name')], ')')

# Showing a heatmap of the similarity between samples
heatmap.2(similarity_matrix, density.info = "none", trace="none", main='Samples\n semantic\n similarity', margins=c(12,50), cexRow = 2, cexCol = 2, keysize = 0.5)

## ----load_h3k27ac, echo=TRUE, eval=TRUE---------------------------------------
h3k27ac_chip <- readRDS(system.file('extdata', 'vignette_data', 'h3k27ac_metadata.rds',  package='Onassis'))

## ----onassis_class_usage, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
cell_annotations <- Onassis::annotate(h3k27ac_chip, 'OBO', obo, FindAllMatches='YES' )

## ----show_onassis_annotations, echo=TRUE, eval=TRUE---------------------------
cell_entities <- entities(cell_annotations) 

## ----cellentities, echo=FALSE, eval=TRUE--------------------------------------
knitr::kable(cell_entities[sample(nrow(cell_entities), 10),], 
  caption = ' Semantic sets of ontology concepts (entities) associated to each sample, stored in the entities slot of the Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----term_filtering, echo=TRUE, eval=TRUE-------------------------------------
filtered_cells <- filterconcepts(cell_annotations, c('cell', 'tissue'))

## ----showingfiltentities, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(entities(filtered_cells), 
  caption = 'Entities in filtered Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----similarity_of_samples, echo=TRUE, eval=TRUE------------------------------
filtered_cells <- sim(filtered_cells)

## ----collapsing_similarities, echo=TRUE, eval=TRUE, message=FALSE, results='hide', fig.width=6, fig.height=6----
collapsed_cells <- Onassis::collapse(filtered_cells, 0.9)

## ----collapsedcellstable, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(entities(collapsed_cells), 
  caption = ' Collapsed Entities in filtered Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----show_collapsed_entities, echo=TRUE, eval=TRUE, fig.height=11, fig.width=11----
heatmap.2(simil(collapsed_cells), density.info = "none", trace="none", margins=c(36, 36), cexRow = 1.5, cexCol = 1.5, keysize=0.5)

## ----creating_disease_annotations, echo=TRUE, eval=TRUE, message=FALSE, results='hide'----
obo2 <- system.file('extdata', 'sample.do.obo', package='OnassisJavaLibs')
disease_annotations <- Onassis::annotate(h3k27ac_chip, 'OBO',obo2, disease=TRUE )

## ----diseases, echo=FALSE, eval=TRUE------------------------------------------
knitr::kable(entities(disease_annotations), 
  caption = ' Disease entities') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----merging_onassis_entities, echo=TRUE, eval=TRUE, message=FALSE------------
cell_disease_onassis <- mergeonassis(collapsed_cells, disease_annotations)

## ----showingmergedentities, echo=FALSE, eval=TRUE-----------------------------
knitr::kable(entities(cell_disease_onassis), 
  caption = ' Cell and disease entities') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----loading_score_matrix, echo=TRUE, eval=TRUE-------------------------------
score_matrix <- readRDS(system.file('extdata', 'vignette_data', 'score_matrix.rds', package='Onassis'))

## ----compare_tissues_by_col, echo=TRUE, eval=TRUE, silent=TRUE, message=FALSE----

cell_comparisons_by_col <- compare(collapsed_cells, score_matrix=as.matrix(score_matrix), by='col', fun_name='wilcox.test')

matrix_of_p_values <- matrix(NA, nrow=nrow(cell_comparisons_by_col), ncol=ncol(cell_comparisons_by_col))
for(i in 1:nrow(cell_comparisons_by_col)){
  for(j in 1:nrow(cell_comparisons_by_col)){
   result_list <- cell_comparisons_by_col[i,j][[1]]
   matrix_of_p_values[i, j] <- result_list[2]
  }
}
colnames(matrix_of_p_values) <- rownames(matrix_of_p_values) <- colnames(cell_comparisons_by_col)

## ----show_p_values_of_t_test, echo=TRUE, eval=TRUE, fig.height=11, fig.width=11----
heatmap.2(-log10(matrix_of_p_values), density.info = "none", trace="none", main='Changes in\n H3K27ac signal \nin promoter regions', margins=c(36,36), cexRow = 1.5, cexCol = 1.5, keysize=1)

## ----compare_tissues, echo=TRUE, eval=TRUE, message=FALSE, silent=TRUE, waring=FALSE----
cell_comparisons <- compare(collapsed_cells, score_matrix=as.matrix(score_matrix), by='row', fun_name='wilcox.test', fun_args=list(exact=FALSE))

# Extraction of p-values less than 0.1
significant_features <- matrix(0, nrow=nrow(cell_comparisons), ncol=ncol(cell_comparisons))
colnames(significant_features) <- rownames(significant_features) <- rownames(cell_comparisons)
for(i in 1:nrow(cell_comparisons)){
  for(j in 1:nrow(cell_comparisons)){
   result_list <- cell_comparisons[i,j][[1]]
   significant_features[i, j] <- length(which(result_list[,2]<=0.1))
  }
}

## ----significantRegions, echo=FALSE, eval=TRUE--------------------------------
knitr::kable(significant_features, 
  caption = ' Number of promoter regions with p.value <=0.1') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----compare_diseases, echo=TRUE, eval=TRUE, message=FALSE, silent=TRUE-------

disease_comparisons <- compare(cell_disease_onassis, score_matrix=as.matrix(score_matrix), by='row', fun_name='wilcox.test', fun_args=list(exact=FALSE))


## ----show_disease_semantic_comparisons, echo=TRUE, eval=TRUE------------------
rownames(disease_comparisons$`breast [8], mammary gland epithelial cell [2], gland [1] (11)`)

## ----show_breast_cancer, echo=TRUE, eval=TRUE---------------------------------
selprom <- (disease_comparisons$`breast [8], mammary gland epithelial cell [2], gland [1] (11)`[2,1][[1]])
selprom <- selprom[is.finite(selprom[,2]),]
head(selprom)

## ----compute_table_of_significant_regions, echo=TRUE, eval=TRUE---------------
disease_matrix <- disease_comparisons[[1]]

# Extracting significant p-values
significant_features <- matrix(0, nrow=nrow(disease_matrix), ncol=ncol(disease_matrix))
colnames(significant_features) <- rownames(significant_features) <- rownames(disease_matrix)
for(i in 1:nrow(disease_matrix)){
  for(j in 1:nrow(disease_matrix)){
   result_list <- disease_matrix[i,j][[1]]
   significant_features[i, j] <- length(which(result_list[,2]<=0.1))
  }
}


## ----showingsignificantregions2, echo=FALSE, eval=TRUE------------------------
knitr::kable(significant_features, 
  caption = ' Number of promoter regions with p.value <= 0.1 ') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----score_comparison, echo=T, eval=T, message=F------------------------------

personal_t <- function(x, y){
    if(is.matrix(x))
      x <- apply(x, 1, mean)
    if(is.matrix(y))
      y <- apply(y, 1, mean)
    signal_to_noise_statistic <- abs(mean(x) - mean(y)) / (sd(x) + sd(y))
    return(list(statistic=signal_to_noise_statistic, p.value=NA))
}

disease_comparisons <- compare(cell_disease_onassis, score_matrix=as.matrix(score_matrix), by='col', fun_name='personal_t')

## ----correspondence, echo=T, eval=T, results='hide'---------------------------

onassis_annotations <- entities(filtered_cells)
# Sorting the annotations based on the order of the score matrix labels
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]

# Creating a score matrix with scrambled annotations 
random_score_matrix <- score_matrix[,sample(1:ncol(score_matrix))]

# Running t-SNE on the score matrix and on the random matrix
set.seed(456)
tsne_out <- Rtsne(t(score_matrix), perplexity = 10, dims=3)$Y 
random_tsne_out = Rtsne(t(random_score_matrix), perplexity = 10, dims=3)$Y 
tsne_hclus <- hclust(dist(tsne_out[,c(1,2)]),  method='ward.D')
random_tsne_hclus <- hclust(dist(random_tsne_out[,c(1,2)]) , method='ward.D')


# Vetors to store the similarities of data driven and semantic driven clusters
real_jaccard_similarity_vector <- c()
random_jaccard_similarity_vector <- c()


sequence_of_similarities <- rev(seq(0.1, 1, 0.1))
sequence_of_similarities[1] <- 0.99

for(i in sequence_of_similarities){
  collapsed_cells <- Onassis::collapse(filtered_cells,  as.numeric(i)) 
  onassis_annotations <- entities(collapsed_cells)
  onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]
  
  # Clustering based on the tSNE results
  tsne_clus <- factor(cutree(tsne_hclus, length(unique(onassis_annotations$short_label))))

  # Similarity of the tSNE driven vs semantic set driven clusters
  real_jaccard_similarity_vector <- c(real_jaccard_similarity_vector, cluster_similarity(tsne_clus, as.numeric(factor(onassis_annotations$short_label)), similarity='jaccard'))  
  
  # Clustering of the random matrix
  random_tsne_clus <- factor(cutree(random_tsne_hclus, length(unique(onassis_annotations$short_label))))

  # Similarity of the tSNE driven vs semantic set driven clusters
  random_jaccard_similarity_vector <- c(random_jaccard_similarity_vector, cluster_similarity(random_tsne_clus, as.numeric(factor(onassis_annotations$short_label)), similarity='jaccard')) 
}
names(real_jaccard_similarity_vector) <- rev(seq(0.1, 1, 0.1))


## ----plot_jaccar, echo=T, eval=T----------------------------------------------
# Plotting the Jaccard similarities for real and random datasets 
myx <- rev(seq(0.1, 1, 0.1))
myylim <- max(random_jaccard_similarity_vector, real_jaccard_similarity_vector) + 0.05
mydata = cbind(myx, random_jaccard_similarity_vector, real_jaccard_similarity_vector)

plot1 <- ggplot(mydata, aes(x=myx)) + 
  geom_line(aes(y = random_jaccard_similarity_vector, color = "darkred")) + 
  geom_line(aes(y = real_jaccard_similarity_vector, color="steelblue" )) +
  geom_point(aes(y = random_jaccard_similarity_vector, color = "darkred")) + 
  geom_point(aes(y = real_jaccard_similarity_vector, color = "steelblue"))  +
  scale_x_continuous(breaks=seq(0,1, 0.1)) + 
  scale_color_manual(labels=c('Random dataset', 'Real dataset'), values=c('darkred', 'steelblue'), name='') + 
  theme_bw() +
  xlab('Semantic similarity threshold') + 
  ylab('Jaccard Similarity')

plot1

## ----plots_of_tsne, echo=T, eval=T--------------------------------------------

best_similarity <- names(real_jaccard_similarity_vector)[which(real_jaccard_similarity_vector==max(real_jaccard_similarity_vector))]

## ----plotofcoherence, echo=T, eval=T, message=FALSE, warning=FALSE------------

# Collapsing semantic sets at the best similarity value
collapsed_cells <- Onassis::collapse(filtered_cells, as.numeric(best_similarity))
onassis_annotations <- entities(collapsed_cells)

# Sorting the annotations based on the score matrix columns
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]

tsne_clus <- cutree(hclust(dist(tsne_out[,c(1,2)]), method='ward.D'), length(unique(onassis_annotations$short_label)))

my_tsne_out <- data.frame(tsne_out)
colnames(my_tsne_out) <- c('tSNE1', 'tSNE2', 'tSNE3')
my_tsne_out$annotations <- onassis_annotations$short_label
my_tsne_out$cluster <- as.numeric(as.vector(tsne_clus))

my_random_tsne_clus <- factor(cutree(hclust(dist(random_tsne_out[,c(1,2)]), method='ward.D'), length(unique(onassis_annotations$short_label))))

my_colors <- c("#7FC97F", "#386CB0", "#F0027F", "#666666", "#FF7F00", "#6A3D9A", "#F4CAE4")

plot2 <- ggplot(my_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
  geom_point(size=5) + 
  theme_bw()  +
  scale_colour_manual(name = "Semantic set", values = my_colors) 

plot2 

## ----second_plot, echo=T, eval=T, warning=FALSE, message=FALSE----------------

my_random_tsne_out <- data.frame(random_tsne_out)
colnames(my_random_tsne_out) <- c('tSNE1', 'tSNE2', 'tSNE3')
my_random_tsne_out$annotations <- onassis_annotations$short_label
my_random_tsne_out$cluster <- as.numeric(as.vector(random_tsne_clus))

plot3 <- ggplot(my_random_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
  geom_point(size=5) +
  theme_bw()  +
  scale_colour_manual(name = "Semantic set", values = my_colors) 

plot3 


## ----plot_correspondence_tsne_hclus, echo=T, eva=T, warning=FALSE-------------
plot4 <- ggplot(my_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
  stat_ellipse(type = "t", level=0.9, data=my_tsne_out[, c(1, 2, 4,5)], aes(x=tSNE1, y=tSNE2, colour=factor(cluster))) +
  geom_point(size=5) +
  theme_bw()  +
  scale_colour_manual(name = "Semantic set",values = c(rep('black', 7), my_colors)) +
   
  theme(legend.position = "none")
plot4

## ----performances, echo=FALSE, eval=TRUE--------------------------------------
performances_table <- readRDS(system.file('extdata', 'vignette_data', 'performances.rds', package='Onassis'))

## ----showingperformances, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(performances_table, 
  caption = ' Onassis performances ') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----sessionInfo(), echo=FALSE, eval=TRUE-------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Onassis documentation built on Nov. 8, 2020, 8:18 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Onassis
OnASSIs Ontology Annotation and Semantic SImilarity software

inst/doc/Onassis.R
In Onassis: OnASSIs Ontology Annotation and Semantic SImilarity software

Try the Onassis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Onassis OnASSIs Ontology Annotation and Semantic SImilarity software

inst/doc/Onassis.R In Onassis: OnASSIs Ontology Annotation and Semantic SImilarity software

Try the Onassis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

Onassis
OnASSIs Ontology Annotation and Semantic SImilarity software

inst/doc/Onassis.R
In Onassis: OnASSIs Ontology Annotation and Semantic SImilarity software