inst/doc/Onassis.R

## ----imports, echo=FALSE,eval=TRUE, message=FALSE, warning=FALSE--------------
library(Onassis)
library(DT)
library(gplots)
library(org.Hs.eg.db)
library(kableExtra)
library(Rtsne)
library(dendextend)
library(clusteval)
library(ggplot2)
library(ggfortify)


## ----installing_onassis, echo=TRUE, eval=FALSE--------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("Onassis")

## ----load_onasssis, echo=TRUE, eval=TRUE--------------------------------------
library(Onassis)

## ----connectTodb, echo=TRUE,eval=FALSE----------------------------------------
#  
#  require('GEOmetadb')
#  
#  ## Running this function might take some time if the database (6.8GB) has to be downloaded.
#  geo_con <- connectToGEODB(download=TRUE)
#  
#  #Showing the experiment types available in GEO
#  experiments <- experiment_types(geo_con)
#  
#  #Showing the organism types available in GEO
#  species <- organism_types(geo_con)
#  
#  #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array
#  expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570')
#  
#  #Retrieving the metadata associated to experiment type "Methylation profiling by high througput sequencing"
#  meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens')

## ----experimentTypesshow, echo=FALSE, eval=TRUE-------------------------------
experiments <- readRDS(system.file('extdata', 'vignette_data', 'experiment_types.rds', package='Onassis'))

knitr::kable(as.data.frame(experiments[1:10]), col.names = c('experiments')) %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "300px", height = "200px")

## ----speciesShow, echo=FALSE,eval=TRUE----------------------------------------
species <- readRDS(system.file('extdata', 'vignette_data', 'organisms.rds', package='Onassis'))
knitr::kable(as.data.frame(species[1:10]), col.names=c('species')) %>%
              kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "300px", height = "200px")

## ----loadgeoMetadata, echo=TRUE, eval=TRUE------------------------------------
meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package='Onassis'))

## ----printmeta, echo=FALSE,eval=TRUE------------------------------------------

methylation_tmp <- meth_metadata
methylation_tmp$experiment_summary <- sapply(methylation_tmp$experiment_summary, function(x) substr(x, 1, 50))
knitr::kable(methylation_tmp[1:10,], 
              caption = 'GEOmetadb metadata for Methylation profiling by high throughput sequencing (only the first 10 entries are shown).') %>%
              kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "300px")


## ----connectSRA, echo=TRUE,eval=FALSE-----------------------------------------
#  # Optional download of SRAdb and connection to the corresponding sqlite file
#  require(SRAdb)
#  sqliteFileName <- '/pathto/SRAdb.sqlite'
#  sra_con <- dbConnect(SQLite(), sqliteFileName)
#  
#  # Query for the ChIP-Seq experiments contained in GEO for human samples
#  library_strategy <- 'ChIP-Seq' #ChIP-Seq data
#  library_source='GENOMIC'
#  taxon_id=9606 #Human samples
#  center_name='GEO' #Data from GEO
#  
#  # Query to the sample table
#  samples_query <- paste0("select sample_accession, description, sample_attribute, sample_url_link from sample where taxon_id='", taxon_id, "' and sample_accession IS NOT NULL", " and center_name='", center_name, "'"  )
#  
#  samples_df <- dbGetQuery(sra_con, samples_query)
#  samples <- unique(as.character(as.vector(samples_df[, 1])))
#  
#  experiment_query <- paste0("select experiment_accession, center_name, title, sample_accession, sample_name, experiment_alias, library_strategy, library_layout, experiment_url_link, experiment_attribute from experiment where library_strategy='", library_strategy, "'" , " and library_source ='", library_source,"' ", " and center_name='", center_name, "'" )
#  experiment_df <- dbGetQuery(sra_con, experiment_query)
#  
#  #Merging the columns from the sample and the experiment table
#  experiment_df <- merge(experiment_df, samples_df, by = "sample_accession")
#  
#  # Replacing the '_' character with white spaces
#  experiment_df$sample_name <- sapply(experiment_df$sample_name, function(value) {gsub("_", " ", value)})
#  experiment_df$experiment_alias <- sapply(experiment_df$experiment_alias, function(value) {gsub("_", " ", value)})
#  
#  sra_chip_seq <- experiment_df

## ----readCHIP, echo=TRUE, eval=TRUE-------------------------------------------
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds',  package='Onassis'))

## ----printchromatinIP, echo=FALSE,eval=TRUE-----------------------------------

knitr::kable(head(sra_chip_seq, 10), rownames=FALSE, 
  caption = 'Metadata of ChIP-seq human samples obtained from SRAdb (first 10 entries)') %>%
              kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "300px")


## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE------------
# If a Conceptmapper dictionary is already available the dictType CMDICT can be specified and the corresponding file loaded
sample_dict <- CMdictionary(inputFileOrDb=system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT')

#Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs
obo <- system.file('extdata', 'sample.cs.obo', package='OnassisJavaLibs')

sample_dict <- CMdictionary(inputFileOrDb=obo, outputDir=getwd(), synonymType='ALL')

# Creation of a dictionary for human genes/proteins. This requires org.Hs.eg.db to be installed
require(org.Hs.eg.db)
targets <- CMdictionary(dictType='TARGET', inputFileOrDb = 'org.Hs.eg.db', synonymType='EXACT')

## ----settingOptions, echo=TRUE,eval=TRUE--------------------------------------

#Creating a CMoptions object and showing hte default parameters 
opts <- CMoptions()  
show(opts)

## ----listCombinations, echo=TRUE, eval=TRUE-----------------------------------
combinations <- listCMOptions()

## ----setsynonymtype, echo=TRUE, eval=TRUE-------------------------------------
myopts <- CMoptions(SynonymType='EXACT_ONLY')
myopts

## ----changeparameter, echo=TRUE, eval=TRUE------------------------------------
#Changing the SearchStrategy parameter
SearchStrategy(myopts) <- 'SKIP_ANY_MATCH_ALLOW_OVERLAP'
myopts

## ----EntityFinder, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds',  package='Onassis'))
chipseq_dict_annot <- EntityFinder(sra_chip_seq[1:50, c('experiment_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary=sample_dict, options=myopts)

## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE--------------------

knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE, 
  caption = 'Annotating the metadata of DNA methylation sequencing experiments with a dictionary including CL (Cell line) and UBERON ontologies') %>% kable_styling() %>%
  scroll_box(width = "80%", height = "400px")

## ----filtering_out_terms, echo=TRUE, eval=TRUE, message=FALSE-----------------
chipseq_dict_annot <- filterTerms(chipseq_dict_annot, c('cell', 'tissue'))

## ----showchipresults_filtered, echo=FALSE, eval=TRUE, message=FALSE-----------

knitr::kable(head(chipseq_dict_annot, 20), rownames=FALSE, 
  caption = 'Filtered Annotations') %>% kable_styling() %>%
  scroll_box(width = "80%", height = "400px")

## ----annotateGenes, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
#Finding the TARGET entities
target_entities <- EntityFinder(input=sra_chip_seq[1:50, c('experiment_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], options = myopts, dictionary=targets)

## ----printKable, echo=FALSE, eval=TRUE----------------------------------------
knitr::kable(head(target_entities, 20), 
  caption = 'Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----similarity, echo=TRUE, eval=TRUE, message=FALSE--------------------------
#Instantiating the Similarity
similarities <- listSimilarities()

## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE------------------

#Retrieving URLS of concepts
found_terms <- as.character(unique(chipseq_dict_annot$term_url))

# Creating a dataframe with all possible couples of terms and adding a column to store the similarity
pairwise_results <- t(combn(found_terms, 2))
pairwise_results <- cbind(pairwise_results, rep(0, nrow(pairwise_results)))

# Similarity computation for each couple of terms
for(i in 1:nrow(pairwise_results)){
  pairwise_results[i, 3] <- Similarity(obo, pairwise_results[i,1], pairwise_results[i, 2])
}
colnames(pairwise_results) <- c('term1', 'term2', 'value')  

# Adding the term names from the annotation table to the comparison results 
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term2', by.y='term_url')
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term2_name'
pairwise_results <- merge(pairwise_results, chipseq_dict_annot[, c('term_url', 'term_name')], by.x='term1', by.y='term_url')
colnames(pairwise_results)[length(colnames(pairwise_results))] <- 'term1_name'
pairwise_results <- unique(pairwise_results)
# Reordering the columns
pairwise_results <- pairwise_results[, c('term1', 'term1_name', 'term2', 'term2_name', "value")]

## ----showSim, echo=FALSE, eval=TRUE-------------------------------------------
knitr::kable(pairwise_results,  
  caption = 'Pairwise similarities of cell type terms annotating the ChIP-seq metadata') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE------------------

oboprefix <- 'http://purl.obolibrary.org/obo/'
Similarity(obo, paste0(oboprefix, c('CL_0000055', 'CL_0000066')) , paste0(oboprefix, c('CL_0000542', 'CL_0000236')))

## ----groupwise_measures_2, echo=TRUE, eval=TRUE, message=FALSE----------------
Similarity(obo, paste0(oboprefix, c('CL_0000055', 'CL_0000236' ,'CL_0000236')), paste0(oboprefix, c('CL_0000542', 'CL_0000066')))

## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE, fig.height=14, fig.width=16----

# Extracting all the couples of samples 
annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id)))

samples_couples <- t(combn(annotated_samples, 2))

# Computing the samples semantic similarity 
similarity_results <- apply(samples_couples, 1, function(couple_of_samples){
  Similarity(obo, couple_of_samples[1], couple_of_samples[2], chipseq_dict_annot)
})


#Creating a matrix to store the results of the similarity between samples
similarity_matrix <- matrix(0, nrow=length(annotated_samples), ncol=length(annotated_samples))
rownames(similarity_matrix) <- colnames(similarity_matrix) <- annotated_samples

# Filling the matrix with similarity values 
similarity_matrix[lower.tri(similarity_matrix, diag=FALSE)] <- similarity_results
similarity_matrix <- t(similarity_matrix)
similarity_matrix[lower.tri(similarity_matrix, diag=FALSE)] <- similarity_results 
# Setting the diagonal to 1
diag(similarity_matrix) <- 1

# Pasting the annotations to the sample identifiers
samples_legend <- aggregate(term_name ~ sample_id, chipseq_dict_annot, function(aggregation) paste(unique(aggregation), collapse=',' ))
rownames(similarity_matrix) <- paste0(rownames(similarity_matrix), ' (', samples_legend[match(rownames(similarity_matrix), samples_legend$sample_id), c('term_name')], ')')

# Showing a heatmap of the similarity between samples
heatmap.2(similarity_matrix, density.info = "none", trace="none", main='Samples\n semantic\n similarity', margins=c(12,50), cexRow = 2, cexCol = 2, keysize = 0.5)

## ----load_h3k27ac, echo=TRUE, eval=TRUE---------------------------------------
h3k27ac_chip <- readRDS(system.file('extdata', 'vignette_data', 'h3k27ac_metadata.rds',  package='Onassis'))

## ----onassis_class_usage, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
cell_annotations <- Onassis::annotate(h3k27ac_chip, 'OBO', obo, FindAllMatches='YES' )

## ----show_onassis_annotations, echo=TRUE, eval=TRUE---------------------------
cell_entities <- entities(cell_annotations) 

## ----cellentities, echo=FALSE, eval=TRUE--------------------------------------
knitr::kable(cell_entities[sample(nrow(cell_entities), 10),], 
  caption = ' Semantic sets of ontology concepts (entities) associated to each sample, stored in the entities slot of the Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----term_filtering, echo=TRUE, eval=TRUE-------------------------------------
filtered_cells <- filterconcepts(cell_annotations, c('cell', 'tissue'))

## ----showingfiltentities, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(entities(filtered_cells), 
  caption = 'Entities in filtered Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----similarity_of_samples, echo=TRUE, eval=TRUE------------------------------
filtered_cells <- sim(filtered_cells)

## ----collapsing_similarities, echo=TRUE, eval=TRUE, message=FALSE, results='hide', fig.width=6, fig.height=6----
collapsed_cells <- Onassis::collapse(filtered_cells, 0.9)

## ----collapsedcellstable, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(entities(collapsed_cells), 
  caption = ' Collapsed Entities in filtered Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----show_collapsed_entities, echo=TRUE, eval=TRUE, fig.height=11, fig.width=11----
heatmap.2(simil(collapsed_cells), density.info = "none", trace="none", margins=c(36, 36), cexRow = 1.5, cexCol = 1.5, keysize=0.5)

## ----creating_disease_annotations, echo=TRUE, eval=TRUE, message=FALSE, results='hide'----
obo2 <- system.file('extdata', 'sample.do.obo', package='OnassisJavaLibs')
disease_annotations <- Onassis::annotate(h3k27ac_chip, 'OBO',obo2, disease=TRUE )

## ----diseases, echo=FALSE, eval=TRUE------------------------------------------
knitr::kable(entities(disease_annotations), 
  caption = ' Disease entities') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----merging_onassis_entities, echo=TRUE, eval=TRUE, message=FALSE------------
cell_disease_onassis <- mergeonassis(collapsed_cells, disease_annotations)

## ----showingmergedentities, echo=FALSE, eval=TRUE-----------------------------
knitr::kable(entities(cell_disease_onassis), 
  caption = ' Cell and disease entities') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----loading_score_matrix, echo=TRUE, eval=TRUE-------------------------------
score_matrix <- readRDS(system.file('extdata', 'vignette_data', 'score_matrix.rds', package='Onassis'))

## ----compare_tissues_by_col, echo=TRUE, eval=TRUE, silent=TRUE, message=FALSE----

cell_comparisons_by_col <- compare(collapsed_cells, score_matrix=as.matrix(score_matrix), by='col', fun_name='wilcox.test')

matrix_of_p_values <- matrix(NA, nrow=nrow(cell_comparisons_by_col), ncol=ncol(cell_comparisons_by_col))
for(i in 1:nrow(cell_comparisons_by_col)){
  for(j in 1:nrow(cell_comparisons_by_col)){
   result_list <- cell_comparisons_by_col[i,j][[1]]
   matrix_of_p_values[i, j] <- result_list[2]
  }
}
colnames(matrix_of_p_values) <- rownames(matrix_of_p_values) <- colnames(cell_comparisons_by_col)

## ----show_p_values_of_t_test, echo=TRUE, eval=TRUE, fig.height=11, fig.width=11----
heatmap.2(-log10(matrix_of_p_values), density.info = "none", trace="none", main='Changes in\n H3K27ac signal \nin promoter regions', margins=c(36,36), cexRow = 1.5, cexCol = 1.5, keysize=1)

## ----compare_tissues, echo=TRUE, eval=TRUE, message=FALSE, silent=TRUE, waring=FALSE----
cell_comparisons <- compare(collapsed_cells, score_matrix=as.matrix(score_matrix), by='row', fun_name='wilcox.test', fun_args=list(exact=FALSE))

# Extraction of p-values less than 0.1
significant_features <- matrix(0, nrow=nrow(cell_comparisons), ncol=ncol(cell_comparisons))
colnames(significant_features) <- rownames(significant_features) <- rownames(cell_comparisons)
for(i in 1:nrow(cell_comparisons)){
  for(j in 1:nrow(cell_comparisons)){
   result_list <- cell_comparisons[i,j][[1]]
   significant_features[i, j] <- length(which(result_list[,2]<=0.1))
  }
}

## ----significantRegions, echo=FALSE, eval=TRUE--------------------------------
knitr::kable(significant_features, 
  caption = ' Number of promoter regions with p.value <=0.1') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----compare_diseases, echo=TRUE, eval=TRUE, message=FALSE, silent=TRUE-------

disease_comparisons <- compare(cell_disease_onassis, score_matrix=as.matrix(score_matrix), by='row', fun_name='wilcox.test', fun_args=list(exact=FALSE))


## ----show_disease_semantic_comparisons, echo=TRUE, eval=TRUE------------------
rownames(disease_comparisons$`breast [8], mammary gland epithelial cell [2], gland [1] (11)`)

## ----show_breast_cancer, echo=TRUE, eval=TRUE---------------------------------
selprom <- (disease_comparisons$`breast [8], mammary gland epithelial cell [2], gland [1] (11)`[2,1][[1]])
selprom <- selprom[is.finite(selprom[,2]),]
head(selprom)

## ----compute_table_of_significant_regions, echo=TRUE, eval=TRUE---------------
disease_matrix <- disease_comparisons[[1]]

# Extracting significant p-values
significant_features <- matrix(0, nrow=nrow(disease_matrix), ncol=ncol(disease_matrix))
colnames(significant_features) <- rownames(significant_features) <- rownames(disease_matrix)
for(i in 1:nrow(disease_matrix)){
  for(j in 1:nrow(disease_matrix)){
   result_list <- disease_matrix[i,j][[1]]
   significant_features[i, j] <- length(which(result_list[,2]<=0.1))
  }
}


## ----showingsignificantregions2, echo=FALSE, eval=TRUE------------------------
knitr::kable(significant_features, 
  caption = ' Number of promoter regions with p.value <= 0.1 ') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----score_comparison, echo=T, eval=T, message=F------------------------------

personal_t <- function(x, y){
    if(is.matrix(x))
      x <- apply(x, 1, mean)
    if(is.matrix(y))
      y <- apply(y, 1, mean)
    signal_to_noise_statistic <- abs(mean(x) - mean(y)) / (sd(x) + sd(y))
    return(list(statistic=signal_to_noise_statistic, p.value=NA))
}

disease_comparisons <- compare(cell_disease_onassis, score_matrix=as.matrix(score_matrix), by='col', fun_name='personal_t')

## ----correspondence, echo=T, eval=T, results='hide'---------------------------

onassis_annotations <- entities(filtered_cells)
# Sorting the annotations based on the order of the score matrix labels
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]

# Creating a score matrix with scrambled annotations 
random_score_matrix <- score_matrix[,sample(1:ncol(score_matrix))]

# Running t-SNE on the score matrix and on the random matrix
set.seed(456)
tsne_out <- Rtsne(t(score_matrix), perplexity = 10, dims=3)$Y 
random_tsne_out = Rtsne(t(random_score_matrix), perplexity = 10, dims=3)$Y 
tsne_hclus <- hclust(dist(tsne_out[,c(1,2)]),  method='ward.D')
random_tsne_hclus <- hclust(dist(random_tsne_out[,c(1,2)]) , method='ward.D')


# Vetors to store the similarities of data driven and semantic driven clusters
real_jaccard_similarity_vector <- c()
random_jaccard_similarity_vector <- c()


sequence_of_similarities <- rev(seq(0.1, 1, 0.1))
sequence_of_similarities[1] <- 0.99

for(i in sequence_of_similarities){
  collapsed_cells <- Onassis::collapse(filtered_cells,  as.numeric(i)) 
  onassis_annotations <- entities(collapsed_cells)
  onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]
  
  # Clustering based on the tSNE results
  tsne_clus <- factor(cutree(tsne_hclus, length(unique(onassis_annotations$short_label))))

  # Similarity of the tSNE driven vs semantic set driven clusters
  real_jaccard_similarity_vector <- c(real_jaccard_similarity_vector, cluster_similarity(tsne_clus, as.numeric(factor(onassis_annotations$short_label)), similarity='jaccard'))  
  
  # Clustering of the random matrix
  random_tsne_clus <- factor(cutree(random_tsne_hclus, length(unique(onassis_annotations$short_label))))

  # Similarity of the tSNE driven vs semantic set driven clusters
  random_jaccard_similarity_vector <- c(random_jaccard_similarity_vector, cluster_similarity(random_tsne_clus, as.numeric(factor(onassis_annotations$short_label)), similarity='jaccard')) 
}
names(real_jaccard_similarity_vector) <- rev(seq(0.1, 1, 0.1))


## ----plot_jaccar, echo=T, eval=T----------------------------------------------
# Plotting the Jaccard similarities for real and random datasets 
myx <- rev(seq(0.1, 1, 0.1))
myylim <- max(random_jaccard_similarity_vector, real_jaccard_similarity_vector) + 0.05
mydata = cbind(myx, random_jaccard_similarity_vector, real_jaccard_similarity_vector)

plot1 <- ggplot(mydata, aes(x=myx)) + 
  geom_line(aes(y = random_jaccard_similarity_vector, color = "darkred")) + 
  geom_line(aes(y = real_jaccard_similarity_vector, color="steelblue" )) +
  geom_point(aes(y = random_jaccard_similarity_vector, color = "darkred")) + 
  geom_point(aes(y = real_jaccard_similarity_vector, color = "steelblue"))  +
  scale_x_continuous(breaks=seq(0,1, 0.1)) + 
  scale_color_manual(labels=c('Random dataset', 'Real dataset'), values=c('darkred', 'steelblue'), name='') + 
  theme_bw() +
  xlab('Semantic similarity threshold') + 
  ylab('Jaccard Similarity')

plot1

## ----plots_of_tsne, echo=T, eval=T--------------------------------------------

best_similarity <- names(real_jaccard_similarity_vector)[which(real_jaccard_similarity_vector==max(real_jaccard_similarity_vector))]

## ----plotofcoherence, echo=T, eval=T, message=FALSE, warning=FALSE------------

# Collapsing semantic sets at the best similarity value
collapsed_cells <- Onassis::collapse(filtered_cells, as.numeric(best_similarity))
onassis_annotations <- entities(collapsed_cells)

# Sorting the annotations based on the score matrix columns
onassis_annotations <- onassis_annotations[match(colnames(score_matrix), onassis_annotations$sample_id),]

tsne_clus <- cutree(hclust(dist(tsne_out[,c(1,2)]), method='ward.D'), length(unique(onassis_annotations$short_label)))

my_tsne_out <- data.frame(tsne_out)
colnames(my_tsne_out) <- c('tSNE1', 'tSNE2', 'tSNE3')
my_tsne_out$annotations <- onassis_annotations$short_label
my_tsne_out$cluster <- as.numeric(as.vector(tsne_clus))

my_random_tsne_clus <- factor(cutree(hclust(dist(random_tsne_out[,c(1,2)]), method='ward.D'), length(unique(onassis_annotations$short_label))))

my_colors <- c("#7FC97F", "#386CB0", "#F0027F", "#666666", "#FF7F00", "#6A3D9A", "#F4CAE4")

plot2 <- ggplot(my_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
  geom_point(size=5) + 
  theme_bw()  +
  scale_colour_manual(name = "Semantic set", values = my_colors) 

plot2 

## ----second_plot, echo=T, eval=T, warning=FALSE, message=FALSE----------------

my_random_tsne_out <- data.frame(random_tsne_out)
colnames(my_random_tsne_out) <- c('tSNE1', 'tSNE2', 'tSNE3')
my_random_tsne_out$annotations <- onassis_annotations$short_label
my_random_tsne_out$cluster <- as.numeric(as.vector(random_tsne_clus))

plot3 <- ggplot(my_random_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
  geom_point(size=5) +
  theme_bw()  +
  scale_colour_manual(name = "Semantic set", values = my_colors) 

plot3 


## ----plot_correspondence_tsne_hclus, echo=T, eva=T, warning=FALSE-------------
plot4 <- ggplot(my_tsne_out[, c(1, 2, 4, 5)], aes(x=tSNE1, y=tSNE2,colour= annotations)) +
  stat_ellipse(type = "t", level=0.9, data=my_tsne_out[, c(1, 2, 4,5)], aes(x=tSNE1, y=tSNE2, colour=factor(cluster))) +
  geom_point(size=5) +
  theme_bw()  +
  scale_colour_manual(name = "Semantic set",values = c(rep('black', 7), my_colors)) +
   
  theme(legend.position = "none")
plot4

## ----performances, echo=FALSE, eval=TRUE--------------------------------------
performances_table <- readRDS(system.file('extdata', 'vignette_data', 'performances.rds', package='Onassis'))

## ----showingperformances, echo=FALSE, eval=TRUE-------------------------------
knitr::kable(performances_table, 
  caption = ' Onassis performances ') %>% kable_styling(bootstrap_options = c("striped"), position="center") %>%
  scroll_box(width = "80%", height = "400px")

## ----sessionInfo(), echo=FALSE, eval=TRUE-------------------------------------
sessionInfo()

Try the Onassis package in your browser

Any scripts or data that you put into this service are public.

Onassis documentation built on Nov. 8, 2020, 8:18 p.m.