# Set a CRAN mirror options(repos = c(CRAN = "https://cran.rstudio.com")) install.packages("xfun")
knitr::opts_chunk$set(results = 'hide', fig.show = 'hide')
if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install(c("devtools", "tidyverse", "kableExtra")) BiocManager::install(c("waldronlab/bugSigSimple", "waldronlab/BugSigDBStats", "waldronlab/bugsigdbr"))
suppressPackageStartupMessages({ library(bugSigSimple) library(BugSigDBStats) library(bugsigdbr) library(tidyverse) library(stringr) library(kableExtra) library(dplyr) library(magrittr) library(boot) })
use version="devel" and cache = FALSE to take the latest version from bugsigdb.org
dat <- bugsigdbr::importBugSigDB(version = "devel", cache = FALSE) dim(dat)
Examine the data pulled in to view variable names
names(dat)
Subset to include only PMID numbers of interest
included.pmid <- c( 31463790, 32931939, 37586456, 37762390, 34900990, 37803284, 35350577, 37026303, 32694705, 38111925 ) subset.dat <- dat[dat$PMID %in% included.pmid,]
dim(subset.dat)
Confirm all studies included
subset.dat
Examine group 0 names
unique(subset.dat$`Group 0 name`)
Examine group 1 names
unique(subset.dat$`Group 1 name`)
To omit the double comparison in Study 856 by removing the ANOSIM analysis
subset.dat <- subset.dat %>% filter(`Statistical test` != "ANOSIM") subset.dat
Final subset to include only those Group 0 names of interest (controls with no DED) and only those Group 1 names of interest (some variation on DED)
subset.final <- subset.dat[which(subset.dat$`Group 0 name` %in% c("Healthy Control", "Control", "control", "healthy controls", "Normal Control (NC)","Healthy control", "Healthy Controls","Normal healthy (NDM) children", "Healthy controls")& subset.dat$`Group 1 name`%in% c("Meibomian Gland Dysfunction + Lacrimal Dysfunction", "Meibomian Gland Dysfunction", "Dry Eye","ADDE", "DED patients", "MGD", "Meibomian Gland Dysfunction (MGD)", "Meibomian Gland Dysfunction (MGD) DED", "Meibomian Gland Dysfunction (MGD) Groups", "Mixed DED", "Sjogrens Syndrome Dry Eye (SSDE)", "Non Sjogrens Syndrome Dry Eye (NSSDE)", "Dry Eye Disease patients", "Sjogren's patients with Dry Eye Disease", "Dry Eye Disease patients without Sjogrens", "Diabetic children with Dry Eye Disease (DM-DE)", "Mild Dry Eye", "Mild and Moderate to Severe Dry Eye")),] subset.final
dim(subset.final)
These are the studies included in the review
bugSigSimple::createStudyTable(subset.final)|> kableExtra::kbl()
This table summarizes the results for the identified taxa.
# Install and load necessary packages install.packages("openxlsx") library(openxlsx) library(knitr) # Create and format the table taxon_table <- bugSigSimple::createTaxonTable(subset.final, n = 2000) styled_table <- kable_styling(kbl(taxon_table)) # Create a new workbook wb <- createWorkbook() # Add a worksheet addWorksheet(wb, "Taxon Table") # Write the data to the worksheet writeData(wb, "Taxon Table", taxon_table) # Save the workbook saveWorkbook(wb, "TaxonTable.xlsx", overwrite = TRUE)
install.packages("writexl")
install.packages("ontologyIndex")
efo<-getOntology("efo") efo
Top 1000 taxa decreased in relation to the control
mostfreqdec<-bugSigSimple::getMostFrequentTaxa(subset.final, n=1000,sig.type = "decreased")
mostfreqdec
Top 1000 taxa increased in relation to the control
mostfreqinc<-bugSigSimple::getMostFrequentTaxa(subset.final,n=1000, sig.type = "increased") mostfreqinc
To obtain signatures
allsigs <- bugsigdbr::getSignatures(subset.final, tax.id.type = "taxname") allsigs <- allsigs[sapply(allsigs, length) > 0] #require length > 0 dim(allsigs)
To obtain signature lengths displayed in a bar graph and import to a word document
# Install and load necessary packages library(ggplot2) # Create the bar graph # Calculate the lengths of signatures siglengths <- sapply(allsigs, length) # Create a dataframe siglengths.df <- data.frame(siglengths = siglengths) # Plot the bar graph with Y-axis label plot <- ggplot(siglengths.df, aes(x = siglengths)) + geom_bar() + labs(y = "Number of signatures", x = "Signature Lengths") # Display the plot print(plot) # Save the plot to a file ggsave("bar_graph.png", plot)
Table of signature lengths
table(siglengths)
Calculate pairwise overlap and obtain Jaccard value to determine similarity of signatures
mydists <- BugSigDBStats::calcPairwiseOverlaps(allsigs) dim(mydists)
library(grid) library(ComplexHeatmap) jmat <- BugSigDBStats::calcJaccardSimilarity(allsigs) # Truncate row names at the first underscore rownames(jmat) <- sub("_.+$", "", rownames(jmat)) # Truncate column names at the first underscore colnames(jmat) <- sub("_.+$", "", colnames(jmat)) # Check the truncated names cat("Truncated row names: ", rownames(jmat), "\n") cat("Truncated column names: ", colnames(jmat), "\n") # Ensure row_labels and column_labels lengths match the matrix dimensions row_labels <- rownames(jmat) col_labels <- colnames(jmat) # Verify that lengths match cat("Length of row_labels: ", length(row_labels), "\n") cat("Length of col_labels: ", length(col_labels), "\n") # Create custom annotations with line breaks ha <- HeatmapAnnotation( `Signature\nLength` = anno_barplot(siglengths, gp = gpar(fill = 4:13), annotation_name_gp = gpar(fontsize = 10, lineheight = 0.8)) ) hr <- rowAnnotation( `Signature\nLength` = anno_barplot(siglengths, gp = gpar(fill = 4:13), annotation_name_gp = gpar(fontsize = 10, lineheight = 0.8)) ) # Adjust heatmap dimensions and settings hm <- Heatmap( jmat, top_annotation = ha, left_annotation = hr, row_names_max_width = unit(10, "cm"), # Adjusted width column_names_max_height = unit(3, "cm"), # Adjusted height for column names row_labels = row_labels, column_labels = col_labels, column_title = "Fig 3 Heatmap of Similarity of Signatures Between Studies", # Title of the heatmap column_title_gp = gpar(fontsize = 15), # Adjust font size for title heatmap_legend_param = list( title = "Legend", direction = "horizontal" # Set legend direction to horizontal ) # Customizing legend ) # Draw the heatmap draw(hm, heatmap_legend_side = "bottom", annotation_legend_side = "right") # Save the heatmap as an image png("heatmap.png", width = 12, height = 8, units = "in", res = 300) draw(hm, heatmap_legend_side = "bottom", annotation_legend_side = "right") dev.off()
library(InteractiveComplexHeatmap) hm <- draw(hm) htShiny(hm)
library(ComplexHeatmap) library(grid) # Assuming jmat and other variables are already defined as before # Perform hierarchical clustering hc <- hclust(dist(jmat)) plot(hc) # Save the cluster map as an image png("cluster_map.png", width = 12, height = 8, units = "in", res = 300) plot(hc, main = "Hierarchical Cluster Map of Signatures", xlab = "", sub = "", cex = 0.6) dev.off()
clusts <- sort(cutree(hc, h = 0.05)) lapply(unique(clusts), function(i) names(clusts)[clusts == i])
#all taxon increased and decreased freq allfreqs <- bugSigSimple::createTaxonTable(subset.final, n = 500) %>% #could change number arrange(I(decreased_signatures - increased_signatures)) incfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) > 0) decfreqs <- filter(allfreqs, I(increased_signatures - decreased_signatures) < 0) kableExtra::kbl(allfreqs) %>% kable_paper("hover", full_width = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.