knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
LIONESS (Linear Interpolation to Obtain Network Estimates for Single Samples) is a method for creating sample-specific networks. When applied to a PANDA regulatory network, the result is a set of gene regulatory networks, one for each sample in the gene expression dataset. More information on LIONESS can be found in the published paper: https://doi.org/10.1016/j.isci.2019.03.021
In this vignette, we will compare LIONESS regulatory networks from 207 females and 238 males with colon cancer using RNA-Seq data from TCGA. We will compare the edge weights between females and males using a linear regression model and correcting for the covariates age, race, and disease stage, as available in the limma package. We will also compare the gene's in-degree (defined as the sum of the gene's incoming edge weights from all TFs in the network). Finally, we will perform gene set enrichment analysis to find the pathways enriched for genes differentially targeted by sex in colon cancer.
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager",repos = "http://cran.us.r-project.org") BiocManager::install("limma") BiocManager::install("fgsea") BiocManager::install("Biobase") # install.packages("ggplot2",repos = "http://cran.us.r-project.org") install.packages("igraph",repos = "http://cran.us.r-project.org")
library(limma) library(fgsea) library(ggplot2) library(Biobase) library(igraph)
For the purposes of demonstrating the workflow, we will load only a subet of LIONESS networks. Our subset shows the edge weights for 50,000 edges (rows) by 445 samples (columns). Let's take a look at the networks:
# download a subset of LIONESS networks from netZooR AWS Bucket to working directory. system("curl -O https://netzoo.s3.us-east-2.amazonaws.com/netZooR/tutorial_datasets/lioness_coloncancer_subset.txt") lioness <- read.delim("./lioness_coloncancer_subset.txt",stringsAsFactors = F, check.names = F) head(lioness[,1:5]) # Add row names as "TF_gene" and remove columns 1 and 2 with TF and gene name rownames(lioness) <- apply(lioness, 1, function(x){ paste(x[1], x[2], sep="_") }) # Remove TF and gene columns lioness <- lioness[,-(1:2)] head(lioness[,1:5]) # Load the complete gene in-degree (sum of all edge weights for each gene) and clinical data stored as an expression set system("curl -O https://netzoo.s3.us-east-2.amazonaws.com/netZooR/tutorial_datasets/inDegree_allEdges_coloncancer.rdata") load("./inDegree_allEdges_coloncancer.rdata") # Clinical information pData(obj1)[1:5,30:35]
Compare the edge weights between males and females using linear regression model (limma package) and adjusting for covariates: stage, age, race.
# Define the covariates gender <- factor(as.character(pData(obj1)$gender),levels=c("MALE","FEMALE")) stage <- (as.character(pData(obj1)$uicc_stage)) stage[which(is.na(stage))] <- "NA" stage <- as.factor(stage) race <- as.character(pData(obj1)$race) race[which(is.na(race))] <- "NA" race <- as.factor(race) age <- as.numeric(pData(obj1)$age_at_initial_pathologic_diagnosis) age[which(is.na(age))] <- mean(age,na.rm=TRUE) design = model.matrix(~ stage + race + age + gender) # Run limma fitGood = lmFit(as.matrix(lioness),design) fitGood = eBayes(fitGood) tb = topTable(fitGood,coef="genderFEMALE",number=Inf) head(tb)
We select the top 50 edges with differential edge weights by sex and convert them into an igraph graph.data.frame object for visualization. We color edges red if they have higher coefficients in the female group, and blue if they have higher coefficients in the male group.
toptable_edges <- t(matrix(unlist(c(strsplit(row.names(tb), "_"))),2)) z <- cbind(toptable_edges[1:50,], tb$logFC[1:50]) g <- graph.data.frame(z, directed=FALSE) E(g)$weight <- as.numeric(z[,3]) E(g)$color[E(g)$weight<0] <- "blue" E(g)$color[E(g)$weight>0] <- "red" E(g)$weight <- 1 par(mar=c(0,0,0,0)) plot(g, vertex.label.cex=0.7, vertex.size=10, vertex.label.font=3, edge.width=5*(abs(as.numeric(z[,3]))))
Compare the gene in-degree between males and females using linear regression model (limma package) and adjusting for covariates: stage, age, race.
indegree <- assayData(obj1)[["quantile"]] head(indegree[,1:3]) # Use the same design matrix as before fitGood = lmFit(indegree,design) fitGood = eBayes(fitGood) tb_degree = topTable(fitGood,coef="genderFEMALE",number=Inf) head(tb_degree) # Save gene ranks indegree_rank <- setNames(object=tb_degree[,"t"], rownames(tb_degree))
Well will use the fgsea package to perform gene set enrichment analysis. We need to point to a ranked gene list (for example the gene in-degree statistical difference (t value) between males and females), and a list of gene sets (or signatures) in gmt format to test for enrichment. The gene sets can be downloaded from MSigDB: http://software.broadinstitute.org/gsea/msigdb Same gene annotation should be used in the ranked gene list and gene sets.
system("curl -O https://netzoo.s3.us-east-2.amazonaws.com/netZooR/tutorial_datasets/c2.cp.kegg.v7.0.symbols.gmt") pathways <- gmtPathways("./c2.cp.kegg.v7.0.symbols.gmt") fgseaRes <- fgsea(pathways, indegree_rank, minSize=15, maxSize=500, nperm=1000) head(fgseaRes) # Subset to pathways with FDR < 0.05 sig <- fgseaRes[fgseaRes$padj < 0.05,] # Top 10 pathways enriched in females sig$pathway[sig$NES > 0][1:10] # Top 10 pathways enriched in males sig$pathway[sig$NES < 0][1:10]
Bubble plot of gene sets on y-axis and adjusted p-value (padj) on x-axis. Bubble size indicates the number of genes in each gene set, and bubble color indicates the normalized enrichment score (NES). Blue is for negative NES (enrichment of higher targeted genes in males), and red is for positive NES (enrichment of higher targeted genes in females).
dat <- data.frame(fgseaRes) # Settings fdrcut <- 0.05 # FDR cut-off to use as output for significant signatures dencol_neg <- "blue" # bubble plot color for negative ES dencol_pos <- "red" # bubble plot color for positive ES signnamelength <- 4 # set to remove prefix from signature names (2 for "GO", 4 for "KEGG", 8 for "REACTOME") asp <- 3 # aspect ratio of bubble plot charcut <- 100 # cut signature name in heatmap to this nr of characters # Make signature names more readable a <- as.character(dat$pathway) # 'a' is a great variable name to substitute row names with something more readable for (j in 1:length(a)){ a[j] <- substr(a[j], signnamelength+2, nchar(a[j])) } a <- tolower(a) # convert to lower case (you may want to comment this out, it really depends on what signatures you are looking at, c6 signatures contain gene names, and converting those to lower case may be confusing) for (j in 1:length(a)){ if(nchar(a[j])>charcut) { a[j] <- paste(substr(a[j], 1, charcut), "...", sep=" ")} } # cut signature names that have more characters than charcut, and add "..." a <- gsub("_", " ", a) dat$NAME <- a # Determine what signatures to plot (based on FDR cut) dat2 <- dat[dat[,"padj"]<fdrcut,] dat2 <- dat2[order(dat2[,"padj"]),] dat2$signature <- factor(dat2$NAME, rev(as.character(dat2$NAME))) # Determine what labels to color sign_neg <- which(dat2[,"NES"]<0) sign_pos <- which(dat2[,"NES"]>0) # Color labels signcol <- rep(NA, length(dat2$signature)) signcol[sign_neg] <- dencol_neg # text color of negative signatures signcol[sign_pos] <- dencol_pos # text color of positive signatures signcol <- rev(signcol) # need to revert vector of colors, because ggplot starts plotting these from below # Plot bubble plot g<-ggplot(dat2, aes(x=padj,y=signature,size=size)) g+geom_point(aes(fill=NES), shape=21, colour="white")+ theme_bw()+ # white background, needs to be placed before the "signcol" line xlim(0,fdrcut)+ scale_size_area(max_size=10,guide="none")+ scale_fill_gradient2(low=dencol_neg, high=dencol_pos)+ theme(axis.text.y = element_text(colour=signcol))+ theme(aspect.ratio=asp, axis.title.y=element_blank()) # test aspect.ratio
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.