knitr::opts_chunk$set(echo = TRUE)
This script identifies common and distinct ET-associated genes in human middle temporal gyrus as compared with human fronto-insula, and compares the proportions of cells across clusters for each brain region.
If needed, set the working directory first: e.g. setwd("C:/Users/jeremym/Desktop/VEN_TEST2")
.
suppressPackageStartupMessages({ library(VENcelltypes) library(feather) library(dplyr) library(gplots) library(matrixStats) }) options(stringsAsFactors=FALSE)
These files were created in the previous code file, which must be run prior to running this code block. First, let's read in the data from this manuscript (human FI). We do not need to subset the data for this analysis.
## Read in the data inputFolder <- "FI/" Expr.dat <- feather(paste(inputFolder,"data.feather",sep="")) annoFI <- read_feather(paste(inputFolder,"anno.feather",sep="")) exprData <- as.matrix(Expr.dat[,colnames(Expr.dat)[colnames(Expr.dat)!="sample_id"]]) rownames(exprData) = Expr.dat$sample_id datFI <- t(exprData) datFI <- log2(datFI+1) load(paste(inputFolder,"clusterInfo.rda",sep="")) infoFI <- clusterInfo
second, let's read in the comparable data from human MTG.
## Read in the data mtgFolder <- "MTG/" annoMTG <- read_feather(paste(mtgFolder,"anno.feather",sep="")) Expr.datp <- feather(paste(mtgFolder,"data.feather",sep="")) annoMTG <- annoMTG[match(Expr.datp$sample_id,annoMTG$sample_id),] datMTG <- as.matrix(Expr.datp[,names(Expr.datp)!="sample_id"]) rownames(datMTG) <- annoMTG$sample_id datMTG <- t(datMTG) datMTG <- log2(datMTG+1) data("clusterInfoMTG") infoMTG <- clusterInfoMTG
Finally, find the common genes between studies.
kpGenes <- intersect(rownames(datMTG),rownames(datFI)) datMTG <- datMTG[kpGenes,] datFI <- datFI[kpGenes,]
We have already shown in the previous code that a handful of genes are enriched in ET cells relative to other subclasses in multiple regions in mouse and human. Now we want to extend this result to see if we can find an expanded list when we only consider human, and also to identify genes which may be specific to FI (and therefore potentially associated with diversified morphologies).
As a first step we need to calculate some statistics on these clusters, including means and proportions.
## FI statistics exprThresh <- 1 clFI <- annoFI$cluster_label names(clFI) <- colnames(datFI) propFI <- do.call("cbind", tapply(names(clFI), clFI, function(x) rowMeans(datFI[,x]>exprThresh))) meanFI <- do.call("cbind", tapply(names(clFI), clFI, function(x) rowMeans(datFI[,x]))) rownames(propFI) <- rownames(meanFI) <- rownames(datFI) propFI <- propFI[,infoFI$cluster_label] meanFI <- meanFI[,infoFI$cluster_label] ## MTG statistics clMTG <- annoMTG$cluster_label names(clMTG)<- colnames(datMTG) propMTG <- do.call("cbind", tapply(names(clMTG), clMTG, function(x) rowMeans(datMTG[,x]>exprThresh))) meanMTG <- do.call("cbind", tapply(names(clMTG), clMTG, function(x) rowMeans(datMTG[,x]))) rownames(propMTG) <- rownames(meanMTG) <- rownames(datMTG) propMTG <- propMTG[,infoMTG$cluster_label] meanMTG <- meanMTG[,infoMTG$cluster_label]
Next, find the top marker genes for the ET cluster in each data set (Exc FEZF2 GABRQ in FI and Exc L4-5 FEZF2 SCN4B in MTG). Note that we are using slightly different statistical criteria and comparison types as we did in the cross-species analysis, and therefore the resulting gene lists do not perfectly align.
# FI data exc_cl <- substr(colnames(meanFI),1,3)=="Exc" pti <- which(colnames(meanFI[,exc_cl])=="Exc FEZF2 GABRQ") wme <- apply(meanFI[,exc_cl],1,which.max) fce <- apply(meanFI[,exc_cl],1,function(x) diff(range(-sort(-x)[1:2]))) VEe <- apply(meanFI[,exc_cl],1,function(x) x[pti]-mean(x[c(1:(pti-1),(pti+1):length(x))])) p2e <- apply(propFI[,exc_cl],1,function(x) -sort(-x)[2]) p1e <- apply(propFI[,exc_cl],1,max) pce <- p1e-p2e topVENgeneFI <- names(fce)[(fce>1)&(wme==pti)&(pce>0.25)&(p1e>0.5)&(p2e<0.5)] topVENgeneFI <- topVENgeneFI[order(-fce[(topVENgeneFI)])] # MTG data exc_clb <- substr(colnames(meanMTG),1,3)=="Exc" ptib <- which(colnames(meanMTG[,exc_clb])=="Exc L4-5 FEZF2 SCN4B") wmeb <- apply(meanMTG[,exc_clb],1,which.max) fce <- apply(meanMTG[,exc_clb],1,function(x) diff(range(-sort(-x)[1:2]))) VEe <- apply(meanMTG[,exc_clb],1,function(x) x[ptib]-mean(x[c(1:(ptib-1),(ptib+1):length(x))])) p2e <- apply(propMTG[,exc_clb],1,function(x) -sort(-x)[2]) p1e <- apply(propMTG[,exc_clb],1,max) pme <- apply(propMTG[,exc_clb],1,mean) pce <- p1e-p2e topVENgeneMTG <- names(fce)[(fce>1)&(wmeb==ptib)&(pce>0.25)&(p1e>0.5)&(p2e<0.5)] topVENgeneMTG <- topVENgeneMTG[order(-fce[(topVENgeneMTG)])]
How many FI and MTG genes do we find? Note that we are biasing our plot toward showing all FI genes, but only the BEST MTG genes.
print(paste(length(topVENgeneFI),"FI genes in the ET cluster.")) print(paste(length(topVENgeneMTG),"MTG genes in the ET cluster.")) print(paste(length(intersect(topVENgeneFI,topVENgeneMTG)),"genes in the ET cluster common to both regions."))
As with the subclass analysis in the previous code document, we find more ET markers in MTG than in FI, although the specific numbers differe because here we are looking at finer cluster resolution than in the previous code document.
Finally, let's order and plot these genes in both FI and MTG. Note that these plots are in log scale, whereas the rest of the manuscript is in linear scale. These genes are ordered by the average fold change between expression and proportions in the ET cluster vs. other excitatory clusters in MTG. MTG first.
ord <- order(-((p1e-pme)*VEe)[topVENgeneFI]) topVENgeneOut <- unique(c(topVENgeneFI[ord],intersect(topVENgeneMTG,names(fce)[(propFI[,exc_cl][,pti]<0.3)]))) topVENgeneOut <- intersect(topVENgeneOut,rownames(meanFI)) topVENgeneOut <- intersect(topVENgeneOut,rownames(meanMTG)) group_violin_plot(data_source = mtgFolder, clusters=which(exc_clb), genes=topVENgeneOut, logscale = TRUE) ggsave("Fig4_MTG_violinPlots_VENgenes.pdf", width = 9, height = 12)
Now FI.
group_violin_plot(data_source = inputFolder, clusters=which(exc_cl), genes=topVENgeneOut, logscale = TRUE) ggsave("Fig4_FI_violinPlots_VENgenes.pdf", width = 5, height = 12)
Here we calculate the fraction of excitatory cells from layer 5 that map to the ET clusters in MTG and FI.
clusterFI <- annoFI$cluster_label isPT_FI <- clusterFI=="Exc FEZF2 GABRQ" isExc_FI <- substr(clusterFI,1,3)=="Exc" clusterMTG<- annoMTG$cluster_label isPT_MTG <- (clusterMTG=="Exc L4-5 FEZF2 SCN4B")&(annoMTG$brain_subregion_label=="L5") isExc_MTG <- (substr(clusterMTG,1,3)=="Exc")&(annoMTG$brain_subregion_label=="L5") props <- c(100*mean(isPT_MTG[isExc_MTG],na.rm=TRUE),100*mean(isPT_FI[isExc_FI])) names(props) <- c("MTG","FI") signif(props,3)
Note that there is a much higher percentage of ET-associated excitatory cells in FI than in MTG. These values are also consistent with results from ISH.
Output session information.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.