In AllenInstitute/L5_VEN:

knitr::opts_chunk$set(echo = TRUE)

Overview

This script identifies common and distinct ET-associated genes in human middle temporal gyrus as compared with human fronto-insula, and compares the proportions of cells across clusters for each brain region.

If needed, set the working directory first: e.g. setwd("C:/Users/jeremym/Desktop/VEN_TEST2").

Load the relevant libraries

suppressPackageStartupMessages({
  library(VENcelltypes) 
  library(feather)
  library(dplyr)
  library(gplots)
  library(matrixStats)
})
options(stringsAsFactors=FALSE)

Read in the data and metadata

These files were created in the previous code file, which must be run prior to running this code block. First, let's read in the data from this manuscript (human FI). We do not need to subset the data for this analysis.

## Read in the data
inputFolder <- "FI/"
Expr.dat <- feather(paste(inputFolder,"data.feather",sep=""))
annoFI   <- read_feather(paste(inputFolder,"anno.feather",sep="")) 
exprData <- as.matrix(Expr.dat[,colnames(Expr.dat)[colnames(Expr.dat)!="sample_id"]])
rownames(exprData) = Expr.dat$sample_id
datFI    <- t(exprData)
datFI    <- log2(datFI+1)

load(paste(inputFolder,"clusterInfo.rda",sep="")) 
infoFI   <- clusterInfo

second, let's read in the comparable data from human MTG.

## Read in the data
mtgFolder <- "MTG/"
annoMTG   <- read_feather(paste(mtgFolder,"anno.feather",sep="")) 
Expr.datp <- feather(paste(mtgFolder,"data.feather",sep=""))
annoMTG   <- annoMTG[match(Expr.datp$sample_id,annoMTG$sample_id),] 
datMTG    <- as.matrix(Expr.datp[,names(Expr.datp)!="sample_id"])
rownames(datMTG) <- annoMTG$sample_id
datMTG    <- t(datMTG)
datMTG    <- log2(datMTG+1)

data("clusterInfoMTG")
infoMTG   <- clusterInfoMTG

Finally, find the common genes between studies.

kpGenes <- intersect(rownames(datMTG),rownames(datFI))
datMTG  <- datMTG[kpGenes,]
datFI   <- datFI[kpGenes,]

Find common and distict ET-associated genes in FI and MTG

We have already shown in the previous code that a handful of genes are enriched in ET cells relative to other subclasses in multiple regions in mouse and human. Now we want to extend this result to see if we can find an expanded list when we only consider human, and also to identify genes which may be specific to FI (and therefore potentially associated with diversified morphologies).

As a first step we need to calculate some statistics on these clusters, including means and proportions.

## FI statistics
exprThresh  <- 1
clFI        <- annoFI$cluster_label
names(clFI) <- colnames(datFI)
propFI      <- do.call("cbind", tapply(names(clFI), clFI, function(x) rowMeans(datFI[,x]>exprThresh))) 
meanFI      <- do.call("cbind", tapply(names(clFI), clFI, function(x) rowMeans(datFI[,x]))) 
rownames(propFI) <- rownames(meanFI) <- rownames(datFI)  
propFI      <- propFI[,infoFI$cluster_label]
meanFI      <- meanFI[,infoFI$cluster_label]

## MTG statistics
clMTG       <- annoMTG$cluster_label
names(clMTG)<- colnames(datMTG)
propMTG     <- do.call("cbind", tapply(names(clMTG), clMTG, function(x) rowMeans(datMTG[,x]>exprThresh))) 
meanMTG     <- do.call("cbind", tapply(names(clMTG), clMTG, function(x) rowMeans(datMTG[,x]))) 
rownames(propMTG) <- rownames(meanMTG) <- rownames(datMTG)  
propMTG     <- propMTG[,infoMTG$cluster_label]
meanMTG     <- meanMTG[,infoMTG$cluster_label]

Next, find the top marker genes for the ET cluster in each data set (Exc FEZF2 GABRQ in FI and Exc L4-5 FEZF2 SCN4B in MTG). Note that we are using slightly different statistical criteria and comparison types as we did in the cross-species analysis, and therefore the resulting gene lists do not perfectly align.

# FI data
exc_cl  <- substr(colnames(meanFI),1,3)=="Exc"
pti     <- which(colnames(meanFI[,exc_cl])=="Exc FEZF2 GABRQ")
wme <- apply(meanFI[,exc_cl],1,which.max)
fce <- apply(meanFI[,exc_cl],1,function(x) diff(range(-sort(-x)[1:2])))
VEe <- apply(meanFI[,exc_cl],1,function(x) x[pti]-mean(x[c(1:(pti-1),(pti+1):length(x))]))
p2e <- apply(propFI[,exc_cl],1,function(x) -sort(-x)[2])
p1e <- apply(propFI[,exc_cl],1,max)
pce <- p1e-p2e

topVENgeneFI <- names(fce)[(fce>1)&(wme==pti)&(pce>0.25)&(p1e>0.5)&(p2e<0.5)]
topVENgeneFI <- topVENgeneFI[order(-fce[(topVENgeneFI)])]

# MTG data
exc_clb <- substr(colnames(meanMTG),1,3)=="Exc"
ptib    <- which(colnames(meanMTG[,exc_clb])=="Exc L4-5 FEZF2 SCN4B")
wmeb <- apply(meanMTG[,exc_clb],1,which.max)
fce <- apply(meanMTG[,exc_clb],1,function(x) diff(range(-sort(-x)[1:2])))
VEe <- apply(meanMTG[,exc_clb],1,function(x) x[ptib]-mean(x[c(1:(ptib-1),(ptib+1):length(x))]))
p2e <- apply(propMTG[,exc_clb],1,function(x) -sort(-x)[2])
p1e <- apply(propMTG[,exc_clb],1,max)
pme <- apply(propMTG[,exc_clb],1,mean)
pce <- p1e-p2e

topVENgeneMTG <- names(fce)[(fce>1)&(wmeb==ptib)&(pce>0.25)&(p1e>0.5)&(p2e<0.5)]
topVENgeneMTG <- topVENgeneMTG[order(-fce[(topVENgeneMTG)])]

How many FI and MTG genes do we find? Note that we are biasing our plot toward showing all FI genes, but only the BEST MTG genes.

print(paste(length(topVENgeneFI),"FI genes in the ET cluster."))
print(paste(length(topVENgeneMTG),"MTG genes in the ET cluster."))
print(paste(length(intersect(topVENgeneFI,topVENgeneMTG)),"genes in the ET cluster common to both regions."))

As with the subclass analysis in the previous code document, we find more ET markers in MTG than in FI, although the specific numbers differe because here we are looking at finer cluster resolution than in the previous code document.

Finally, let's order and plot these genes in both FI and MTG. Note that these plots are in log scale, whereas the rest of the manuscript is in linear scale. These genes are ordered by the average fold change between expression and proportions in the ET cluster vs. other excitatory clusters in MTG. MTG first.

ord  <- order(-((p1e-pme)*VEe)[topVENgeneFI])
topVENgeneOut <- unique(c(topVENgeneFI[ord],intersect(topVENgeneMTG,names(fce)[(propFI[,exc_cl][,pti]<0.3)])))
topVENgeneOut <- intersect(topVENgeneOut,rownames(meanFI))
topVENgeneOut <- intersect(topVENgeneOut,rownames(meanMTG))

group_violin_plot(data_source = mtgFolder, clusters=which(exc_clb), genes=topVENgeneOut, logscale = TRUE)
ggsave("Fig4_MTG_violinPlots_VENgenes.pdf", width = 9, height = 12)

Now FI.

group_violin_plot(data_source = inputFolder, clusters=which(exc_cl), genes=topVENgeneOut, logscale = TRUE)
ggsave("Fig4_FI_violinPlots_VENgenes.pdf", width = 5, height = 12)

ET cell proportions

Here we calculate the fraction of excitatory cells from layer 5 that map to the ET clusters in MTG and FI.

clusterFI <- annoFI$cluster_label
isPT_FI   <- clusterFI=="Exc FEZF2 GABRQ"
isExc_FI  <- substr(clusterFI,1,3)=="Exc"
clusterMTG<- annoMTG$cluster_label
isPT_MTG  <- (clusterMTG=="Exc L4-5 FEZF2 SCN4B")&(annoMTG$brain_subregion_label=="L5")
isExc_MTG <- (substr(clusterMTG,1,3)=="Exc")&(annoMTG$brain_subregion_label=="L5")
props     <- c(100*mean(isPT_MTG[isExc_MTG],na.rm=TRUE),100*mean(isPT_FI[isExc_FI]))
names(props) <- c("MTG","FI")
signif(props,3)

Note that there is a much higher percentage of ET-associated excitatory cells in FI than in MTG. These values are also consistent with results from ISH.

Output session information.