title: "R Notebook" output: html_notebook date: "Last modified: 2022-04-14"
# Load libraries and some functions
source(here::here("R", "libraries.R"))
## Attaching SeuratObject
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.8
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## here() starts at /crex/proj/uppstore2017283/jussi/NtsCreProject
R.utils::sourceDirectory(here("R", "functions"))
# Load SS2 data and annotation
FosTRAP <-
read.delim(here("data", "raw", "SS420_counts.txt"), sep = " ") %>%
as.matrix()
FosTRAP_annotation <-
read.delim(
here("data", "reference", "SS420_cells_annot_for_Yizhou.txt"),
sep = " "
) %>%
rename(cell_type = x) %>%
rownames_to_column("cell")
# Create Seurat object
FosTRAP %<>%
CreateSeuratObject(project = "FosTRAP", min.features = 500) %>%
NormalizeData() %>%
ScaleData(features = rownames(FosTRAP)) %>%
FindVariableFeatures() %>%
RunPCA()
## Centering and scaling data matrix
## PC_ 1
## Positive: Epas1, Ifitm3, Igfbp7, Crip2, Vim, Eng, Pltp, Tm4sf1, Id1, Tagln2
## Ly6a, Cldn5, H2-K1, Myl12a, Ly6c1, Esam, Gpr116, Klf2, Cav1, Id3
## Ptrf, Msn, Ets1, Cd34, Timp3, Srgn, Tns1, Acvrl1, Bcam, Pecam1
## Negative: Scg2, Aplp1, Tubb4a, AI593442, Serpinb1b, Trp53i11, Rph3a, Calb1, Slc17a6, Nrn1
## Sez6, Sirpa, Vat1l, Snca, Bin1, Omg, Tmem255a, Nefh, Cck, F2rl2
## Cbln2, Ptprd, Adarb2, Acsl6, Ttyh1, Nell1, Hpcal4, Rgs2, Abca2, Kcnip1
## PC_ 2
## Positive: Tsc22d1, Sptbn1, Ly6e, Scg2, Bsg, Itm2a, Slc39a10, Egfl7, 9430020K01Rik, Slc6a6
## Slc9a3r2, Flnb, AI593442, Plk2, Cxcl12, Tpm1, Cldn5, Trp53i11, Ly6a, Ly6c1
## Flt1, Abcb1a, Lef1, Nos3, Slc7a1, Ltbp4, Alas1, Slco1a4, Tek, Eltd1
## Negative: Gpr37l1, Cmtm5, Slc1a3, Aqp4, Tsc22d4, Agt, Slc6a11, Ramp1, Slc6a9, Ppap2b
## Slc39a12, Kcnj10, Cldn10, Mmd2, Slc1a2, Acsbg1, Slc14a1, Slc7a10, Sdc4, Cxcl14
## Gja1, Mlc1, Scd1, Gjb6, Gfap, Bcan, Paqr6, Apoe, Timp4, Ndrg2
## PC_ 3
## Positive: Atp1b2, Atp1a2, Sparcl1, Ndrg2, Fxyd1, Ppap2b, Slc6a9, Tril, Scd2, Aqp4
## Mt1, Gpr37l1, Slc6a11, Htra1, Prdx6, Gja1, Bcan, Fgfr3, Mt2, Slc39a12
## Cldn10, Slc7a2, Dbi, Mmd2, Sdc2, Elovl5, Daam2, Mlc1, Ddr1, Vcl
## Negative: C1qb, C1qc, Pld4, Tyrobp, C1qa, Cd33, Ctss, Csf1r, Blnk, Fcgr3
## Neurl3, Fcer1g, C3ar1, Spi1, Mpeg1, Cd53, Cd14, Nckap1l, Fcrls, Sash3
## Arhgap9, Myo1f, Itgam, Emr1, Vav1, Ccl4, Itgb2, Cyth4, Trem2, Ly86
## PC_ 4
## Positive: Tagln, Tpm2, Acta2, Rasl12, Mustn1, Aoc3, Smim5, Asb2, Gpr20, Olfr558
## Myh11, Slc38a11, Rbpms2, Cd248, Perp, Rrad, Myl9, Pdlim3, Ednra, Cspg4
## Olfr78, Ano1, Mrvi1, Heyl, Tgfb3, Filip1l, Adap2, Tbx2, Gprc5c, Mylk
## Negative: Mfsd2a, Slc38a3, Bsg, Slc16a1, Slco1a4, Abcb1a, Pglyrp1, Slc22a8, Slco1c1, Slc7a5
## Vwa1, Itm2a, Ptn, Ctla2a, Slc40a1, Prom1, Flt1, Ablim1, Gm694, Ushbp1
## Hmcn1, Slc16a4, Pllp, Slc2a1, Ramp2, Mal, Ttyh2, Hmgcs2, Itih5, Kdr
## PC_ 5
## Positive: Cldn11, Opalin, Ermn, Ugt8a, Mog, Gjc3, Mag, Nkx6-2, Fa2h, Ptgds
## Tmem88b, Mobp, Pls1, Hapln2, Sox10, Plekhh1, Gjb1, St18, Gjc2, Trf
## Pex5l, S1pr5, Galnt6, Tmem125, Pllp, Slc44a1, Cnp, Plp1, Plxnb3, Aspa
## Negative: Clu, Gja1, Aqp4, Agt, Slc4a4, Mt2, Gfap, Bcan, Acsbg1, Nkain4
## Mlc1, Slc14a1, Atp1b2, Slc39a12, Ntsr2, Cldn10, Cxcl14, Fabp7, Mgst1, Slc6a11
## Ntrk2, Mmd2, Timp4, Gpr37l1, Paqr6, Pla2g7, Ednrb, Cbs, Gjb6, Lrrc8a
# Plot some QC metrics
VlnPlot(FosTRAP, c("nFeature_RNA", "nCount_RNA"))
ElbowPlot(FosTRAP, ndims = 50)
# Cluster cells
FosTRAP %<>%
RunUMAP(reduction = "pca", dims = 1:10, n.neighbors = 50) %>%
FindNeighbors(reduction = "pca", dims = 1:10) %>%
FindClusters(resolution = 0.5) %>%
BuildClusterTree(
features = NULL, dims = 1:10, graph = NULL,
reorder = TRUE, reorder.numeric = TRUE, verbose = TRUE
)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:23:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:23:39 Read 372 rows and found 10 numeric columns
## 11:23:39 Using Annoy for neighbor search, n_neighbors = 50
## 11:23:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:23:39 Writing NN index file to temp file /scratch/25857701/RtmpdngI9X/file68ee777b9c4d
## 11:23:39 Searching Annoy index using 1 thread, search_k = 5000
## 11:23:39 Annoy recall = 100%
## 11:23:39 Commencing smooth kNN distance calibration using 1 thread
## 11:23:40 Initializing from normalized Laplacian + noise
## 11:23:40 Commencing optimization for 500 epochs, with 19550 positive edges
## 11:23:41 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 372
## Number of edges: 10746
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8375
## Number of communities: 5
## Elapsed time: 0 seconds
## Reordering identity classes and rebuilding tree
annot <-
rownames(FosTRAP@meta.data) %>%
as_tibble() %>%
left_join(FosTRAP_annotation, by = c("value" = "cell")) %>%
rename(cell = value)
FosTRAP@meta.data %<>%
rownames_to_column("cell") %>%
left_join(FosTRAP_annotation, by = "cell") %>%
column_to_rownames("cell")
FeaturePlot(FosTRAP, "Rbfox3", label = TRUE) +
VlnPlot(FosTRAP, c("Rbfox3"))+ NoLegend()
# Remove nonneuronal cells
FosTRAP %<>%
subset(idents = 3:5) %>%
NormalizeData() %>%
ScaleData(features = rownames(FosTRAP)) %>%
FindVariableFeatures() %>%
RunPCA() %>%
RunUMAP(reduction = "pca", dims = 1:10, n.neighbors = 50) %>%
FindNeighbors(reduction = "pca", dims = 1:10) %>%
FindClusters(resolution = 0.5) %>%
BuildClusterTree(
features = NULL, dims = 1:10, graph = NULL,
reorder = TRUE, reorder.numeric = TRUE, verbose = TRUE
)
## Centering and scaling data matrix
## PC_ 1
## Positive: Slc17a6, Nrn1, Camk2n1, Cbln1, Camkv, Adarb2, Ndrg1, Spock1, Plch2, Dhrs3
## Nell1, F2rl2, Cck, Nell2, Rora, Satb1, Pvalb, Cbln2, Cpne8, Gabra1
## Ebf2, Gabra5, Fam19a1, Ebf1, Bdnf, Osbpl1a, Cacna2d1, AI593442, Ap1s2, Ntng1
## Negative: Pcp4l1, Lamp5, Sall3, Cplx2, Gria3, Nr2f2, Slc6a1, Gad1, Plcb1, Mgat4c
## Gad2, Krt25, Gfra2, Grik2, Fstl5, Kcnip3, Bcl11a, Slc6a5, Pax8, Atp2b4
## Sorcs1, Gm14204, Kcnf1, Cacna2d3, Npy, Rph3a, Fam163b, Etl4, Fam84a, Acadl
## PC_ 2
## Positive: Sparcl1, Vamp1, Nefm, Slc24a3, Nat8l, Pvalb, Scn4b, Dgkb, Prkcd, Ntng1
## Fam19a1, Rassf4, Dpp10, Nefh, Trp53i11, Nr4a1, Sema5b, Plin2, Dusp1, Rora
## Rgs2, Gsg1l, Nell2, Dgkg, Kcnc4, Egr1, Aldoc, Fam163b, Lgi3, Cxcl12
## Negative: Sst, Baiap3, Arhgap6, Zbtb20, Pcp4, Syndig1l, Frmpd1, Prkcg, Itm2c, Scara5
## Rprm, A730046J19Rik, Rmst, Car8, Ppap2b, Hpcal4, Ankfn1, Sntb1, Trh, Pde11a
## Ucp2, Amigo2, Hap1, Ucn3, Grem1, Tmem215, Tex15, Gfra1, Lama4, Rtp4
## PC_ 3
## Positive: Lrrc38, Mia, Tac2, Pthlh, Calb2, Nmu, Sema3c, Thbs1, Ngb, Thbs3
## Gpr149, Bmp3, Grp, AA387883, Rspo1, Oxtr, Arhgef28, Grm2, Gna14, Cacng5
## Frzb, Proser2, Homer2, Car12, Slc39a11, Prkca, Pld5, Wnt7b, Cpne2, Kcnk10
## Negative: Scara5, Trh, Car8, Sntb1, Ucn3, Pcp4, Nrsn1, Lama4, Prkcg, Sfrp2
## St8sia2, Sorcs3, Chrna6, Adprhl1, Grem1, Cpne5, Pde11a, Gal, Spsb4, Rab3b
## Sel1l3, Syndig1l, Vill, Amigo2, Adcy2, Ihh, Col23a1, Chrnb3, Tmem132b, Cck
## PC_ 4
## Positive: Osr1, Rorb, Igf1, Kcnip4, P2ry1, Dkk3, Cnr1, Hpse2, Adam12, Alcam
## Otof, Fgf1, Cyp39a1, Htra1, Arhgap36, Col6a1, Bmp7, Zmat4, Gyg, Plcxd2
## Fam84a, Idh1, Trpc4, Ptprm, Snca, Dgkg, Chrna3, Gm5468, Strip2, Cdh1
## Negative: Penk, Ptprd, Kcnip2, Ryr3, 9530091C08Rik, Mafb, Ppp3ca, Gfra2, Mpped2, Synpr
## Hoxc8, Ptprk, Tshz2, Ntsr2, Nxph1, Cplx2, Sez6, Syt5, Atp2b4, Galnt14
## Rora, Cntnap4, Fmn1, Egr3, Slc4a4, Chrm3, Tenm3, Arc, Ctxn1, Baiap2
## PC_ 5
## Positive: Cpne9, Fxyd6, Kcnc4, Gas7, Trp53i11, Vat1l, Htr1d, Zcchc12, Fam20a, Col11a1
## Rab3b, Nbl1, Plcxd3, Gpc3, Pkia, Serpine2, Hpcal4, Igsf9, Syt10, Th
## Resp18, Kcna5, Npw, Necab1, Dkk3, Robo3, Syt5, Kcnv1, Synpr, Creg2
## Negative: Nefh, Kcnab1, Adcy2, Cck, Ncald, Scn4b, Htra1, Pld5, Mafb, Sh3bgr
## Kitl, Hspb8, Ap1s2, Grb14, Slc35d3, Nat8l, Cxcl12, Acan, Vamp1, Kit
## Meis2, Snap29, Sparcl1, Cntn4, Myo5b, Pmaip1, Htr3a, Strip2, Nxnl2, Vwa5a
## 11:23:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:23:46 Read 262 rows and found 10 numeric columns
## 11:23:46 Using Annoy for neighbor search, n_neighbors = 50
## 11:23:46 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:23:46 Writing NN index file to temp file /scratch/25857701/RtmpdngI9X/file68eea913dbf
## 11:23:46 Searching Annoy index using 1 thread, search_k = 5000
## 11:23:46 Annoy recall = 100%
## 11:23:47 Commencing smooth kNN distance calibration using 1 thread
## 11:23:47 Initializing from normalized Laplacian + noise
## 11:23:47 Commencing optimization for 500 epochs, with 12962 positive edges
## 11:23:48 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 262
## Number of edges: 6861
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7954
## Number of communities: 4
## Elapsed time: 0 seconds
## Reordering identity classes and rebuilding tree
FeaturePlot(FosTRAP, "Rbfox3", label = TRUE) +
VlnPlot(FosTRAP, "Rbfox3") + NoLegend()
# Load and prepare Häring et al. data
haring <- readRDS(here("data", "reference", "haring.rds"))
haring %>%
set_idents("standard_names") %>%
NormalizeData() %>%
ScaleData(features = rownames(haring)) %>%
FindVariableFeatures() %>%
RunPCA()
## Centering and scaling data matrix
## PC_ 1
## Positive: Mdh1, Chgb, Resp18, Serpinb1b, Snca, Vsnl1, Scg2, Ralyl, Rab3c, Prdx5
## Syt4, Plcb1, Fstl5, Pcp4l1, Gm14204, Atp1b1, Necab1, Fxyd6, Ldhb, Syt1
## Cd24a, Lamp5, Nxph1, Gad1, Tmsb10, Cend1, Cycs, Hpcal1, Frzb, 1500009L16Rik
## Negative: Mog, Ugt8a, Trf, Cldn11, Cmtm5, Cryab, Mag, Cnp, Mobp, Mal
## Gpr37, Plp1, Aspa, Apod, Car2, Tspan2, Pdlim2, Gatm, Csrp1, Pllp
## Opalin, Tmem88b, Mbp, Ermn, Sept4, Litaf, Gsn, Plekhb1, Fa2h, Cers2
## PC_ 2
## Positive: Gad1, Slc6a1, Gad2, Fstl5, Sall3, Gm14204, Pnoc, Cacna2d3, Pcp4l1, Sez6
## Nrxn3, Lgals9, Npy, Lamp5, 1500016L03Rik, Mgll, Rorb, Pax2, Nxph1, Serpinb1b
## Unc13c, Zmat4, Rph3a, Vwa5a, Sfrp1, Cplx2, Chrm3, 3110047P20Rik, 1500009L16Rik, Id2
## Negative: Slc17a6, Sst, Cacna2d1, Calb1, Ebf2, Grp, Pcp4, Nrn1, Nmu, Adcyap1
## Cpne8, Ldb2, F2rl2, Tac2, Grin3a, Mia, Col11a1, Npas4, Xist, Calb2
## Bdnf, Car12, C1ql3, B230216N24Rik, Rprm, Tshz3, Mt3, Thbs1, Miat, Mt1
## PC_ 3
## Positive: Scn1a, Atp1b1, Cplx1, Lynx1, Kcna2, Vamp1, Pcdh7, Ldhb, Nat8l, Nefm
## Nacc2, Slc24a2, Bend6, Gm13629, Scn1b, Pvalb, Adcy1, Kcnc3, Rgs7bp, Elavl2
## Gnai1, Kcna1, Ppargc1a, Slc25a5, Cox5a, Gabra5, Maf, Cend1, Kcnab3, Peg13
## Negative: Tppp3, Gal, Lgals9, Parm1, Scg2, Vwa5a, Tuba1a, Sstr2, Calca, Pnoc
## Pdyn, Cd24a, Efnb3, Eml1, Fth1, Fos, Gabrg1, 3110047P20Rik, Chodl, Prdx1
## Abi3bp, Serpinb1b, Ptprz1, Hmgcs1, Rspo3, Fstl5, B230216N24Rik, Gucy2d, Jun, Stmn4
## PC_ 4
## Positive: Synpr, Cpne8, Arpp21, AI593442, Nmu, Cpne5, Tac2, Cbln1, Gm5424, Sorcs3
## Frzb, Nts, Pdzd2, Rab3c, Mia, Cck, Gm2694, Ebf2, Car8, Rbms3
## Grp, Plcb1, Fbn2, Hspb1, Adcy1, Arap2, Pld5, 1500016L03Rik, Ucn3, P2ry1
## Negative: mt-Tm, Gm13826, Sema5a, mt-Ti, Gpr101, Pabpc1, Ap1s2, mt-Te, Lmo3, Tomm6
## Penk, Grik1, Adra2a, Slc25a4, Junb, Cbln4, Atp2b4, mt-Ta, Rpl13, Rn4.5s-loc5
## Rn4.5s-loc1, Kcnk2, Actb, Rn4.5s-loc4, Rn4.5s-loc3, Rn4.5s-loc8, Rn4.5s-loc7, Rn4.5s-loc2, mt-Tl2, mt-Ty
## PC_ 5
## Positive: Cox7c, Cox6b1, Cycs, Mdh1, Atp5k, Prdx5, Atp5g1, Ndufa5, Mt3, Fth1
## Slc25a4, Ldhb, Gstp1, Pcp4, Vamp1, Resp18, Cend1, Ggct, Fabp5, Gpr101
## Prdx1, Tmsb4x, Cfl2, Slc25a5, Sec11c, Rgs10, Vmp1, Bend6, Cbln4, Ralyl
## Negative: Epas1, Sparc, Slco1c1, Pltp, Slco1a4, Ly6c1, Atp1a2, Cldn5, Flt1, Tns1
## Ptprb, Tm4sf1, Car4, Igfbp7, Tcf7l2, Cspg4, Rgs5, Mfsd2a, Ppic, Abcb1a
## Ptprz1, Slc1a2, 1810041L15Rik, Bcan, Esam, Tril, Gpr116, 9630013A20Rik, Itpr2, AI593442
## An object of class Seurat
## 24378 features across 1545 samples within 1 assay
## Active assay: RNA (24378 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, tsne
# Create classifier with scPred using MDA as model
haring %<>%
getFeatureSpace("standard_names") %>%
trainModel(model = "mda")
## ● Extracting feature space for each cell type...
## DONE!
## ● Training models for each cell type...
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## Warning: did not converge in 10 iterations
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 70000)
## Warning: did not converge in 10 iterations
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 59100)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## DONE!
# Check model performance
get_scpred(haring)
## 'scPred' object
## ✔ Prediction variable = standard_names
## ✔ Discriminant features per cell type
## ✔ Training model(s)
## Summary
##
## |Cell type | n| Features|Method | ROC| Sens| Spec|
## |:---------|---:|--------:|:------|-----:|-----:|-----:|
## |Gaba1 | 18| 100|mda | 0.893| 0.150| 0.991|
## |Gaba10 | 66| 100|mda | 0.975| 0.710| 0.982|
## |Gaba11 | 48| 100|mda | 0.954| 0.667| 0.969|
## |Gaba12 | 56| 100|mda | 0.996| 0.891| 0.990|
## |Gaba13 | 34| 100|mda | 0.935| 0.352| 0.980|
## |Gaba14 | 41| 100|mda | 0.996| 0.975| 0.993|
## |Gaba15 | 11| 100|mda | 0.988| 0.800| 0.985|
## |Gaba2 | 87| 100|mda | 0.978| 0.724| 0.971|
## |Gaba3 | 145| 100|mda | 0.971| 0.779| 0.974|
## |Gaba4 | 11| 100|mda | 0.912| 0.433| 0.992|
## |Gaba5 | 19| 100|mda | 0.967| 0.617| 0.984|
## |Gaba6 | 51| 100|mda | 0.981| 0.847| 0.989|
## |Gaba7 | 39| 100|mda | 0.985| 0.696| 0.985|
## |Gaba8 | 67| 100|mda | 0.957| 0.762| 0.976|
## |Gaba9 | 85| 100|mda | 0.986| 0.812| 0.976|
## |Glut1 | 32| 100|mda | 0.956| 0.724| 0.989|
## |Glut10 | 129| 100|mda | 0.946| 0.697| 0.960|
## |Glut11 | 61| 100|mda | 0.969| 0.588| 0.968|
## |Glut12 | 35| 100|mda | 0.959| 0.657| 0.981|
## |Glut13 | 38| 100|mda | 0.938| 0.389| 0.971|
## |Glut14 | 34| 100|mda | 0.970| 0.586| 0.983|
## |Glut15 | 22| 100|mda | 0.945| 0.350| 0.985|
## |Glut2 | 34| 100|mda | 0.997| 0.819| 0.994|
## |Glut3 | 34| 100|mda | 0.996| 0.914| 0.990|
## |Glut4 | 34| 100|mda | 0.972| 0.619| 0.986|
## |Glut5 | 24| 100|mda | 0.968| 0.550| 0.986|
## |Glut6 | 75| 100|mda | 0.976| 0.787| 0.964|
## |Glut7 | 104| 100|mda | 0.962| 0.616| 0.967|
## |Glut8 | 77| 100|mda | 0.922| 0.596| 0.983|
## |Glut9 | 34| 100|mda | 0.939| 0.595| 0.980|
# Predict labels for FosTRAP neurons
FosTRAP %<>% scPredict(haring, threshold = 0.75)
## ● Matching reference with new dataset...
## ─ 2789 features present in reference loadings
## ─ 2735 features shared between reference and new dataset
## ─ 98.06% of features in the reference are present in new dataset
## ● Aligning new data to reference...
## Harmony 1/20
## Harmony 2/20
## Harmony converged after 2 iterations
## ● Classifying cells...
## DONE!
# Max score distributions, red vertical line (0.55) is the assignment boundary
FosTRAP@meta.data %>%
ggplot(aes(scpred_max)) +
geom_histogram(
bins = 50,
color = "white",
fill = "darkblue",
alpha = 0.7
) +
geom_vline(xintercept = 0.75, color = "red", size = 1) +
cowplot::theme_cowplot() +
scale_y_continuous(expand = expansion(mult = c(0, .1))) +
labs(
title = "",
x = "Max prediction score",
y = "Number of cells"
)
# Discard unassigned neurons and cell types with less than 3 cells
FosTRAP %<>% set_idents("scpred_prediction")
FosTRAP %<>%
subset(idents = as.character(unique(Idents(FosTRAP))) %>%
keep(~ .x %not_in% "unassigned"))
clusters_to_keep <-
FosTRAP@meta.data %>%
count(scpred_prediction, sort = TRUE) %>%
mutate(keep = n >= 3) %>%
select(scpred_prediction, keep)
FosTRAP@meta.data %<>%
rownames_to_column("cell") %>%
left_join(clusters_to_keep, by = "scpred_prediction") %>%
column_to_rownames("cell")
FosTRAP %<>%
set_idents("keep") %>%
subset(idents = TRUE) %>%
set_idents("scpred_prediction")
# scPred score heatmap
FosTRAP@meta.data %>%
select(
starts_with("scpred_"),
-c(scpred_max, scpred_prediction, scpred_no_rejection)
) %>%
rename_with(~ str_remove(.x, "scpred_")) %>%
t() %>%
pheatmap::pheatmap(
cluster_rows = FALSE,
cluster_cols = TRUE,
treeheight_col = 0,
show_colnames = FALSE,
color = viridisLite::viridis(10),
fontsize = 12,
title = ""
)
# Plot UMAP with predicted identites
DimPlot(FosTRAP, label = TRUE, repel = TRUE)
# Plot neurotransmitter type (Glut/Gaba)
(FeaturePlot(FosTRAP,
c("Slc17a6", "Slc32a1"),
label = TRUE,
pt.size = 1,
repel = TRUE)) /
(VlnPlot(FosTRAP, c("Slc17a6", "Slc32a1")))
# Plot number of neurons per celltype
FosTRAP@meta.data %>%
count(scpred_prediction, sort = TRUE) %>%
mutate(prop = n / sum(n)) %>%
ggplot(aes(fct_inorder(scpred_prediction), prop)) +
geom_col(fill = "darkblue", alpha = 0.7) +
cowplot::theme_cowplot() +
scale_y_continuous(expand = expansion(mult = c(0, .1))) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(x = "Predicted cell type", y = "Proportion of cells")
# Tabulate cell counts per predicted identity
FosTRAP@meta.data %>%
count(scpred_prediction, sort = TRUE) %>%
knitr::kable()
|scpred_prediction | n| |:-----------------|--:| |Gaba12 | 63| |Glut1 | 54| |Gaba10 | 10| |Glut3 | 9| |Glut2 | 4| |Glut4 | 4|
# Get markers for Häring et al. data
haring_markers <-
haring %>%
set_idents("standard_names") %>%
FindAllMarkers(only.pos = TRUE)
## Calculating cluster Gaba1
## Calculating cluster Gaba10
## Calculating cluster Gaba11
## Calculating cluster Gaba12
## Calculating cluster Gaba13
## Calculating cluster Gaba14
## Calculating cluster Gaba15
## Calculating cluster Gaba2
## Calculating cluster Gaba3
## Calculating cluster Gaba4
## Calculating cluster Gaba5
## Calculating cluster Gaba6
## Calculating cluster Gaba7
## Calculating cluster Gaba8
## Calculating cluster Gaba9
## Calculating cluster Glut1
## Calculating cluster Glut10
## Calculating cluster Glut11
## Calculating cluster Glut12
## Calculating cluster Glut13
## Calculating cluster Glut14
## Calculating cluster Glut15
## Calculating cluster Glut2
## Calculating cluster Glut3
## Calculating cluster Glut4
## Calculating cluster Glut5
## Calculating cluster Glut6
## Calculating cluster Glut7
## Calculating cluster Glut8
## Calculating cluster Glut9
# Get top genes for Häring et al. cell types
top_genes_haring <-
haring_markers %>%
filter(cluster %in% unique(FosTRAP@meta.data$scpred_prediction)) %>%
group_by(cluster) %>%
slice_min(p_val_adj, n = 5, with_ties = FALSE)
# Re-order FosTRAP data (with assigned IDs) for plotting
order <-
c(
"Glut1",
"Glut2",
"Glut3",
"Glut4",
"Glut6",
"Glut7",
"Glut8",
"Gaba10",
"Gaba12",
"Gaba13"
)
Idents(FosTRAP) <- factor(x = Idents(FosTRAP), levels = order)
# Dotplot of top Häring markers in FosTRAP neurons
FosTRAP %>%
DotPlot(
features = unique(c("Slc32a1", "Slc17a6", "Prkcg", top_genes_haring$gene))
) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
coord_flip()
# Plot Nts and Prkcq in FosTRAP neurons
VlnPlot(FosTRAP, c("Nts", "Prkcg"), pt.size = 0.1)
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /sw/apps/R/4.1.1/rackham/lib64/R/lib/libRblas.so
## LAPACK: /sw/apps/R/4.1.1/rackham/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] caret_6.0-90 lattice_0.20-45 scPred_1.9.2 here_1.0.1
## [5] fs_1.5.2 patchwork_1.1.1 magrittr_2.0.2 forcats_0.5.1
## [9] stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4 readr_2.1.2
## [13] tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
## [17] SeuratObject_4.0.4 Seurat_4.1.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.4.1 plyr_1.8.6
## [4] igraph_1.2.11 lazyeval_0.2.2 splines_4.1.1
## [7] listenv_0.8.0 scattermore_0.8 digest_0.6.29
## [10] foreach_1.5.2 htmltools_0.5.2 fansi_1.0.2
## [13] tensor_1.5 cluster_2.1.2 ROCR_1.0-11
## [16] limma_3.48.3 tzdb_0.2.0 recipes_0.2.0
## [19] globals_0.14.0 gower_1.0.0 modelr_0.1.8
## [22] matrixStats_0.61.0 R.utils_2.11.0 hardhat_0.2.0
## [25] spatstat.sparse_2.1-0 colorspace_2.0-3 rvest_1.0.2
## [28] ggrepel_0.9.1 haven_2.4.3 xfun_0.30
## [31] crayon_1.5.0 jsonlite_1.8.0 spatstat.data_2.1-2
## [34] ape_5.6-1 iterators_1.0.14 survival_3.2-13
## [37] zoo_1.8-9 glue_1.6.2 polyclip_1.10-0
## [40] gtable_0.3.0 ipred_0.9-12 leiden_0.3.9
## [43] R.cache_0.15.0 future.apply_1.8.1 abind_1.4-5
## [46] scales_1.1.1 pheatmap_1.0.12 DBI_1.1.2
## [49] spatstat.random_2.1-0 miniUI_0.1.1.1 Rcpp_1.0.8
## [52] viridisLite_0.4.0 xtable_1.8-4 reticulate_1.24
## [55] spatstat.core_2.4-0 stats4_4.1.1 lava_1.6.10
## [58] prodlim_2019.11.13 htmlwidgets_1.5.4 httr_1.4.2
## [61] RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2
## [64] farver_2.1.0 pkgconfig_2.0.3 R.methodsS3_1.8.1
## [67] nnet_7.3-16 uwot_0.1.11 dbplyr_2.1.1
## [70] deldir_1.0-6 utf8_1.2.2 labeling_0.4.2
## [73] tidyselect_1.1.2 rlang_1.0.2 reshape2_1.4.4
## [76] later_1.3.0 munsell_0.5.0 cellranger_1.1.0
## [79] tools_4.1.1 cli_3.2.0 generics_0.1.2
## [82] broom_0.7.12 ggridges_0.5.3 evaluate_0.15
## [85] fastmap_1.1.0 goftest_1.2-3 ModelMetrics_1.2.2.2
## [88] knitr_1.37 fitdistrplus_1.1-8 RANN_2.6.1
## [91] pbapply_1.5-0 future_1.24.0 nlme_3.1-153
## [94] mime_0.12 ggrastr_1.0.1 R.oo_1.24.0
## [97] xml2_1.3.3 ezknitr_0.6.1 compiler_4.1.1
## [100] rstudioapi_0.13 beeswarm_0.4.0 plotly_4.10.0
## [103] png_0.1-7 spatstat.utils_2.3-0 reprex_2.0.1
## [106] stringi_1.7.6 highr_0.9 RSpectra_0.16-0
## [109] Matrix_1.3-4 styler_1.7.0.9001 vctrs_0.3.8
## [112] pillar_1.7.0 lifecycle_1.0.1 spatstat.geom_2.3-2
## [115] lmtest_0.9-39 RcppAnnoy_0.0.19 data.table_1.14.2
## [118] cowplot_1.1.1 irlba_2.3.5 httpuv_1.6.5
## [121] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-20
## [124] gridExtra_2.3 vipor_0.4.5 parallelly_1.30.0
## [127] codetools_0.2-18 MASS_7.3-54 assertthat_0.2.1
## [130] rprojroot_2.0.2 withr_2.5.0 sctransform_0.3.3
## [133] harmony_0.1.0 mgcv_1.8-38 parallel_4.1.1
## [136] hms_1.1.1 timeDate_3043.102 grid_4.1.1
## [139] rpart_4.1-15 class_7.3-19 mda_0.5-2
## [142] Cairo_1.5-14 Rtsne_0.15 pROC_1.18.0
## [145] shiny_1.7.1 lubridate_1.8.0 ggbeeswarm_0.6.0
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.