notebooks/FosTRAP_neurons.md

title: "R Notebook" output: html_notebook date: "Last modified: 2022-04-14"

# Load libraries and some functions
source(here::here("R", "libraries.R"))
## Attaching SeuratObject
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.8
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## here() starts at /crex/proj/uppstore2017283/jussi/NtsCreProject
R.utils::sourceDirectory(here("R", "functions"))
# Load SS2 data and annotation
FosTRAP <-
  read.delim(here("data", "raw", "SS420_counts.txt"), sep = " ") %>%
  as.matrix()

FosTRAP_annotation <-
  read.delim(
    here("data", "reference", "SS420_cells_annot_for_Yizhou.txt"),
    sep = " "
  ) %>%
  rename(cell_type = x) %>%
  rownames_to_column("cell")
# Create Seurat object
FosTRAP %<>%
  CreateSeuratObject(project = "FosTRAP", min.features = 500) %>%
  NormalizeData() %>%
  ScaleData(features = rownames(FosTRAP)) %>%
  FindVariableFeatures() %>%
  RunPCA()
## Centering and scaling data matrix
## PC_ 1 
## Positive:  Epas1, Ifitm3, Igfbp7, Crip2, Vim, Eng, Pltp, Tm4sf1, Id1, Tagln2 
##     Ly6a, Cldn5, H2-K1, Myl12a, Ly6c1, Esam, Gpr116, Klf2, Cav1, Id3 
##     Ptrf, Msn, Ets1, Cd34, Timp3, Srgn, Tns1, Acvrl1, Bcam, Pecam1 
## Negative:  Scg2, Aplp1, Tubb4a, AI593442, Serpinb1b, Trp53i11, Rph3a, Calb1, Slc17a6, Nrn1 
##     Sez6, Sirpa, Vat1l, Snca, Bin1, Omg, Tmem255a, Nefh, Cck, F2rl2 
##     Cbln2, Ptprd, Adarb2, Acsl6, Ttyh1, Nell1, Hpcal4, Rgs2, Abca2, Kcnip1 
## PC_ 2 
## Positive:  Tsc22d1, Sptbn1, Ly6e, Scg2, Bsg, Itm2a, Slc39a10, Egfl7, 9430020K01Rik, Slc6a6 
##     Slc9a3r2, Flnb, AI593442, Plk2, Cxcl12, Tpm1, Cldn5, Trp53i11, Ly6a, Ly6c1 
##     Flt1, Abcb1a, Lef1, Nos3, Slc7a1, Ltbp4, Alas1, Slco1a4, Tek, Eltd1 
## Negative:  Gpr37l1, Cmtm5, Slc1a3, Aqp4, Tsc22d4, Agt, Slc6a11, Ramp1, Slc6a9, Ppap2b 
##     Slc39a12, Kcnj10, Cldn10, Mmd2, Slc1a2, Acsbg1, Slc14a1, Slc7a10, Sdc4, Cxcl14 
##     Gja1, Mlc1, Scd1, Gjb6, Gfap, Bcan, Paqr6, Apoe, Timp4, Ndrg2 
## PC_ 3 
## Positive:  Atp1b2, Atp1a2, Sparcl1, Ndrg2, Fxyd1, Ppap2b, Slc6a9, Tril, Scd2, Aqp4 
##     Mt1, Gpr37l1, Slc6a11, Htra1, Prdx6, Gja1, Bcan, Fgfr3, Mt2, Slc39a12 
##     Cldn10, Slc7a2, Dbi, Mmd2, Sdc2, Elovl5, Daam2, Mlc1, Ddr1, Vcl 
## Negative:  C1qb, C1qc, Pld4, Tyrobp, C1qa, Cd33, Ctss, Csf1r, Blnk, Fcgr3 
##     Neurl3, Fcer1g, C3ar1, Spi1, Mpeg1, Cd53, Cd14, Nckap1l, Fcrls, Sash3 
##     Arhgap9, Myo1f, Itgam, Emr1, Vav1, Ccl4, Itgb2, Cyth4, Trem2, Ly86 
## PC_ 4 
## Positive:  Tagln, Tpm2, Acta2, Rasl12, Mustn1, Aoc3, Smim5, Asb2, Gpr20, Olfr558 
##     Myh11, Slc38a11, Rbpms2, Cd248, Perp, Rrad, Myl9, Pdlim3, Ednra, Cspg4 
##     Olfr78, Ano1, Mrvi1, Heyl, Tgfb3, Filip1l, Adap2, Tbx2, Gprc5c, Mylk 
## Negative:  Mfsd2a, Slc38a3, Bsg, Slc16a1, Slco1a4, Abcb1a, Pglyrp1, Slc22a8, Slco1c1, Slc7a5 
##     Vwa1, Itm2a, Ptn, Ctla2a, Slc40a1, Prom1, Flt1, Ablim1, Gm694, Ushbp1 
##     Hmcn1, Slc16a4, Pllp, Slc2a1, Ramp2, Mal, Ttyh2, Hmgcs2, Itih5, Kdr 
## PC_ 5 
## Positive:  Cldn11, Opalin, Ermn, Ugt8a, Mog, Gjc3, Mag, Nkx6-2, Fa2h, Ptgds 
##     Tmem88b, Mobp, Pls1, Hapln2, Sox10, Plekhh1, Gjb1, St18, Gjc2, Trf 
##     Pex5l, S1pr5, Galnt6, Tmem125, Pllp, Slc44a1, Cnp, Plp1, Plxnb3, Aspa 
## Negative:  Clu, Gja1, Aqp4, Agt, Slc4a4, Mt2, Gfap, Bcan, Acsbg1, Nkain4 
##     Mlc1, Slc14a1, Atp1b2, Slc39a12, Ntsr2, Cldn10, Cxcl14, Fabp7, Mgst1, Slc6a11 
##     Ntrk2, Mmd2, Timp4, Gpr37l1, Paqr6, Pla2g7, Ednrb, Cbs, Gjb6, Lrrc8a
# Plot some QC metrics
VlnPlot(FosTRAP, c("nFeature_RNA", "nCount_RNA"))

plot of chunk unnamed-chunk-4

ElbowPlot(FosTRAP, ndims = 50)

plot of chunk unnamed-chunk-5

# Cluster cells
FosTRAP %<>%
  RunUMAP(reduction = "pca", dims = 1:10, n.neighbors = 50) %>%
  FindNeighbors(reduction = "pca", dims = 1:10) %>%
  FindClusters(resolution = 0.5) %>%
  BuildClusterTree(
    features = NULL, dims = 1:10, graph = NULL,
    reorder = TRUE, reorder.numeric = TRUE, verbose = TRUE
  )
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:23:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:23:39 Read 372 rows and found 10 numeric columns
## 11:23:39 Using Annoy for neighbor search, n_neighbors = 50
## 11:23:39 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:23:39 Writing NN index file to temp file /scratch/25857701/RtmpdngI9X/file68ee777b9c4d
## 11:23:39 Searching Annoy index using 1 thread, search_k = 5000
## 11:23:39 Annoy recall = 100%
## 11:23:39 Commencing smooth kNN distance calibration using 1 thread
## 11:23:40 Initializing from normalized Laplacian + noise
## 11:23:40 Commencing optimization for 500 epochs, with 19550 positive edges
## 11:23:41 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 372
## Number of edges: 10746
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8375
## Number of communities: 5
## Elapsed time: 0 seconds
## Reordering identity classes and rebuilding tree
annot <-
  rownames(FosTRAP@meta.data) %>%
  as_tibble() %>%
  left_join(FosTRAP_annotation, by = c("value" = "cell")) %>%
  rename(cell = value)

FosTRAP@meta.data %<>%
  rownames_to_column("cell") %>%
  left_join(FosTRAP_annotation, by = "cell") %>%
  column_to_rownames("cell")
FeaturePlot(FosTRAP, "Rbfox3", label = TRUE) +
  VlnPlot(FosTRAP, c("Rbfox3"))+ NoLegend()

plot of chunk unnamed-chunk-7

# Remove nonneuronal cells
FosTRAP %<>%
  subset(idents = 3:5) %>%
  NormalizeData() %>%
  ScaleData(features = rownames(FosTRAP)) %>%
  FindVariableFeatures() %>%
  RunPCA() %>%
  RunUMAP(reduction = "pca", dims = 1:10, n.neighbors = 50) %>%
  FindNeighbors(reduction = "pca", dims = 1:10) %>%
  FindClusters(resolution = 0.5) %>%
  BuildClusterTree(
    features = NULL, dims = 1:10, graph = NULL,
    reorder = TRUE, reorder.numeric = TRUE, verbose = TRUE
  )
## Centering and scaling data matrix
## PC_ 1 
## Positive:  Slc17a6, Nrn1, Camk2n1, Cbln1, Camkv, Adarb2, Ndrg1, Spock1, Plch2, Dhrs3 
##     Nell1, F2rl2, Cck, Nell2, Rora, Satb1, Pvalb, Cbln2, Cpne8, Gabra1 
##     Ebf2, Gabra5, Fam19a1, Ebf1, Bdnf, Osbpl1a, Cacna2d1, AI593442, Ap1s2, Ntng1 
## Negative:  Pcp4l1, Lamp5, Sall3, Cplx2, Gria3, Nr2f2, Slc6a1, Gad1, Plcb1, Mgat4c 
##     Gad2, Krt25, Gfra2, Grik2, Fstl5, Kcnip3, Bcl11a, Slc6a5, Pax8, Atp2b4 
##     Sorcs1, Gm14204, Kcnf1, Cacna2d3, Npy, Rph3a, Fam163b, Etl4, Fam84a, Acadl 
## PC_ 2 
## Positive:  Sparcl1, Vamp1, Nefm, Slc24a3, Nat8l, Pvalb, Scn4b, Dgkb, Prkcd, Ntng1 
##     Fam19a1, Rassf4, Dpp10, Nefh, Trp53i11, Nr4a1, Sema5b, Plin2, Dusp1, Rora 
##     Rgs2, Gsg1l, Nell2, Dgkg, Kcnc4, Egr1, Aldoc, Fam163b, Lgi3, Cxcl12 
## Negative:  Sst, Baiap3, Arhgap6, Zbtb20, Pcp4, Syndig1l, Frmpd1, Prkcg, Itm2c, Scara5 
##     Rprm, A730046J19Rik, Rmst, Car8, Ppap2b, Hpcal4, Ankfn1, Sntb1, Trh, Pde11a 
##     Ucp2, Amigo2, Hap1, Ucn3, Grem1, Tmem215, Tex15, Gfra1, Lama4, Rtp4 
## PC_ 3 
## Positive:  Lrrc38, Mia, Tac2, Pthlh, Calb2, Nmu, Sema3c, Thbs1, Ngb, Thbs3 
##     Gpr149, Bmp3, Grp, AA387883, Rspo1, Oxtr, Arhgef28, Grm2, Gna14, Cacng5 
##     Frzb, Proser2, Homer2, Car12, Slc39a11, Prkca, Pld5, Wnt7b, Cpne2, Kcnk10 
## Negative:  Scara5, Trh, Car8, Sntb1, Ucn3, Pcp4, Nrsn1, Lama4, Prkcg, Sfrp2 
##     St8sia2, Sorcs3, Chrna6, Adprhl1, Grem1, Cpne5, Pde11a, Gal, Spsb4, Rab3b 
##     Sel1l3, Syndig1l, Vill, Amigo2, Adcy2, Ihh, Col23a1, Chrnb3, Tmem132b, Cck 
## PC_ 4 
## Positive:  Osr1, Rorb, Igf1, Kcnip4, P2ry1, Dkk3, Cnr1, Hpse2, Adam12, Alcam 
##     Otof, Fgf1, Cyp39a1, Htra1, Arhgap36, Col6a1, Bmp7, Zmat4, Gyg, Plcxd2 
##     Fam84a, Idh1, Trpc4, Ptprm, Snca, Dgkg, Chrna3, Gm5468, Strip2, Cdh1 
## Negative:  Penk, Ptprd, Kcnip2, Ryr3, 9530091C08Rik, Mafb, Ppp3ca, Gfra2, Mpped2, Synpr 
##     Hoxc8, Ptprk, Tshz2, Ntsr2, Nxph1, Cplx2, Sez6, Syt5, Atp2b4, Galnt14 
##     Rora, Cntnap4, Fmn1, Egr3, Slc4a4, Chrm3, Tenm3, Arc, Ctxn1, Baiap2 
## PC_ 5 
## Positive:  Cpne9, Fxyd6, Kcnc4, Gas7, Trp53i11, Vat1l, Htr1d, Zcchc12, Fam20a, Col11a1 
##     Rab3b, Nbl1, Plcxd3, Gpc3, Pkia, Serpine2, Hpcal4, Igsf9, Syt10, Th 
##     Resp18, Kcna5, Npw, Necab1, Dkk3, Robo3, Syt5, Kcnv1, Synpr, Creg2 
## Negative:  Nefh, Kcnab1, Adcy2, Cck, Ncald, Scn4b, Htra1, Pld5, Mafb, Sh3bgr 
##     Kitl, Hspb8, Ap1s2, Grb14, Slc35d3, Nat8l, Cxcl12, Acan, Vamp1, Kit 
##     Meis2, Snap29, Sparcl1, Cntn4, Myo5b, Pmaip1, Htr3a, Strip2, Nxnl2, Vwa5a
## 11:23:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:23:46 Read 262 rows and found 10 numeric columns
## 11:23:46 Using Annoy for neighbor search, n_neighbors = 50
## 11:23:46 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:23:46 Writing NN index file to temp file /scratch/25857701/RtmpdngI9X/file68eea913dbf
## 11:23:46 Searching Annoy index using 1 thread, search_k = 5000
## 11:23:46 Annoy recall = 100%
## 11:23:47 Commencing smooth kNN distance calibration using 1 thread
## 11:23:47 Initializing from normalized Laplacian + noise
## 11:23:47 Commencing optimization for 500 epochs, with 12962 positive edges
## 11:23:48 Optimization finished
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 262
## Number of edges: 6861
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7954
## Number of communities: 4
## Elapsed time: 0 seconds
## Reordering identity classes and rebuilding tree
FeaturePlot(FosTRAP, "Rbfox3", label = TRUE) +
  VlnPlot(FosTRAP, "Rbfox3") + NoLegend()

plot of chunk unnamed-chunk-9

# Load and prepare Häring et al. data
haring <- readRDS(here("data", "reference", "haring.rds"))

haring %>%
  set_idents("standard_names") %>%
  NormalizeData() %>%
  ScaleData(features = rownames(haring)) %>%
  FindVariableFeatures() %>%
  RunPCA()
## Centering and scaling data matrix
## PC_ 1 
## Positive:  Mdh1, Chgb, Resp18, Serpinb1b, Snca, Vsnl1, Scg2, Ralyl, Rab3c, Prdx5 
##     Syt4, Plcb1, Fstl5, Pcp4l1, Gm14204, Atp1b1, Necab1, Fxyd6, Ldhb, Syt1 
##     Cd24a, Lamp5, Nxph1, Gad1, Tmsb10, Cend1, Cycs, Hpcal1, Frzb, 1500009L16Rik 
## Negative:  Mog, Ugt8a, Trf, Cldn11, Cmtm5, Cryab, Mag, Cnp, Mobp, Mal 
##     Gpr37, Plp1, Aspa, Apod, Car2, Tspan2, Pdlim2, Gatm, Csrp1, Pllp 
##     Opalin, Tmem88b, Mbp, Ermn, Sept4, Litaf, Gsn, Plekhb1, Fa2h, Cers2 
## PC_ 2 
## Positive:  Gad1, Slc6a1, Gad2, Fstl5, Sall3, Gm14204, Pnoc, Cacna2d3, Pcp4l1, Sez6 
##     Nrxn3, Lgals9, Npy, Lamp5, 1500016L03Rik, Mgll, Rorb, Pax2, Nxph1, Serpinb1b 
##     Unc13c, Zmat4, Rph3a, Vwa5a, Sfrp1, Cplx2, Chrm3, 3110047P20Rik, 1500009L16Rik, Id2 
## Negative:  Slc17a6, Sst, Cacna2d1, Calb1, Ebf2, Grp, Pcp4, Nrn1, Nmu, Adcyap1 
##     Cpne8, Ldb2, F2rl2, Tac2, Grin3a, Mia, Col11a1, Npas4, Xist, Calb2 
##     Bdnf, Car12, C1ql3, B230216N24Rik, Rprm, Tshz3, Mt3, Thbs1, Miat, Mt1 
## PC_ 3 
## Positive:  Scn1a, Atp1b1, Cplx1, Lynx1, Kcna2, Vamp1, Pcdh7, Ldhb, Nat8l, Nefm 
##     Nacc2, Slc24a2, Bend6, Gm13629, Scn1b, Pvalb, Adcy1, Kcnc3, Rgs7bp, Elavl2 
##     Gnai1, Kcna1, Ppargc1a, Slc25a5, Cox5a, Gabra5, Maf, Cend1, Kcnab3, Peg13 
## Negative:  Tppp3, Gal, Lgals9, Parm1, Scg2, Vwa5a, Tuba1a, Sstr2, Calca, Pnoc 
##     Pdyn, Cd24a, Efnb3, Eml1, Fth1, Fos, Gabrg1, 3110047P20Rik, Chodl, Prdx1 
##     Abi3bp, Serpinb1b, Ptprz1, Hmgcs1, Rspo3, Fstl5, B230216N24Rik, Gucy2d, Jun, Stmn4 
## PC_ 4 
## Positive:  Synpr, Cpne8, Arpp21, AI593442, Nmu, Cpne5, Tac2, Cbln1, Gm5424, Sorcs3 
##     Frzb, Nts, Pdzd2, Rab3c, Mia, Cck, Gm2694, Ebf2, Car8, Rbms3 
##     Grp, Plcb1, Fbn2, Hspb1, Adcy1, Arap2, Pld5, 1500016L03Rik, Ucn3, P2ry1 
## Negative:  mt-Tm, Gm13826, Sema5a, mt-Ti, Gpr101, Pabpc1, Ap1s2, mt-Te, Lmo3, Tomm6 
##     Penk, Grik1, Adra2a, Slc25a4, Junb, Cbln4, Atp2b4, mt-Ta, Rpl13, Rn4.5s-loc5 
##     Rn4.5s-loc1, Kcnk2, Actb, Rn4.5s-loc4, Rn4.5s-loc3, Rn4.5s-loc8, Rn4.5s-loc7, Rn4.5s-loc2, mt-Tl2, mt-Ty 
## PC_ 5 
## Positive:  Cox7c, Cox6b1, Cycs, Mdh1, Atp5k, Prdx5, Atp5g1, Ndufa5, Mt3, Fth1 
##     Slc25a4, Ldhb, Gstp1, Pcp4, Vamp1, Resp18, Cend1, Ggct, Fabp5, Gpr101 
##     Prdx1, Tmsb4x, Cfl2, Slc25a5, Sec11c, Rgs10, Vmp1, Bend6, Cbln4, Ralyl 
## Negative:  Epas1, Sparc, Slco1c1, Pltp, Slco1a4, Ly6c1, Atp1a2, Cldn5, Flt1, Tns1 
##     Ptprb, Tm4sf1, Car4, Igfbp7, Tcf7l2, Cspg4, Rgs5, Mfsd2a, Ppic, Abcb1a 
##     Ptprz1, Slc1a2, 1810041L15Rik, Bcan, Esam, Tril, Gpr116, 9630013A20Rik, Itpr2, AI593442
## An object of class Seurat 
## 24378 features across 1545 samples within 1 assay 
## Active assay: RNA (24378 features, 2000 variable features)
##  2 dimensional reductions calculated: pca, tsne
# Create classifier with scPred using MDA as model
haring %<>%
  getFeatureSpace("standard_names") %>%
  trainModel(model = "mda")
## ●  Extracting feature space for each cell type...
## DONE!
## ●  Training models for each cell type...
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
## Warning: did not converge in 10 iterations
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 70000)
## Warning: did not converge in 10 iterations
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 59100)
## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations

## Warning: did not converge in 10 iterations
## DONE!
# Check model performance
get_scpred(haring)
## 'scPred' object
## ✔  Prediction variable = standard_names 
## ✔  Discriminant features per cell type
## ✔  Training model(s)
## Summary
## 
## |Cell type |   n| Features|Method |   ROC|  Sens|  Spec|
## |:---------|---:|--------:|:------|-----:|-----:|-----:|
## |Gaba1     |  18|      100|mda    | 0.893| 0.150| 0.991|
## |Gaba10    |  66|      100|mda    | 0.975| 0.710| 0.982|
## |Gaba11    |  48|      100|mda    | 0.954| 0.667| 0.969|
## |Gaba12    |  56|      100|mda    | 0.996| 0.891| 0.990|
## |Gaba13    |  34|      100|mda    | 0.935| 0.352| 0.980|
## |Gaba14    |  41|      100|mda    | 0.996| 0.975| 0.993|
## |Gaba15    |  11|      100|mda    | 0.988| 0.800| 0.985|
## |Gaba2     |  87|      100|mda    | 0.978| 0.724| 0.971|
## |Gaba3     | 145|      100|mda    | 0.971| 0.779| 0.974|
## |Gaba4     |  11|      100|mda    | 0.912| 0.433| 0.992|
## |Gaba5     |  19|      100|mda    | 0.967| 0.617| 0.984|
## |Gaba6     |  51|      100|mda    | 0.981| 0.847| 0.989|
## |Gaba7     |  39|      100|mda    | 0.985| 0.696| 0.985|
## |Gaba8     |  67|      100|mda    | 0.957| 0.762| 0.976|
## |Gaba9     |  85|      100|mda    | 0.986| 0.812| 0.976|
## |Glut1     |  32|      100|mda    | 0.956| 0.724| 0.989|
## |Glut10    | 129|      100|mda    | 0.946| 0.697| 0.960|
## |Glut11    |  61|      100|mda    | 0.969| 0.588| 0.968|
## |Glut12    |  35|      100|mda    | 0.959| 0.657| 0.981|
## |Glut13    |  38|      100|mda    | 0.938| 0.389| 0.971|
## |Glut14    |  34|      100|mda    | 0.970| 0.586| 0.983|
## |Glut15    |  22|      100|mda    | 0.945| 0.350| 0.985|
## |Glut2     |  34|      100|mda    | 0.997| 0.819| 0.994|
## |Glut3     |  34|      100|mda    | 0.996| 0.914| 0.990|
## |Glut4     |  34|      100|mda    | 0.972| 0.619| 0.986|
## |Glut5     |  24|      100|mda    | 0.968| 0.550| 0.986|
## |Glut6     |  75|      100|mda    | 0.976| 0.787| 0.964|
## |Glut7     | 104|      100|mda    | 0.962| 0.616| 0.967|
## |Glut8     |  77|      100|mda    | 0.922| 0.596| 0.983|
## |Glut9     |  34|      100|mda    | 0.939| 0.595| 0.980|
# Predict labels for FosTRAP neurons
FosTRAP %<>% scPredict(haring, threshold = 0.75)
## ●  Matching reference with new dataset...
##   ─ 2789 features present in reference loadings
##   ─ 2735 features shared between reference and new dataset
##   ─ 98.06% of features in the reference are present in new dataset
## ●  Aligning new data to reference...
## Harmony 1/20
## Harmony 2/20
## Harmony converged after 2 iterations
## ●  Classifying cells...
## DONE!
# Max score distributions, red vertical line (0.55) is the assignment boundary
FosTRAP@meta.data %>%
  ggplot(aes(scpred_max)) +
  geom_histogram(
    bins = 50,
    color = "white",
    fill = "darkblue",
    alpha = 0.7
  ) +
  geom_vline(xintercept = 0.75, color = "red", size = 1) +
  cowplot::theme_cowplot() +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  labs(
    title = "",
    x = "Max prediction score",
    y = "Number of cells"
  )

plot of chunk unnamed-chunk-14

# Discard unassigned neurons and cell types with less than 3 cells
FosTRAP %<>% set_idents("scpred_prediction")

FosTRAP %<>%
  subset(idents = as.character(unique(Idents(FosTRAP))) %>%
    keep(~ .x %not_in% "unassigned"))

clusters_to_keep <-
  FosTRAP@meta.data %>% 
  count(scpred_prediction, sort = TRUE) %>% 
  mutate(keep = n >= 3) %>% 
  select(scpred_prediction, keep)

FosTRAP@meta.data %<>%
  rownames_to_column("cell") %>% 
  left_join(clusters_to_keep, by = "scpred_prediction") %>% 
  column_to_rownames("cell") 

FosTRAP %<>% 
  set_idents("keep") %>% 
  subset(idents = TRUE) %>% 
  set_idents("scpred_prediction")
# scPred score heatmap
FosTRAP@meta.data %>%
  select(
    starts_with("scpred_"),
    -c(scpred_max, scpred_prediction, scpred_no_rejection)
  ) %>%
  rename_with(~ str_remove(.x, "scpred_")) %>%
  t() %>%
  pheatmap::pheatmap(
    cluster_rows = FALSE,
    cluster_cols = TRUE,
    treeheight_col = 0,
    show_colnames = FALSE,
    color = viridisLite::viridis(10),
    fontsize = 12,
    title = ""
  )

plot of chunk unnamed-chunk-16

# Plot UMAP with predicted identites
DimPlot(FosTRAP, label = TRUE, repel = TRUE) 

plot of chunk unnamed-chunk-17

# Plot neurotransmitter type (Glut/Gaba)
(FeaturePlot(FosTRAP, 
             c("Slc17a6", "Slc32a1"), 
             label = TRUE, 
             pt.size = 1, 
             repel = TRUE)) /
  (VlnPlot(FosTRAP, c("Slc17a6", "Slc32a1")))

plot of chunk unnamed-chunk-18

# Plot number of neurons per celltype
FosTRAP@meta.data %>%
  count(scpred_prediction, sort = TRUE) %>%
  mutate(prop = n / sum(n)) %>%
  ggplot(aes(fct_inorder(scpred_prediction), prop)) +
  geom_col(fill = "darkblue", alpha = 0.7) +
  cowplot::theme_cowplot() +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  labs(x = "Predicted cell type", y = "Proportion of cells")

plot of chunk unnamed-chunk-19

# Tabulate cell counts per predicted identity
FosTRAP@meta.data %>% 
  count(scpred_prediction, sort = TRUE) %>% 
  knitr::kable()

|scpred_prediction | n| |:-----------------|--:| |Gaba12 | 63| |Glut1 | 54| |Gaba10 | 10| |Glut3 | 9| |Glut2 | 4| |Glut4 | 4|

# Get markers for Häring et al. data
haring_markers <-
  haring %>%
  set_idents("standard_names") %>%
  FindAllMarkers(only.pos = TRUE)
## Calculating cluster Gaba1
## Calculating cluster Gaba10
## Calculating cluster Gaba11
## Calculating cluster Gaba12
## Calculating cluster Gaba13
## Calculating cluster Gaba14
## Calculating cluster Gaba15
## Calculating cluster Gaba2
## Calculating cluster Gaba3
## Calculating cluster Gaba4
## Calculating cluster Gaba5
## Calculating cluster Gaba6
## Calculating cluster Gaba7
## Calculating cluster Gaba8
## Calculating cluster Gaba9
## Calculating cluster Glut1
## Calculating cluster Glut10
## Calculating cluster Glut11
## Calculating cluster Glut12
## Calculating cluster Glut13
## Calculating cluster Glut14
## Calculating cluster Glut15
## Calculating cluster Glut2
## Calculating cluster Glut3
## Calculating cluster Glut4
## Calculating cluster Glut5
## Calculating cluster Glut6
## Calculating cluster Glut7
## Calculating cluster Glut8
## Calculating cluster Glut9
# Get top genes for Häring et al. cell types
top_genes_haring <-
  haring_markers %>%
  filter(cluster %in% unique(FosTRAP@meta.data$scpred_prediction)) %>%
  group_by(cluster) %>%
  slice_min(p_val_adj, n = 5, with_ties = FALSE)
# Re-order FosTRAP data (with assigned IDs) for plotting
order <-
  c(
    "Glut1",
    "Glut2",
    "Glut3",
    "Glut4",
    "Glut6",
    "Glut7",
    "Glut8",
    "Gaba10",
    "Gaba12",
    "Gaba13"
  )

Idents(FosTRAP) <- factor(x = Idents(FosTRAP), levels = order)
# Dotplot of top Häring markers in FosTRAP neurons
FosTRAP %>%
  DotPlot(
    features = unique(c("Slc32a1", "Slc17a6", "Prkcg", top_genes_haring$gene))
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  coord_flip()

plot of chunk unnamed-chunk-24

# Plot Nts and Prkcq in FosTRAP neurons
VlnPlot(FosTRAP, c("Nts", "Prkcg"), pt.size = 0.1)

plot of chunk unnamed-chunk-25

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS:   /sw/apps/R/4.1.1/rackham/lib64/R/lib/libRblas.so
## LAPACK: /sw/apps/R/4.1.1/rackham/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] caret_6.0-90       lattice_0.20-45    scPred_1.9.2       here_1.0.1        
##  [5] fs_1.5.2           patchwork_1.1.1    magrittr_2.0.2     forcats_0.5.1     
##  [9] stringr_1.4.0      dplyr_1.0.8        purrr_0.3.4        readr_2.1.2       
## [13] tidyr_1.2.0        tibble_3.1.6       ggplot2_3.3.5      tidyverse_1.3.1   
## [17] SeuratObject_4.0.4 Seurat_4.1.0      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.4.1       plyr_1.8.6           
##   [4] igraph_1.2.11         lazyeval_0.2.2        splines_4.1.1        
##   [7] listenv_0.8.0         scattermore_0.8       digest_0.6.29        
##  [10] foreach_1.5.2         htmltools_0.5.2       fansi_1.0.2          
##  [13] tensor_1.5            cluster_2.1.2         ROCR_1.0-11          
##  [16] limma_3.48.3          tzdb_0.2.0            recipes_0.2.0        
##  [19] globals_0.14.0        gower_1.0.0           modelr_0.1.8         
##  [22] matrixStats_0.61.0    R.utils_2.11.0        hardhat_0.2.0        
##  [25] spatstat.sparse_2.1-0 colorspace_2.0-3      rvest_1.0.2          
##  [28] ggrepel_0.9.1         haven_2.4.3           xfun_0.30            
##  [31] crayon_1.5.0          jsonlite_1.8.0        spatstat.data_2.1-2  
##  [34] ape_5.6-1             iterators_1.0.14      survival_3.2-13      
##  [37] zoo_1.8-9             glue_1.6.2            polyclip_1.10-0      
##  [40] gtable_0.3.0          ipred_0.9-12          leiden_0.3.9         
##  [43] R.cache_0.15.0        future.apply_1.8.1    abind_1.4-5          
##  [46] scales_1.1.1          pheatmap_1.0.12       DBI_1.1.2            
##  [49] spatstat.random_2.1-0 miniUI_0.1.1.1        Rcpp_1.0.8           
##  [52] viridisLite_0.4.0     xtable_1.8-4          reticulate_1.24      
##  [55] spatstat.core_2.4-0   stats4_4.1.1          lava_1.6.10          
##  [58] prodlim_2019.11.13    htmlwidgets_1.5.4     httr_1.4.2           
##  [61] RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2            
##  [64] farver_2.1.0          pkgconfig_2.0.3       R.methodsS3_1.8.1    
##  [67] nnet_7.3-16           uwot_0.1.11           dbplyr_2.1.1         
##  [70] deldir_1.0-6          utf8_1.2.2            labeling_0.4.2       
##  [73] tidyselect_1.1.2      rlang_1.0.2           reshape2_1.4.4       
##  [76] later_1.3.0           munsell_0.5.0         cellranger_1.1.0     
##  [79] tools_4.1.1           cli_3.2.0             generics_0.1.2       
##  [82] broom_0.7.12          ggridges_0.5.3        evaluate_0.15        
##  [85] fastmap_1.1.0         goftest_1.2-3         ModelMetrics_1.2.2.2 
##  [88] knitr_1.37            fitdistrplus_1.1-8    RANN_2.6.1           
##  [91] pbapply_1.5-0         future_1.24.0         nlme_3.1-153         
##  [94] mime_0.12             ggrastr_1.0.1         R.oo_1.24.0          
##  [97] xml2_1.3.3            ezknitr_0.6.1         compiler_4.1.1       
## [100] rstudioapi_0.13       beeswarm_0.4.0        plotly_4.10.0        
## [103] png_0.1-7             spatstat.utils_2.3-0  reprex_2.0.1         
## [106] stringi_1.7.6         highr_0.9             RSpectra_0.16-0      
## [109] Matrix_1.3-4          styler_1.7.0.9001     vctrs_0.3.8          
## [112] pillar_1.7.0          lifecycle_1.0.1       spatstat.geom_2.3-2  
## [115] lmtest_0.9-39         RcppAnnoy_0.0.19      data.table_1.14.2    
## [118] cowplot_1.1.1         irlba_2.3.5           httpuv_1.6.5         
## [121] R6_2.5.1              promises_1.2.0.1      KernSmooth_2.23-20   
## [124] gridExtra_2.3         vipor_0.4.5           parallelly_1.30.0    
## [127] codetools_0.2-18      MASS_7.3-54           assertthat_0.2.1     
## [130] rprojroot_2.0.2       withr_2.5.0           sctransform_0.3.3    
## [133] harmony_0.1.0         mgcv_1.8-38           parallel_4.1.1       
## [136] hms_1.1.1             timeDate_3043.102     grid_4.1.1           
## [139] rpart_4.1-15          class_7.3-19          mda_0.5-2            
## [142] Cairo_1.5-14          Rtsne_0.15            pROC_1.18.0          
## [145] shiny_1.7.1           lubridate_1.8.0       ggbeeswarm_0.6.0


jussi-kupari/kumar-et-al-2022 documentation built on June 15, 2022, 9:37 p.m.