knitr::opts_chunk$set(echo = TRUE)

Load the library

library(COSG) 
library(Seurat)

Load the data

Download data from 10x Genomics:

# getwd()
download.file("https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_protein_v3/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5",
              destfile = "./pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
# Please install hdf5r to read HDF5 files
pbmc10k_raw<-Read10X_h5('./pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5')
pbmc10k<-CreateSeuratObject(pbmc10k_raw$`Gene Expression`,assay = 'RNA',project = 'COSG',  min.cells = 5, min.features = 200) 
pbmc10k

Quality control

pbmc10k[["percent.mt"]] <- PercentageFeatureSet(pbmc10k, pattern = "^MT-")
# Visualize QC metrics as a violin plot
VlnPlot(pbmc10k, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(pbmc10k, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc10k, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
pbmc10k <- subset(pbmc10k, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & nCount_RNA < 10000 & nCount_RNA > 500 & percent.mt < 5)
pbmc10k
plot1 <- FeatureScatter(pbmc10k, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc10k, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

Normalization

pbmc10k <- NormalizeData(pbmc10k, normalization.method = "LogNormalize", scale.factor = 10000)

Feature selection

pbmc10k <- FindVariableFeatures(pbmc10k, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(pbmc10k), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(pbmc10k)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot2

Run PCA

pbmc10k <- ScaleData(pbmc10k, features = rownames(pbmc10k))
pbmc10k <- RunPCA(pbmc10k, features = VariableFeatures(object = pbmc10k))

Run Clustering

pbmc10k <- FindNeighbors(pbmc10k, dims = 1:15)
pbmc10k <- FindClusters(pbmc10k
                        # resolution = 0.5
                        )

Run UMAP

pbmc10k <- RunUMAP(pbmc10k, dims = 1:15)
DimPlot(pbmc10k, reduction = "umap")

Run COSG

marker_cosg<-cosg(
  pbmc10k,
  groups='all',
  assay='RNA',
  slot='data',
  mu=1,
  n_genes_user=2000)

Check the marker genes

Check markers:

head(marker_cosg$names)

Check scores:

head(marker_cosg$scores)
top_list<-c()
for (group in colnames(marker_cosg$names)){
    top_i<-marker_cosg$names[group][1:5,1]
    top_list<-c(top_list,top_i)
}
DotPlot(pbmc10k, 
        assay = 'RNA',
        # scale=TRUE,
        features =  unique(top_list)) + RotatedAxis()
DoHeatmap(pbmc10k, 
        assay = 'RNA',
        features =  top_list) 


genecell/COSGR documentation built on Jan. 3, 2023, 10:57 a.m.