## Run this on JHPCE at
## /dcs04/lieber/lcolladotor/with10x_LIBD001/HumanPilot/Analysis/spatialLIBD_files
library("SingleCellExperiment")
library("here")
library("sessioninfo")
## Output directory
dir.create(here(
"Analysis",
"spatialLIBD_files"
), showWarnings = FALSE)
## Load data
load(
here(
"Analysis",
"Human_DLPFC_Visium_processedData_sce_scran.Rdata"
),
verbose = TRUE
)
load(
here(
"Analysis",
"rda_scran",
"clust_k5_list.Rdata"
),
verbose = TRUE
)
load(
here(
"Analysis",
"rda_scran",
"clust_10x_layer_maynard_martinowich.Rdata"
),
verbose = TRUE
)
load(
here(
"Analysis", "Layer_Guesses", "rda",
"layer_guess_tab.Rdata"
),
verbose = TRUE
)
## From the original spatialLIBD code
clust_k5 <- do.call(cbind, clust_k5_list)
colnames(clust_k5) <- paste0("SNN_k50_", colnames(clust_k5))
rownames(clust_k5) <- NULL
colData(sce) <- cbind(colData(sce), clust_k5)
sce$Cluster10X <- sce$Cluster
sce$Maynard <- clust_10x_layer_maynard
sce$Martinowich <- clust_10x_layer_martinowich
sce$Layer <- "NA"
## From https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/layer_specificity.R
sce$layer_guess <- NA
m <- match(sce$key, layer_guess_tab$key)
sce$layer_guess[!is.na(m)] <- layer_guess_tab$layer[m[!is.na(m)]]
sce$layer_guess[which(sce$layer_guess == "Layer 2/3")] <- "Layer 3"
sce$layer_guess <-
factor(gsub(" ", "", sce$layer_guess), levels = c("WM", paste0("Layer", seq_len(6))))
sce$layer_guess_reordered <-
factor(sce$layer_guess, levels = c(paste0("Layer", seq_len(6)), "WM"))
sce$layer_guess_reordered_short <- sce$layer_guess_reordered
levels(sce$layer_guess_reordered_short) <-
gsub("ayer", "", levels(sce$layer_guess_reordered))
## From https://github.com/LieberInstitute/HumanPilot/blob/master/Analysis/Layer_Guesses/misc_numbers.R
ix_mito <- grep("^MT-", rowData(sce)$gene_name)
sce$expr_chrM <- colSums(assays(sce)$counts[ix_mito, ])
sce$expr_chrM_ratio <- sce$expr_chrM / sce$sum_umi
## Manually compare vs the info from the other script
summary(sce$expr_chrM_ratio)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.04853 0.15465 0.18442 0.18554 0.21521 0.44156
## Fix the rowRanges
## From https://github.com/LieberInstitute/HumanPilot/blob/c8a3a31b991081d656ededee59da45aa0494b334/Analysis/Layer_Notebook.R#L78-L87
map <- read.delim(
here(
"/10X/151675/151675_raw_feature_bc_matrix__features.tsv.gz"
),
as.is = TRUE,
header = FALSE,
col.names = c("EnsemblID", "Symbol", "Type")
)
## get GTF, this seems like what they used
gtf <- rtracklayer::import(
"/dcl01/ajaffe/data/lab/singleCell/refdata-cellranger-GRCh38-3.0.0/genes/genes.gtf"
)
gtf <- gtf[gtf$type == "gene"]
names(gtf) <- gtf$gene_id
gtf <- gtf[map$EnsemblID]
seqlevels(gtf)[seq_len(25)] <-
paste0("chr", seqlevels(gtf)[seq_len(25)])
# mcols(gtf) = mcols(gtf)[,c(5:9)]
## Keep the non-empty mcols()
mcols_empty_n <- sapply(mcols(gtf), function(x) {
sum(is.na(x))
})
names(mcols_empty_n[mcols_empty_n == 0])
# [1] "source" "type" "gene_id" "gene_version" "gene_name"
# [6] "gene_source" "gene_biotype"
mcols(gtf) <- mcols(gtf)[, names(mcols_empty_n[mcols_empty_n == 0])]
## Check the order
stopifnot(identical(rownames(sce), names(gtf)))
## Replace the rowRanges info
rowRanges(sce) <- gtf
## To simplify other code later
rowData(sce)$gene_search <-
paste0(rowData(sce)$gene_name, "; ", rowData(sce)$gene_id)
## Double check our selection of chrM genes now that we have the
## correct rowRanges data
stopifnot(identical(which(seqnames(sce) == "chrMT"), ix_mito))
## Add whether the gene is a top highly variable gene (HVG) or not
rowData(sce)$is_top_hvg <- rownames(sce) %in% top.hvgs
## Add more cluster results?
## Likely the ones from Lukas
## initial k-means ones from 10x?
save(
sce,
file = here(
"Analysis",
"spatialLIBD_files",
"Human_DLPFC_Visium_processedData_sce_scran_spatialLIBD.Rdata"
)
)
## Now for the sce_layer object
## note that this re-loads the top.hvgs object
load(here("Analysis", "Layer_Guesses", "rda", "sce_layer.Rdata"),
verbose = TRUE
)
## Fix the rowRanges info
rowRanges(sce_layer) <-
rowRanges(sce)[match(rownames(sce_layer), rownames(sce)), ]
## Save the top HVG (layer-level) info
rowData(sce_layer)$is_top_hvg_sce_layer <-
rownames(sce_layer) %in% top.hvgs
## For the different plots
sce_layer$layer_guess_reordered <-
factor(sce_layer$layer_guess, levels = c(paste0("Layer", seq_len(6)), "WM"))
sce_layer$layer_guess_reordered_short <-
sce_layer$layer_guess_reordered
levels(sce_layer$layer_guess_reordered_short) <-
gsub("ayer", "", levels(sce_layer$layer_guess_reordered))
save(
sce_layer,
file = here(
"Analysis",
"spatialLIBD_files",
"Human_DLPFC_Visium_processedData_sce_scran_sce_layer_spatialLIBD.Rdata"
)
)
## Also the modeling results
load(
here(
"Analysis",
"Layer_Guesses",
"rda",
"modeling_results.Rdata"
),
verbose = TRUE
)
modeling_results <- list(
"anova" = results_anova,
"enrichment" = results_specificity,
"pairwise" = results_pairwise
)
save(
modeling_results,
file = here(
"Analysis",
"spatialLIBD_files",
"Human_DLPFC_Visium_modeling_results.Rdata"
)
)
## List all files
dir(here(
"Analysis",
"spatialLIBD_files"
))
# [1] "Human_DLPFC_Visium_modeling_results.Rdata"
# [2] "Human_DLPFC_Visium_processedData_sce_scran_sce_layer_spatialLIBD.Rdata"
# [3] "Human_DLPFC_Visium_processedData_sce_scran_spatialLIBD.Rdata"
######################################
##### Run locally (not on JHPCE) #####
######################################
usethis::use_directory("data-raw/spatialLIBD_files", ignore = TRUE)
system2(
"scp",
paste0(
"e:/dcs04/lieber/lcolladotor/with10x_LIBD001/HumanPilot/Analysis/spatialLIBD_files/* ",
here::here("data-raw/spatialLIBD_files"),
"/"
)
)
## Update on 2020-02-21 the modeling results only
# system(
# paste0(
# 'scp e:/dcs04/lieber/lcolladotor/with10x_LIBD001/HumanPilot/Analysis/spatialLIBD_files/Human_DLPFC_Visium_modeling_results.Rdata ',
# here::here('data-raw/spatialLIBD_files'),
# '/'
# )
# )
system2("echo", paste(
"data-raw/spatialLIBD_files >>",
here::here(".gitignore")
))
## Add the clusters from Lukas M Weber and Stephanie Hicks
library("SingleCellExperiment")
load(
here::here(
"data-raw",
"spatialLIBD_files",
"Human_DLPFC_Visium_processedData_sce_scran_spatialLIBD.Rdata"
),
verbose = TRUE
)
local_cluster_csvs <-
dir(
"~/Dropbox/code/HumanPilot/outputs/SpatialDE_clustering/",
pattern = "^cluster_labels_.*.csv",
full.names = TRUE
)
csv_file <- local_cluster_csvs[1]
spatial_clus <-
do.call(
rbind,
lapply(local_cluster_csvs, read.csv, stringsAsFactors = FALSE)
)
## It's too big to include as data in the pkg
lobstr::obj_size(spatial_clus) / 1024^2 ## Convert to MB
# 8.01 MB
## Check the order
stopifnot(identical(spatial_clus$key, sce$key))
## Drop the 'key' and 'ground_truth' since we don't need it
spatial_clus <-
spatial_clus[, -which(colnames(spatial_clus) %in% c("key", "ground_truth"))]
## Append
colData(sce) <- cbind(colData(sce), spatial_clus)
## Rename Cluster10X to something else
colnames(colData(sce))[colnames(colData(sce)) == "Cluster10X"] <-
"GraphBased"
## Double check that it all works!
# sce_layer <-
# fetch_data('sce_layer', here::here('data-raw/spatialLIBD_files'))
# modeling_results <-
# fetch_data('modeling_results',
# here::here('data-raw/spatialLIBD_files'))
#
# sig_genes <-
# sig_genes_extract_all(n = nrow(sce_layer),
# modeling_results,
# sce_layer = sce_layer)
#
#
# spatialLIBD::run_app(
# sce = sce,
# sce_layer = sce_layer,
# modeling_results = modeling_results,
# sig_genes = sig_genes,
# sce_discrete_vars = c('GraphBased',
# 'Layer',
# 'Maynard',
# 'Martinowich',
# paste0('SNN_k50_k', 4:28), colnames(spatial_clus))
# )
## It does! =)
paste0("c('", paste(colnames(spatial_clus), collapse = "', '"), "')")
# c('SpatialDE_PCA', 'SpatialDE_pool_PCA', 'HVG_PCA', 'pseudobulk_PCA', 'markers_PCA', 'SpatialDE_UMAP', 'SpatialDE_pool_UMAP', 'HVG_UMAP', 'pseudobulk_UMAP', 'markers_UMAP', 'SpatialDE_PCA_spatial', 'SpatialDE_pool_PCA_spatial', 'HVG_PCA_spatial', 'pseudobulk_PCA_spatial', 'markers_PCA_spatial', 'SpatialDE_UMAP_spatial', 'SpatialDE_pool_UMAP_spatial', 'HVG_UMAP_spatial', 'pseudobulk_UMAP_spatial', 'markers_UMAP_spatial')"
## Overwrite the file
save(
sce,
file = here::here(
"data-raw",
"spatialLIBD_files",
"Human_DLPFC_Visium_processedData_sce_scran_spatialLIBD.Rdata"
)
)
## Reproducibility information
print("Reproducibility information:")
Sys.time()
proc.time()
options(width = 120)
session_info()
# ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
# setting value
# version R version 3.6.1 Patched (2019-10-31 r77350)
# os CentOS Linux 7 (Core)
# system x86_64, linux-gnu
# ui X11
# language (EN)
# collate en_US.UTF-8
# ctype en_US.UTF-8
# tz US/Eastern
# date 2020-02-19
#
# ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
# package * version date lib source
# assertthat 0.2.1 2019-03-21 [2] CRAN (R 3.6.1)
# backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.1)
# Biobase * 2.46.0 2019-10-29 [2] Bioconductor
# BiocGenerics * 0.32.0 2019-10-29 [1] Bioconductor
# BiocParallel * 1.20.1 2019-12-21 [1] Bioconductor
# Biostrings 2.54.0 2019-10-29 [1] Bioconductor
# bitops 1.0-6 2013-08-17 [2] CRAN (R 3.6.1)
# cli 2.0.0 2019-12-09 [1] CRAN (R 3.6.1)
# colorout * 1.2-2 2019-10-31 [1] Github (jalvesaq/colorout@641ed38)
# colorspace 1.4-1 2019-03-18 [2] CRAN (R 3.6.1)
# crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.1)
# DelayedArray * 0.12.2 2020-01-06 [2] Bioconductor
# digest 0.6.23 2019-11-23 [1] CRAN (R 3.6.1)
# fansi 0.4.0 2018-10-05 [1] CRAN (R 3.6.1)
# GenomeInfoDb * 1.22.0 2019-10-29 [1] Bioconductor
# GenomeInfoDbData 1.2.2 2019-10-28 [2] Bioconductor
# GenomicAlignments 1.22.1 2019-11-12 [1] Bioconductor
# GenomicRanges * 1.38.0 2019-10-29 [1] Bioconductor
# ggplot2 3.2.1 2019-08-10 [1] CRAN (R 3.6.1)
# glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.1)
# gtable 0.3.0 2019-03-25 [2] CRAN (R 3.6.1)
# here * 0.1 2017-05-28 [1] CRAN (R 3.6.1)
# htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.1)
# htmlwidgets 1.5.1 2019-10-08 [1] CRAN (R 3.6.1)
# httpuv 1.5.2 2019-09-11 [1] CRAN (R 3.6.1)
# IRanges * 2.20.1 2019-11-20 [1] Bioconductor
# jsonlite 1.6.1 2020-02-02 [2] CRAN (R 3.6.1)
# later 1.0.0 2019-10-04 [1] CRAN (R 3.6.1)
# lattice 0.20-38 2018-11-04 [3] CRAN (R 3.6.1)
# lazyeval 0.2.2 2019-03-15 [2] CRAN (R 3.6.1)
# lifecycle 0.1.0 2019-08-01 [1] CRAN (R 3.6.1)
# magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.1)
# Matrix 1.2-17 2019-03-22 [3] CRAN (R 3.6.1)
# matrixStats * 0.55.0 2019-09-07 [1] CRAN (R 3.6.1)
# munsell 0.5.0 2018-06-12 [2] CRAN (R 3.6.1)
# pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.1)
# pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.1)
# png 0.1-7 2013-12-03 [2] CRAN (R 3.6.1)
# promises 1.1.0 2019-10-04 [1] CRAN (R 3.6.1)
# R6 2.4.1 2019-11-12 [2] CRAN (R 3.6.1)
# Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.1)
# RCurl 1.98-1.1 2020-01-19 [2] CRAN (R 3.6.1)
# rlang 0.4.2 2019-11-23 [1] CRAN (R 3.6.1)
# rmote * 0.3.4 2019-10-31 [1] Github (cloudyr/rmote@fbce611)
# rprojroot 1.3-2 2018-01-03 [2] CRAN (R 3.6.1)
# Rsamtools 2.2.1 2019-11-06 [1] Bioconductor
# rtracklayer 1.46.0 2019-10-29 [1] Bioconductor
# S4Vectors * 0.24.1 2019-12-01 [1] Bioconductor
# scales 1.1.0 2019-11-18 [2] CRAN (R 3.6.1)
# servr 0.15 2019-08-07 [1] CRAN (R 3.6.1)
# sessioninfo * 1.1.1 2018-11-05 [1] CRAN (R 3.6.1)
# SingleCellExperiment * 1.8.0 2019-10-29 [2] Bioconductor
# SummarizedExperiment * 1.16.1 2019-12-19 [1] Bioconductor
# tibble 2.1.3 2019-06-06 [1] CRAN (R 3.6.1)
# withr 2.1.2 2018-03-15 [2] CRAN (R 3.6.1)
# xfun 0.11 2019-11-12 [1] CRAN (R 3.6.1)
# XML 3.99-0.3 2020-01-20 [2] CRAN (R 3.6.1)
# XVector 0.26.0 2019-10-29 [1] Bioconductor
# zlibbioc 1.32.0 2019-10-29 [2] Bioconductor
#
# [1] /users/lcollado/R/3.6.x
# [2] /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-3.6.x/R/3.6.x/lib64/R/site-library
# [3] /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-3.6.x/R/3.6.x/lib64/R/library
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.