knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(rrrSingleCellUtils)
library(tidyverse)
read_tsv("/home/gdrobertslab/lab/BCLs/R0059/sampleInfoSheet_R0059.txt") %>%
    DT::datatable()

process_raw_data("/home/gdrobertslab/lab/BCLs/R0059/sampleInfoSheet_R0059.txt",
                 email = "matthew.cannon@nationwidechildrens.org")
# Enter password when prompted

Loading in raw data

Single species 3' GEX data

gex_3prime_data <-
    tenx_load_qc(path_10x = "/home/gdrobertslab/lab/Counts/S0130/filtered_feature_bc_matrix")

gex_3prime_data

Dual species 3' GEX data, keeping both species

gex_3prime_data_2_sp <-
    tenx_load_qc(path_10x = "/home/gdrobertslab/lab/Counts/S0148/filtered_feature_bc_matrix",
                 mt_pattern = "^hg19-MT-|^mm10-mt-")

gex_3prime_data_2_sp

Dual species 3' GEX data, keeping both species, but loading from h5 file

gex_3prime_data_2_sp_h5 <-
    tenx_load_qc(h5_file = "/home/gdrobertslab/lab/Counts/S0148/filtered_feature_bc_matrix.h5",
                 mt_pattern = "^hg19-MT-|^mm10-mt-")

gex_3prime_data_2_sp_h5

Dual species 3' GEX data, keeping only human

gex_3prime_data_human <-
    tenx_load_qc(path_10x = "/home/gdrobertslab/lab/Counts/S0148/filtered_feature_bc_matrix",
                 species_pattern = "^hg19-")

gex_3prime_data_human

Multiomics GEX data with two species

multiomics_gex_data <-
    tenx_load_qc(path_10x = "/home/gdrobertslab/lab/Counts/S0150/filtered_feature_bc_matrix",
                 mt_pattern = "^hg19-MT-|^mm10-mt-")

multiomics_gex_data

Multiomics ATAC data with two species

multiomics_atac_data <-
    tenx_load_qc(h5_file = "/home/gdrobertslab/lab/Counts/S0150/filtered_feature_bc_matrix.h5",
                 mt_pattern = "^hg19-MT-|^mm10-mt-",
                 exp_type = "ATAC")
    multiomics_atac_data

Multiomics data both GEX and ATAC with two species

multiomics_both_data <-
    tenx_load_qc(h5_file = "/home/gdrobertslab/lab/Counts/S0150/filtered_feature_bc_matrix.h5",
                 mt_pattern = "^hg19-MT-|^mm10-mt-",
                 exp_type = "GEX+ATAC")

multiomics_both_data

Processing a Seurat object

Run the standard processing steps in a single command

pbmc_fixed <-
    process_seurat(SeuratObject::pbmc_small,
                   run_umap_dims = 1:10,
                   resolution = 0.8)

Seurat::DimPlot(pbmc_fixed)

Optimizing the clustering resolution

test <- qs::qread("/home/gdrobertslab/lab/SeuratObj/S0090.qs")

df_of_sil_scores <-
    optimize_silhouette(test)

Optimize the clustering resolution on harmony embeddings

test <-
    qs::qread("/home/gdrobertslab/lab/SeuratObj/S0090.qs") %>%
    harmony::RunHarmony(group.by.vars = "seurat_clusters") %>%
    Seurat::RunUMAP(reduction = "harmony",
                    dims = 1:30)

DimPlot(test)

df_of_sil_scores <-
    optimize_silhouette(test,
                        reduction = "harmony")

Using SNPs to define tumor clusters

two_sobj <- qs::qread("/home/gdrobertslab/mvc002/analyses/roberts/dev/testSnps/output/patient_met_1/two_sobj.qs")

# Do cell type annotation so I can use macrophages and monocytes as known normal cells
# The idea is that Osteosarcoma cells are unlikely to be mistakenly annotated as macrophages or monocytes
hpca <- celldex::HumanPrimaryCellAtlasData()
imm_cells <- celldex::MonacoImmuneData()
blueprint <- celldex::BlueprintEncodeData()

cell_assign <-
    SingleR::SingleR(as.SingleCellExperiment(two_sobj),
                     ref = list(hpca,
                                imm_cells,
                                blueprint),
                     labels = list(hpca$label.main,
                                   imm_cells$label.main,
                                   blueprint$label.main))

two_sobj$cell_type <-
    cell_assign$labels

two_sobj$cell_score <-
    cell_assign$scores %>%
    apply(MARGIN = 1, function(x) max(x, na.rm = TRUE))

control_celltypes <- c("Monocytes", "Macrophages")

# Figure out which clusters are more than 50% control celltypes
normal_clusters <-
    match_celltype_clusters(sobject = two_sobj,
                            normal_celltypes = control_celltypes,
                            cluster_col = "used_clusters",
                            celltype_col = "cell_type")

c_b_t <-
    two_sobj@meta.data %>%
    select(used_clusters) %>%
    dplyr::rename(cell_group = used_clusters) %>%
    rownames_to_column("cell_barcode") %>%
    as_tibble() %>%
    mutate(bam_file = paste0("/home/gdrobertslab/lab/Counts/",
                             str_remove(cell_barcode, "_.*"),
                             "/outs/possorted_genome_bam.bam"),
           cell_barcode = str_remove(cell_barcode, ".+_"))

snp_tree <-
    get_snp_tree(cellid_bam_table = c_b_t,
                 ploidy = "GRCh37",
                 ref_fasta = "/home/gdrobertslab/lab/GenRef/10x-human/fasta/genome.fa",
                 min_depth = 5,
                 temp_dir = "/gpfs0/scratch/mvc002/test_two_sobj",
                 sbatch_base = "two_sobj",
                 slurm_base = "/gpfs0/scratch/mvc002/test_two_sobj/two_sobj",
                 min_sites_covered = 1000,
                 cleanup = FALSE)

png("output/figures/testTwoSobjOutput1.png",
    width = 2500,
    height = 2500,
    res = 300)
plot(snp_tree)
dev.off()

cut_n_groups <- 2

new_two_sobj <-
    label_tree_groups(sobject = two_sobj,
                      dist_tree = snp_tree,
                      group_col_name = "used_clusters",
                      normal_groups = normal_clusters,
                      cut_n_groups = cut_n_groups)

png("testTwoSobjOutput2.png",
    width = 7000,
    height = 2500,
    res = 300)
DimPlot(new_two_sobj,
        group.by = c("snp_tumor_call",
                     "cell_type"),
        label = TRUE,
        repel = TRUE) +
    NoLegend()
dev.off()


kidcancerlab/rrrSingleCellUtils documentation built on April 17, 2025, 5:10 p.m.