remotes::install_github("mojaveazure/seurat-disk")
library(SeuratDisk)
library(Seurat)
library(tidyverse)
library(plyr)

# Convert the Tabula Sapiens AnnData (.h5ad) file to the h5Seurat format,
# replacing any previous conversion output.
Convert(
  "~/Dropbox/Columbia/TabulaSapiens.h5ad",
  dest = "h5seurat",
  overwrite = TRUE
)

# Load only the RNA assay of the converted file and snapshot the untouched
# object before any annotation clean-up is applied.
TS_rna <- LoadH5Seurat(
  "~/Dropbox/Columbia/TabulaSapiens.h5seurat",
  assays = "RNA"
)
save(TS_rna, file = "~/TabulaSapiens_Seurat.RData")
# Derive CleanedAnnotation from free_annotation: drop commas, turn
# "positive" markers into "+", replace underscores with spaces, singularise
# "cells", title-case the result and restore the upper-case "CD" prefix.
# The steps run strictly in the order written.
TS_rna@meta.data$CleanedAnnotation <- local({
  labels <- TS_rna@meta.data$free_annotation
  labels <- gsub(",", "", labels)
  labels <- gsub(" positive", "+", labels)
  labels <- gsub(" Positive", "+", labels)
  labels <- gsub("-positive", "+", labels)
  labels <- gsub("-Positive", "+", labels)
  labels <- gsub("_", " ", labels)
  labels <- gsub("Cells", "cell", labels)
  labels <- gsub("cells", "cell", labels)
  labels <- str_to_title(labels)
  # Title-casing turns "CD4" into "Cd4"; undo that for the CD markers.
  gsub("Cd", "CD", labels)
})

# "<organ_tissue> - <annotation>" key for every cell.
TS_rna@meta.data$Comb <- paste(
  TS_rna@meta.data$organ_tissue,
  TS_rna@meta.data$CleanedAnnotation,
  sep = " - "
)

# Frequency tables of the intermediate labels (both are recomputed once the
# relabelling further down has finished).
AllTSAnnot <- as.data.frame(table(TS_rna@meta.data$Comb))
AllTSAnnot2 <- as.data.frame(table(TS_rna@meta.data$CleanedAnnotation))

# First relabelling pass over the title-cased annotations.
#
# `rules` maps a regular expression (the name) to its replacement (the value).
# The rules are applied with gsub() one after another, top to bottom, and each
# rule sees the output of all rules above it, so the order is significant.
TS_rna@meta.data$CleanedAnnotation <- local({
  rules <- c(
    "^Myofibroblasts$" = "Myofibroblast Cell",
    # Labels with a parenthesised qualifier must be handled before the
    # qualifier is stripped, and their parentheses must be escaped to be
    # matched literally. "Cells?" also accepts the singular form, because
    # "Cells" has already been singularised by the earlier normalisation.
    "^Fibroblasts \\(Lipofibroblasts\\)$" = "Fibroblast",
    "^Artery Cell \\(Endothelial Cells?\\)$" = "Arterial Endothelial Cell",
    # Drop everything from the first "(" to the end of the label.
    "\\(.*" = "",
    "^Monocyte/Macrophage$" = "Monocyte",
    "^Monocytes$" = "Monocyte",
    "^Melanocytes Or Limbal Stem Cell$" = "Melanocytes",
    "Nk" = "NK",
    "^Macrophage$" = "Macrophages",
    "^Lymphatic Endothelium$" = "Lymphatic Endothelial Cell",
    "Lung " = "",
    "Ltf" = "LTF",
    "^Ionocytes$" = "Ionocyte",
    "^Fibroblasts$" = "Fibroblast",
    "^Erythroid Progenitor$" = "Erythroid Progenitor Cell",
    # "Erythroid Cell" becomes "Erythrocytes" here and is singularised by the
    # next rule.
    "^Erythroid Cell$" = "Erythrocytes",
    "^Erythrocytes$" = "Erythrocyte",
    "^Enodthelial Cell$" = "Endothelial Cell",
    "^Endothelium$" = "Endothelial Cell",
    " Confirmed" = "",
    "^Endothelial$" = "Endothelial Cell",
    "^Ductal$" = "Ductal Cell",
    "Areg " = "AREG-",
    "Cpe " = "CPE-",
    "CD8b" = "CD8B",
    # "+" is escaped so it is matched literally (and made optional so the
    # "CD8 T-Cell" spelling is still caught); the replacement no longer
    # carries a stray "$".
    "^CD8\\+? T-Cell" = "CD8+ T Cell",
    "^CD8 T Cell" = "CD8+ T Cell",
    "^CD4 T Cell" = "CD4+ T Cell",
    "^CD4 Helper T Cell-Like$" = "CD4+ Helper T Cell-Like",
    "^CD4 Helper T Cell$" = "CD4+ Helper T Cell",
    "^CD4 CD8 T Cell$" = "CD4+/CD8+ T Cell",
    "^CD24 Neutrophil$" = "CD24+ Neutrophil",
    "CD1c" = "CD1C",
    # The two "... Confirmed$" rules below cannot match any more because
    # " Confirmed" has been removed above; they are kept for reference.
    "^Capillary Endothelial Confirmed$" = "Capillary Endothelial Cell",
    "^Bladder Urothelial Cell$" = "Urothelial Cell",
    "^Becs" = "Cholangiocytes",
    "^Basal Epithelial Cell$" = "Basal Cell",
    "^Basal Cell Confirmed$" = "Basal Cell",
    "^Basal$" = "Basal Cell",
    "^B Lymphocytes$" = "B Cell",
    "^Artery Endothelial Cell$" = "Arterial Endothelial Cell",
    "Alpha-Beta T Cell" = "Alpha Beta T Cell"
  )

  labels <- TS_rna@meta.data$CleanedAnnotation
  for (i in seq_along(rules)) {
    labels <- gsub(names(rules)[[i]], rules[[i]], labels)
  }
  labels
})

# Second relabelling pass: strips organ qualifiers and harmonises the
# remaining cell-type names.
#
# `rules` maps a regular expression (the name) to its replacement (the value).
# The rules are applied with gsub() one after another, top to bottom, and each
# rule sees the output of all rules above it, so the order is significant.
TS_rna@meta.data$CleanedAnnotation <- local({
  rules <- c(
    "Kidney " = "",
    # Shield labels containing "Naive " behind an "XX" prefix; the prefix is
    # resolved by the statements that follow this pass.
    "Naive " = "XXNaive",
    "NKt" = "NK",
    "Conventional " = "",
    "Associated " = "",
    "^T Lymphocytes$" = "T Cell",
    # NOTE(review): the "Vascular " rule further down also fires on the
    # replacement produced here, leaving "Tree Endothelial Cell" - confirm
    # that this is intended.
    "Endothelial Cell Of Vascular Tree" = "Vascular Tree Endothelial Cell",
    "Endothelial Cell Of Lymphatic Vessel" = "Lymphatic Endothelial Cell",
    "^Endothelial Cell Of Artery$" = "Arterial Endothelial Cell",
    "^Vascular Smooth Muscle$" = "Smooth Muscle Cell",
    " Of Epithelium.*" = "",
    "Natural Killer" = "NK",
    "T-Cell" = "T Cell",
    "^Cardiac Fibroblast$" = "Fibroblast",
    "Of Epithelium Of Small Intestine" = "",
    " Of Epithelium Of Large Intestine" = "",
    "^Capillary Endothelial$" = "Capillary Endothelial Cell",
    "Pp Cell" = "Gamma Cell",
    "Acing" = "Acinar",
    "Uterine " = "",
    "Tracheal " = "",
    "Intestinal " = "",
    "^Vein Endothelial Cell$" = "Venous Endothelial Cell",
    "Vein Cell " = "Venous Endothelial Cell",
    "Vasuclar " = "",
    "Vascular " = "",
    "^Type Ii Pneumocyte$" = "Type II Pneumocyte",
    " Of Trachea" = "",
    " Of Large Intestine" = "",
    " Of Small Intestine" = "",
    "Tongue " = "",
    "^T-Cell$" = "T Cell",
    "^Stromal$" = "Stromal Cell",
    "^Scgb3a1 High Club Epithelial Cell$" = "SCGB3A1-High Club Epithelial Cell",
    "^Plasmablast$" = "Plasma Cell",
    "^Plasma$" = "Plasma Cell",
    "^Pigr High Club Epithelial Cell$" = "PIGR-High Club Epithelial Cell",
    "^Perivascular$" = "Pericyte",
    "^Pericytes$" = "Pericyte",
    "^Pericyte Cell Confirmed$" = "Pericyte",
    "^Pericyte Cell$" = "Pericyte",
    "Pancreatic " = "",
    "^Other Epithelial Cell$" = "Epithelial Cell",
    "Nkt" = "NK",
    "^Natural Killer T-Cell$" = "NK Cell",
    "^Natural Killer Cell$" = "NK Cell",
    "^Natural Killar Cell$" = "NK Cell",
    "^Nampt Neutrophil$" = "NAMPT+ Neutrophil"
  )

  labels <- TS_rna@meta.data$CleanedAnnotation
  for (i in seq_along(rules)) {
    labels <- gsub(names(rules)[[i]], rules[[i]], labels)
  }
  labels
})

# Resolve the "XXNaive" placeholder written by the preceding pass, which
# rewrote "Naive " to "XXNaive" (dropping the space).
TS_rna@meta.data$CleanedAnnotation <- local({
  labels <- TS_rna@meta.data$CleanedAnnotation

  # Every label that still starts with "Na" is replaced wholesale; labels
  # shielded by the "XX" prefix are not affected by this step.
  labels <- gsub("^Na.*", "XXNaive Regulatory T Cell", labels)

  # Remove the shield again ...
  labels <- gsub("XXNa", "Na", labels)

  # ... and re-insert the space that was dropped together with "Naive ".
  labels <- gsub("NaiveThymus", "Naive Thymus", labels)
  labels <- gsub("NaiveCD", "Naive CD", labels)
  gsub("NaiveB Cell", "Naive B Cell", labels)
})




# Rebuild the "<organ_tissue> - <annotation>" key from the final labels and
# export the cell counts per key and per annotation. Both CSV files are
# written relative to the current working directory.
TS_rna@meta.data$Comb <- paste(
  TS_rna@meta.data$organ_tissue,
  TS_rna@meta.data$CleanedAnnotation,
  sep = " - "
)
AllTSAnnot <- as.data.frame(table(TS_rna@meta.data$Comb))
write.csv(AllTSAnnot, "AllTSAnnot3.csv")

AllTSAnnot2 <- as.data.frame(table(TS_rna@meta.data$CleanedAnnotation))
write.csv(AllTSAnnot2, "AllTSAnnot4.csv")

# Use the cleaned annotation as the cell identity, then keep every cell
# EXCEPT those carrying one of the listed labels (invert = TRUE) and save the
# filtered object.
Idents(TS_rna) <- "CleanedAnnotation"
TS_Clean <- subset(
  TS_rna,
  idents = c(
    "T Cell-Low Rna",
    "CD4+ T Cell-Low Rna",
    "Epithelium Of Lowquality Tsp7 Anterior",
    "Immune Notvalidated",
    "Not-Existing"
  ),
  # Spelled out: `T` is an ordinary variable that can be reassigned.
  invert = TRUE
)
save(TS_Clean, file = "~/TabulaSapiens_Seurat_CLEANED.RData")


# Hannahglover/Glowworm documentation built on Jan. 16, 2024, 11:47 p.m.