# inst/scripts/make-metadata.R

# ---------------------------
# Create metadata spreadsheet
# ---------------------------

# metadata for all datasets

# One-row data frame holding the fields that take the same value for every
# dataset. Fields set to NA are left unfilled for all datasets.
df_all <- as.data.frame(
  list(
    Genome = NA,
    SourceType = "FASTQ",
    SourceVersion = NA,
    Coordinate_1_based = NA,
    DataProvider = NA,
    Maintainer = "Lukas M. Weber <lmweberedu@gmail.com>"
  ),
  stringsAsFactors = FALSE
)


# metadata for individual datasets

# Metadata row for the "Visium_humanDLPFC" resource: the shared fields from
# df_all followed by the dataset-specific fields.
df_Visium_humanDLPFC <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "A single sample (sample 151673) of human brain dorsolateral prefrontal ",
    "cortex (DLPFC) in the human brain, measured using the 10x Genomics Visium ",
    "platform. This is a subset of the full dataset containing 12 samples from ",
    "3 neurotypical donors, published by Maynard and Collado-Torres et al. ",
    "(2021). The full dataset is available from the spatialLIBD Bioconductor ",
    "package."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "Visium_humanDLPFC",
    Description = description,
    SourceUrl = "http://spatial.libd.org/spatialLIBD/",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/Visium_humanDLPFC.rds",
    BiocVersion = "3.15",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "Visium_mouseCoronal" resource: the shared fields from
# df_all followed by the dataset-specific fields.
df_Visium_mouseCoronal <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "A single coronal section from the mouse brain spanning one hemisphere, ",
    "measured using the 10x Genomics Visium platform. This dataset was publicly ",
    "released by 10x Genomics."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "Visium_mouseCoronal",
    Description = description,
    SourceUrl = "https://support.10xgenomics.com/spatial-gene-expression/datasets",
    Species = "Mus musculus",
    TaxonomyId = "10090",
    RDataPath = "STexampleData/3_19/Visium_mouseCoronal.rds",
    BiocVersion = "3.15",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "seqFISH_mouseEmbryo" resource: the shared fields from
# df_all followed by the dataset-specific fields.
df_seqFISH_mouseEmbryo <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "A subset of cells (embryo 1, z-slice 2) from a dataset investigating mouse ",
    "embryogenesis by Lohoff and Ghazanfar et al. (2022), generated using the ",
    "seqFISH platform. The full dataset is available from the original ",
    "publication."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "seqFISH_mouseEmbryo",
    Description = description,
    SourceUrl = "https://marionilab.cruk.cam.ac.uk/SpatialMouseAtlas/",
    Species = "Mus musculus",
    TaxonomyId = "10090",
    RDataPath = "STexampleData/3_19/seqFISH_mouseEmbryo.rds",
    BiocVersion = "3.15",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "ST_mouseOB" resource: the shared fields from df_all
# followed by the dataset-specific fields.
df_ST_mouseOB <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "A single sample from the mouse brain olfactory bulb (OB) measured with ",
    "the Spatial Transcriptomics platform, published by Stahl et al. (2016). ",
    "This dataset contains annotations for five cell layers from the original ",
    "authors."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "ST_mouseOB",
    Description = description,
    SourceUrl = "https://www.science.org/doi/10.1126/science.aaf2403",
    Species = "Mus musculus",
    TaxonomyId = "10090",
    RDataPath = "STexampleData/3_19/ST_mouseOB.rds",
    BiocVersion = "3.15",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "SlideSeqV2_mouseHPC" resource: the shared fields from
# df_all followed by the dataset-specific fields.
df_SlideSeqV2_mouseHPC <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "A single sample of mouse brain from the hippocampus (HPC) and ",
    "surrounding regions, measured with the Slide-seqV2 platform by Stickels ",
    "et al. (2021). This dataset contains cell type annotations generated by ",
    "Cable et al. (2022)."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "SlideSeqV2_mouseHPC",
    Description = description,
    SourceUrl = "https://www.nature.com/articles/s41587-020-0739-1",
    Species = "Mus musculus",
    TaxonomyId = "10090",
    RDataPath = "STexampleData/3_19/SlideSeqV2_mouseHPC.rds",
    BiocVersion = "3.15",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "Janesick_breastCancer_Chromium" resource: the shared
# fields from df_all followed by the dataset-specific fields. Unlike the
# spatial datasets in this script, RDataClass here is "SingleCellExperiment".
df_Janesick_breastCancer_Chromium <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "10x Genomics Chromium single-cell RNA sequencing data from human breast ",
    "cancer dataset by Janesick et al. (2023). ",
    "High resolution mapping of the breast cancer tumor microenvironment using ",
    "integrated single-cell, spatial, and in situ analysis of FFPE tissue. ",
    "Contains annotations for cell type from the original authors."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "Janesick_breastCancer_Chromium",
    Description = description,
    SourceUrl = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM7782698",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/Janesick_breastCancer_Chromium.rds",
    BiocVersion = "3.19",
    RDataClass = "SingleCellExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "Janesick_breastCancer_Visium" resource: the shared
# fields from df_all followed by the dataset-specific fields.
df_Janesick_breastCancer_Visium <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "10x Genomics Visium spatial transcriptomics data from human breast ",
    "cancer dataset by Janesick et al. (2023). ",
    "High resolution mapping of the breast cancer tumor microenvironment using ",
    "integrated single-cell, spatial, and in situ analysis of FFPE tissue."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "Janesick_breastCancer_Visium",
    Description = description,
    SourceUrl = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM7782699",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/Janesick_breastCancer_Visium.rds",
    BiocVersion = "3.19",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "Janesick_breastCancer_Xenium_rep1" resource: the
# shared fields from df_all followed by the dataset-specific fields.
df_Janesick_breastCancer_Xenium_rep1 <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "10x Genomics Xenium in situ spatial data (sample 1, replicate 1) from ",
    "human breast cancer dataset by Janesick et al. (2023). ",
    "High resolution mapping of the breast cancer tumor microenvironment using ",
    "integrated single-cell, spatial, and in situ analysis of FFPE tissue."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "Janesick_breastCancer_Xenium_rep1",
    Description = description,
    SourceUrl = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM7780153",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/Janesick_breastCancer_Xenium_rep1.rds",
    BiocVersion = "3.19",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "Janesick_breastCancer_Xenium_rep2" resource: the
# shared fields from df_all followed by the dataset-specific fields.
df_Janesick_breastCancer_Xenium_rep2 <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "10x Genomics Xenium in situ spatial data (sample 1, replicate 2) from ",
    "human breast cancer dataset by Janesick et al. (2023). ",
    "High resolution mapping of the breast cancer tumor microenvironment using ",
    "integrated single-cell, spatial, and in situ analysis of FFPE tissue."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "Janesick_breastCancer_Xenium_rep2",
    Description = description,
    SourceUrl = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM7780154",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/Janesick_breastCancer_Xenium_rep2.rds",
    BiocVersion = "3.19",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "CosMx_lungCancer" resource: the shared fields from
# df_all followed by the dataset-specific fields.
df_CosMx_lungCancer <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "NanoString CosMx human non-small cell lung cancer (NSCLC) dataset. ",
    "Contains data from one sample (patient 9, slice 1). This dataset was ",
    "previously released by NanoString on their website."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "CosMx_lungCancer",
    Description = description,
    SourceUrl = "https://nanostring.com/resources/smi-ffpe-dataset-lung9-rep1-data/",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/CosMx_lungCancer.rds",
    BiocVersion = "3.19",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "MERSCOPE_ovarianCancer" resource: the shared fields
# from df_all followed by the dataset-specific fields.
df_MERSCOPE_ovarianCancer <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "Vizgen MERSCOPE human ovarian cancer dataset. ",
    "Contains data from one sample (patient 2, sample 1). This dataset was ",
    "previously released by Vizgen on their website."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "MERSCOPE_ovarianCancer",
    Description = description,
    SourceUrl = "https://console.cloud.google.com/storage/browser/vz-ffpe-showcase/HumanOvarianCancerPatient2Slice1;tab=objects?prefix=&forceOnObjectsSortingFiltering=false",
    Species = "Homo sapiens",
    TaxonomyId = "9606",
    RDataPath = "STexampleData/3_19/MERSCOPE_ovarianCancer.rds",
    BiocVersion = "3.19",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# Metadata row for the "STARmapPLUS_mouseBrain" resource: the shared fields
# from df_all followed by the dataset-specific fields.
df_STARmapPLUS_mouseBrain <- local({
  # Description text, assembled from fragments to keep source lines short.
  description <- paste0(
    "STARmap PLUS mouse brain data by Shi et al. (2023). ",
    "Contains data from one sample (well 05), including annotations for ",
    "cell type and tissue regions from the original authors."
  )

  # check.names = FALSE keeps the column names exactly as written here.
  data.frame(
    df_all,
    Title = "STARmapPLUS_mouseBrain",
    Description = description,
    SourceUrl = "https://zenodo.org/records/8327576",
    Species = "Mus musculus",
    TaxonomyId = "10090",
    RDataPath = "STexampleData/3_19/STARmapPLUS_mouseBrain.rds",
    BiocVersion = "3.19",
    RDataClass = "SpatialExperiment",
    DispatchClass = "Rds",
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
})

# combine and save as .csv spreadsheet file

# Stack the per-dataset rows into a single table, one row per resource, in
# the order listed here.
df_combined <- do.call(
  rbind,
  list(
    df_Visium_humanDLPFC,
    df_Visium_mouseCoronal,
    df_seqFISH_mouseEmbryo,
    df_ST_mouseOB,
    df_SlideSeqV2_mouseHPC,
    df_Janesick_breastCancer_Chromium,
    df_Janesick_breastCancer_Visium,
    df_Janesick_breastCancer_Xenium_rep1,
    df_Janesick_breastCancer_Xenium_rep2,
    df_CosMx_lungCancer,
    df_MERSCOPE_ovarianCancer,
    df_STARmapPLUS_mouseBrain
  )
)

# Write the table without row names. The output path is relative, so it is
# resolved against the working directory the script is run from.
write.csv(
  x = df_combined,
  file = "../extdata/metadata.csv",
  row.names = FALSE
)
# lmweber/STdata documentation built on May 19, 2024, 6:39 p.m.