## inst/scripts/make-metadata.R

####---- EXPERIMENTHUB METADATA TABLE ----####

## Build the one-row metadata record for a single scpdata dataset.
##
## Only the fields that vary between datasets are arguments; the fields
## that are identical for every record (Genome, SourceVersion,
## Coordinate_1_based, RDataClass, DispatchClass, Notes) are filled in
## here, which also guarantees that every record has the same columns in
## the same order.
##
## Arguments without a default must be supplied for every dataset, so a
## forgotten field is an error rather than a silently wrong value.
##
## @param Title Dataset name; also used to derive the default RDataPath.
## @param Description One-sentence description of the dataset.
## @param BiocVersion Bioconductor version string, e.g. "3.19".
## @param SourceType Format of the source files, e.g. "TXT".
## @param SourceUrl Location of the source files.
## @param DataProvider Name of the repository providing the source files.
## @param PublicationDate Date string parsable by as.Date() ("YYYY/MM/DD").
## @param NumberAssays Number of assays in the QFeatures object.
## @param PreprocessingSoftware Comma-separated software names.
## @param LabelingProtocol Labeling protocol(s), e.g. "LFQ" or "TMT-16".
## @param PsmsAvailable,PeptidesAvailable,ProteinsAvailable Logical flags
##     for the data levels available in the dataset.
## @param ContainsSingleCells Logical flag.
## @param Species,TaxonomyId Species name and its taxonomy identifier;
##     they default to human and must be overridden together.
## @param Maintainer Name and e-mail of the dataset maintainer.
## @param RDataPath Path of the serialized object; defaults to
##     "scpdata/<Title>.Rda".
## @return A one-row data.frame with 24 columns.
make_entry <- function(Title,
                       Description,
                       BiocVersion,
                       SourceType,
                       SourceUrl,
                       DataProvider,
                       PublicationDate,
                       NumberAssays,
                       PreprocessingSoftware,
                       LabelingProtocol,
                       PsmsAvailable,
                       PeptidesAvailable,
                       ProteinsAvailable,
                       ContainsSingleCells,
                       Species = "Homo sapiens",
                       TaxonomyId = 9606,
                       Maintainer = "Christophe Vanderaa <christophe.vanderaa@uclouvain.be>",
                       RDataPath = paste0("scpdata/", Title, ".Rda")) {
    data.frame(
        Title = Title,
        Description = Description,
        BiocVersion = BiocVersion,
        Genome = NA_character_,
        SourceType = SourceType,
        SourceUrl = SourceUrl,
        SourceVersion = NA_character_,
        Species = Species,
        TaxonomyId = TaxonomyId,
        Coordinate_1_based = TRUE,
        DataProvider = DataProvider,
        Maintainer = Maintainer,
        RDataClass = "QFeatures",
        DispatchClass = "Rda",
        RDataPath = RDataPath,
        PublicationDate = as.Date(PublicationDate),
        NumberAssays = NumberAssays,
        PreprocessingSoftware = PreprocessingSoftware,
        LabelingProtocol = LabelingProtocol,
        PsmsAvailable = PsmsAvailable,
        PeptidesAvailable = PeptidesAvailable,
        ProteinsAvailable = ProteinsAvailable,
        ContainsSingleCells = ContainsSingleCells,
        Notes = NA_character_
    )
}

## One record per dataset, in the order the datasets were added.
meta <- list(
    make_entry(
        Title = "specht2019v2",
        Description = paste0("SCP expression data for monocytes (U-937) and macrophages ",
                             "at PSM, peptide and protein level"),
        BiocVersion = "3.15",
        SourceType = "CSV",
        SourceUrl = "https://drive.google.com/drive/folders/1Zhjik_JFjCQNIVjg63-fooJ4K0HZxWjV",
        DataProvider = "GoogleDrive",
        PublicationDate = "2019/12/5",
        NumberAssays = 179,
        PreprocessingSoftware = "MaxQuant,DART-ID",
        LabelingProtocol = "TMT-11,TMT-16",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "specht2019v3",
        Description = paste0("SCP expression data for more monocytes (U-937) and ",
                             "macrophages at PSM, peptide and protein level"),
        BiocVersion = "3.15",
        SourceType = "CSV",
        SourceUrl = "https://drive.google.com/drive/folders/1Zhjik_JFjCQNIVjg63-fooJ4K0HZxWjV",
        DataProvider = "GoogleDrive",
        PublicationDate = "2020/10/04",
        NumberAssays = 179,
        PreprocessingSoftware = "MaxQuant,DART-ID",
        LabelingProtocol = "TMT-11,TMT-16",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "dou2019_lysates",
        Description = paste0("SCP expression data for Hela digests (0.2 or 10 ng) at ",
                             "PSM and protein level"),
        BiocVersion = "3.14",
        SourceType = "XLS/XLSX",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000084110/",
        DataProvider = "MassIVE",
        PublicationDate = "2019/10/15",
        NumberAssays = 4,
        PreprocessingSoftware = "MS-GF+,MASIC",
        LabelingProtocol = "TMT-10",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = FALSE
    ),
    make_entry(
        Title = "dou2019_mouse",
        Description = paste0("SCP expression data for C10, SVEC or Raw cells at ",
                             "PSM and protein level"),
        BiocVersion = "3.14",
        SourceType = "XLS/XLSX",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000084110/",
        Species = "Mus musculus",
        TaxonomyId = 10090,
        DataProvider = "MassIVE",
        PublicationDate = "2019/10/15",
        NumberAssays = 14,
        PreprocessingSoftware = "MS-GF+,MASIC",
        LabelingProtocol = "TMT-10",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "dou2019_boosting",
        Description = paste0("SCP expression data for C10, SVEC or Raw cells ",
                             "and 3 boosters (0, 5 or 50 ng) at PSM and protein level"),
        BiocVersion = "3.14",
        SourceType = "XLS/XLSX",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000084110/",
        Species = "Mus musculus",
        TaxonomyId = 10090,
        DataProvider = "MassIVE",
        PublicationDate = "2019/10/15",
        NumberAssays = 8,
        PreprocessingSoftware = "MS-GF+,MASIC",
        LabelingProtocol = "TMT-10",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    ## NOTE(review): the description says "at PSM level" while
    ## PsmsAvailable is FALSE -- confirm which one is intended.
    make_entry(
        Title = "zhu2018MCP",
        Description = paste0("Near SCP expression data for micro-dissected rat ",
                             "brain samples (50, 100, or 200 um width) at PSM level"),
        BiocVersion = "3.15",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2018/07/PXD008844",
        Species = "Rattus norvegicus",
        TaxonomyId = 10116,
        DataProvider = "PRIDE",
        PublicationDate = "2018/09/01",
        NumberAssays = 4,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = FALSE
    ),
    make_entry(
        Title = "zhu2018NC_hela",
        Description = paste0("Near SCP expression data for HeLa samples (aproximately ",
                             "12, 40, or 140 cells) at PSM level"),
        BiocVersion = "3.13",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2018/01/PXD006847",
        DataProvider = "PRIDE",
        PublicationDate = "2018/02/28",
        NumberAssays = 4,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = FALSE
    ),
    make_entry(
        Title = "zhu2018NC_lysates",
        Description = paste0("Near SCP expression data for HeLa lysates (10, 40 and ",
                             "140 cell equivalent) at PSM level"),
        BiocVersion = "3.13",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2018/01/PXD006847",
        DataProvider = "PRIDE",
        PublicationDate = "2018/02/28",
        NumberAssays = 4,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = FALSE
    ),
    make_entry(
        Title = "zhu2018NC_islets",
        Description = paste0("Near SCP expression data for micro-dissected human ",
                             "pancreas samples (control patients or type 1 ",
                             "diabetes) at PSM level"),
        BiocVersion = "3.13",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2018/01/PXD006847",
        DataProvider = "PRIDE",
        PublicationDate = "2018/02/28",
        NumberAssays = 4,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = FALSE
    ),
    make_entry(
        Title = "cong2020AC",
        Description = paste0("SCP expression data for Hela cells at PSM, peptide ",
                             "and protein level"),
        BiocVersion = "3.13",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2020/02/PXD016921",
        DataProvider = "PRIDE",
        PublicationDate = "2020/01/02",
        NumberAssays = 9,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "zhu2019EL",
        Description = paste0("SCP expression data for chicken utricle samples (1, ",
                             "3, 5 or 20 cells) at PSM, peptide and protein level"),
        BiocVersion = "3.15",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2019/11/PXD014256",
        Species = "Gallus gallus",
        TaxonomyId = 9031,
        DataProvider = "PRIDE",
        PublicationDate = "2019/11/04",
        NumberAssays = 63,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "liang2020_hela",
        Description = paste0("Expression data for HeLa cells (0, 1, ",
                             "10, 150, 500 cells) at PSM, peptide and ",
                             "protein level"),
        BiocVersion = "3.13",
        SourceType = "TXT",
        SourceUrl = "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2020/12/PXD021882/",
        DataProvider = "PRIDE",
        PublicationDate = "2020/12/22",
        NumberAssays = 17,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "schoof2021",
        Description = paste0("Single-cell proteomics data from ",
                             "OCI-AML8227 cell culture to reconstruct ",
                             "the cellular hierarchy."),
        BiocVersion = "3.14",
        SourceType = "TXT",
        SourceUrl = "http://ftp.pride.ebi.ac.uk/pride/data/archive/2021/05/PXD020586/",
        DataProvider = "PRIDE",
        PublicationDate = "2021/06/07",
        NumberAssays = 194,
        PreprocessingSoftware = "ProteomeDiscoverer",
        LabelingProtocol = "TMT-16",
        PsmsAvailable = TRUE,
        PeptidesAvailable = FALSE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "williams2020_lfq",
        Description = paste0("Single-cell label free proteomics data ",
                             "from a MCF10A cell line culture."),
        BiocVersion = "3.15",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000085230/",
        DataProvider = "MassIVE",
        PublicationDate = "2020/08/04",
        NumberAssays = 9,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "williams2020_tmt",
        Description = paste0("Single-cell proteomics data ",
                             "from three acute myeloid leukemia cell ",
                             "line culture (MOLM-14, K562, CMK)."),
        BiocVersion = "3.15",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000085230/",
        DataProvider = "MassIVE",
        PublicationDate = "2020/08/04",
        NumberAssays = 4,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "TMT-11",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "derks2022",
        Description = paste0("Single-cell and bulk (100-cell) ",
                             "proteomics data of PDAC, melanoma cells ",
                             "and monocytes."),
        BiocVersion = "3.16",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000089093/",
        DataProvider = "MassIVE",
        PublicationDate = "2022/07/14",
        NumberAssays = 66,
        PreprocessingSoftware = "DIA-NN",
        LabelingProtocol = "mTRAQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = FALSE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "brunner2022",
        Description = paste0("Single-cell proteomics data of cell ",
                             "cycle stages in HeLa."),
        BiocVersion = "3.16",
        SourceType = "TXT",
        SourceUrl = "https://www.ebi.ac.uk/pride/archive/projects/PXD024043",
        DataProvider = "PRIDE",
        PublicationDate = "2022/02/08",
        NumberAssays = 435,
        PreprocessingSoftware = "DIA-NN",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = FALSE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "leduc2022_pSCoPE",
        Description = paste0("Single-cell proteomics data ",
                             "of 878 melanoma cells and 877 ",
                             "monocytes (pSCoPE)."),
        BiocVersion = "3.19",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000089159/",
        DataProvider = "MassIVE",
        PublicationDate = "2022/03/30",
        NumberAssays = 138,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "TMT-18",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "leduc2022_plexDIA",
        Description = paste0("Single-cell proteomics data ",
                             "of 126 melanoma cells (plexDIA)."),
        BiocVersion = "3.18",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000089159/",
        DataProvider = "MassIVE",
        PublicationDate = "2022/12/06",
        NumberAssays = 48,
        PreprocessingSoftware = "DIA-NN",
        LabelingProtocol = "mTRAQ-3",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "woo2022_macrophage",
        Description = paste0("Single-cell proteomics data ",
                             "from LPS-treated macrophages."),
        BiocVersion = "3.18",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000085937/",
        DataProvider = "MassIVE",
        PublicationDate = "2022/05/18",
        NumberAssays = 5,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "woo2022_lung",
        Description = paste0("Single-cell proteomics data ",
                             "from primary human lung cells."),
        BiocVersion = "3.18",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/MSV000085937/",
        DataProvider = "MassIVE",
        PublicationDate = "2022/05/18",
        NumberAssays = 5,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "LFQ",
        PsmsAvailable = FALSE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "gregoire2023_mixCTRL",
        Description = paste0("Single-cell proteomics data ",
                             "from two monocyte cell lines"),
        BiocVersion = "3.19",
        SourceType = "TXT",
        SourceUrl = "https://www.ebi.ac.uk/pride/archive/projects/PXD046211",
        DataProvider = "PRIDE",
        Maintainer = "Samuel Gregoire <samuel.gregoire@uclouvain.be>",
        PublicationDate = "2024/01/22",
        NumberAssays = 119,
        PreprocessingSoftware = "Sage",
        LabelingProtocol = "TMT-16",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    ## NOTE(review): SourceUrl points to Google Drive while DataProvider
    ## is "MassIVE" -- confirm which provider should be recorded.
    make_entry(
        Title = "khan2023",
        Description = paste0("Single-cell proteomics data ",
                             "of 421 MCF-10A cells undergoing ",
                             "EMT triggered by TGFβ"),
        BiocVersion = "3.19",
        SourceType = "TXT",
        SourceUrl = "https://drive.google.com/drive/folders/1zCsRKWNQuAz5msxx0DfjDrIe6pUjqQmj",
        DataProvider = "MassIVE",
        Maintainer = "Enes Sefa Ayar <enes.ayar@uclouvain.be>",
        PublicationDate = "2023/12/21",
        NumberAssays = 47,
        PreprocessingSoftware = "MaxQuant",
        LabelingProtocol = "TMTPro 16plex",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    ## NOTE(review): this is the only record with a lowercase ".rda"
    ## extension; it is kept as is because it presumably matches the name
    ## of the uploaded file -- confirm.
    make_entry(
        Title = "guise2024",
        Description = paste0("Single-cell proteomics data ",
                             "of 108 postmortem CTL or ALS spinal ",
                             "moto neurons"),
        BiocVersion = "3.19",
        SourceType = "TXT",
        SourceUrl = "ftp://massive.ucsd.edu/v05/MSV000092119/",
        DataProvider = "MassIVE",
        RDataPath = "scpdata/guise2024.rda",
        PublicationDate = "2024/01/05",
        NumberAssays = 47,
        PreprocessingSoftware = "Proteome Discoverer",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    ## The samples are mouse embryonic stem cells, so the species is
    ## Mus musculus (taxonomy 10090), not human.
    make_entry(
        Title = "petrosius2023_mES",
        Description = paste0("Mouse embryonic stem cells across ground-state (m2i) ",
                             "and differentiation-permissive (m15) culture conditions."),
        BiocVersion = "3.19",
        SourceType = "TXT",
        SourceUrl = "https://dataverse.uclouvain.be/dataset.xhtml?persistentId=doi:10.14428/DVN/EMAVLT",
        Species = "Mus musculus",
        TaxonomyId = 10090,
        DataProvider = "Dataverse",
        Maintainer = "Enes Sefa Ayar <enes.ayar@uclouvain.be>",
        PublicationDate = "2024/04/09",
        NumberAssays = 605,
        PreprocessingSoftware = "Spectronaut",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    ),
    make_entry(
        Title = "petrosius2023_AstralAML",
        Description = paste0("Single-cell proteomics data ",
                             "of 4 cell types from the OCI-AML8227 ",
                             "model."),
        BiocVersion = "3.19",
        SourceType = "TXT",
        SourceUrl = "https://dataverse.uclouvain.be/dataset.xhtml?persistentId=doi:10.14428/DVN/4DSPJM",
        DataProvider = "Dataverse",
        Maintainer = "Samuel Gregoire <samuel.gregoire@uclouvain.be>",
        PublicationDate = "2023/06/08",
        NumberAssays = 217,
        PreprocessingSoftware = "Spectronaut",
        LabelingProtocol = "LFQ",
        PsmsAvailable = TRUE,
        PeptidesAvailable = TRUE,
        ProteinsAvailable = TRUE,
        ContainsSingleCells = TRUE
    )
)


## Stack the per-dataset records into the final metadata table (one row
## per dataset).
metadata <- do.call(rbind, meta)

## Resolve the package root and check it before anything is written.
## The call is namespace-qualified because this script never attaches
## the here package, so a bare here() would not be found.
pkg_source <- here::here()
stopifnot(file.exists(pkg_source))

## Write the table where the hub tooling expects it: inst/extdata/.
write.csv(metadata,
          file = here::here("inst", "extdata", "metadata.csv"),
          row.names = FALSE)

## Validate the written file against the hub metadata requirements.
## NOTE(review): scpdata is an ExperimentHub package;
## ExperimentHubData::makeExperimentHubMetadata() may be the more
## appropriate validator -- confirm before switching.
AnnotationHubData::makeAnnotationHubMetadata(pathToPackage = pkg_source,
                                             fileName = "metadata.csv")
## UCLouvain-CBIO/scpdata documentation built on May 6, 2024, 6:17 a.m.