# inst/scripts/make-metadata.R

# Build the hub metadata table for the NxtIRFdata resources and write it to
# a CSV file.
#
# Two groups of records are described:
#   * six example BAM files, one row per sample; each RDataPath lists the
#     ".bam" file and its ".bam.bai" index separated by ":";
#   * four mappability-exclusion resources (hg38, hg19, mm10, mm9), recorded
#     with RDataClass "GRanges" and DispatchClass "Rds".
#
# Arguments:
#   metadata_path  Path of the CSV file to write. The default,
#                  "../extdata/metadata.csv", is resolved against the current
#                  working directory, so it is only correct when the script
#                  is run from inst/scripts; pass an explicit path otherwise.
#
# Returns the combined data.frame (BAM rows first, then mappability rows),
# invisibly. Stops with an error if the target directory does not exist.
make_metadata <- function(metadata_path = "../extdata/metadata.csv") {
    stopifnot(is.character(metadata_path), length(metadata_path) == 1L)

    # Fail early with a clear message instead of a low-level connection error.
    out_dir <- dirname(metadata_path)
    if (!dir.exists(out_dir)) {
        stop("Output directory does not exist: ", out_dir, call. = FALSE)
    }

    bam_samples <- c(
        "02H003", "02H025", "02H026", "02H033", "02H043", "02H046"
    )
    genes <- "SRSF1, SRSF2, SRSF3, TRA2A, TRA2B, TP53 and NSUN5"
    maintainer <- "Alex Wong <a.wong@centenary.org.au>"

    df_bams <- data.frame(
        Title = sprintf("NxtIRF/example_bam/%s", bam_samples),
        # paste() joins with single spaces, which leaves a space before the
        # period after the gene list; kept as-is so the generated CSV is
        # unchanged.
        Description = paste(
            bam_samples,
            "- aligned reads (from this sample in GSE67039) were filtered by",
            "the regions of genes", genes,
            ". These were re-aligned to the NxtIRF example genome using STAR."
        ),
        BiocVersion = "3.14",
        Genome = "NxtIRF_chrZ",
        SourceType = "BAM",
        SourceUrl = paste0(
            "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi",
            "?acc=GSE67039"
        ),
        SourceVersion = "1.0.0",
        Species = "Homo sapiens",
        TaxonomyId = "9606",
        Coordinate_1_based = 0,
        DataProvider = "Leucegene",
        Maintainer = maintainer,
        RDataClass = "BamFile",
        DispatchClass = "BamFile",
        # One entry per sample: "<bam>:<bai>".
        RDataPath = sprintf(
            paste(
                "NxtIRFdata/example_bam/1.0.0/%s.bam",
                "NxtIRFdata/example_bam/1.0.0/%s.bam.bai",
                sep = ":"
            ),
            bam_samples, bam_samples
        ),
        Tags = "ExperimentData:chrZGenome:Leucegene:NxtIRF",
        stringsAsFactors = FALSE
    )

    genomes <- c("hg38", "hg19", "mm10", "mm9")
    # Used both as the source URL and as the method reference in Description.
    mappability_url <- paste0(
        "https://github.com/williamritchie/IRFinder/",
        "blob/master/bin/util/Mapability"
    )

    df_mappa <- data.frame(
        Title = sprintf("NxtIRF/mappability/%s", genomes),
        Description = paste(
            c(
                "Ensembl GRCh38 (hg38) release-104",
                "Ensembl GRCh37 (hg19) release-75",
                "Ensembl GRCm38 (mm10) release-104",
                "Ensembl NCBIM37 (mm9) release-67"
            ),
            "mappability exclusion regions were generated by NxtIRF,",
            "using a modification of the method as described in",
            mappability_url
        ),
        BiocVersion = "3.14",
        Genome = genomes,
        SourceType = "BED",
        SourceUrl = mappability_url,
        SourceVersion = "1.0.0",
        # First two genomes are human, last two are mouse.
        Species = rep(c("Homo sapiens", "Mus musculus"), each = 2),
        TaxonomyId = rep(c("9606", "10090"), each = 2),
        Coordinate_1_based = 1,
        DataProvider = "NxtIRFdata",
        Maintainer = maintainer,
        RDataClass = "GRanges",
        DispatchClass = "Rds",
        RDataPath = sprintf(
            paste(
                "NxtIRFdata/mappability/1.0.0",
                "%s.MappabilityExclusion.bed.Rds",
                sep = "/"
            ),
            genomes
        ),
        Tags = "Annotation:MappabilityExclusion:Mappability:NxtIRF:IRFinder",
        stringsAsFactors = FALSE
    )

    df <- rbind(df_bams, df_mappa)

    write.csv(df, file = metadata_path, row.names = FALSE)
    invisible(df)
}
# alexchwong/NxtIRFdata documentation built on Sept. 14, 2024, 3:29 a.m.