Rowley et al., 2011; Bray et al., 2013; Kissopoulou et al., 2013; Londin et al., 2014; Lefrançais et al., 2017)
#' Quantify one sample with Salmon, unless it has been quantified already.
#'
#' Builds a `salmon quant` command line, wraps it in `bash -c '...'` (the
#' reads are passed through `<(cat ...)` process substitution, which needs
#' bash) and runs it with `system()`. Nothing is run when a `meta_info.json`
#' file already exists under `<salmondir>/<smp>/aux_info/` or
#' `<salmondir>/<smp>/aux/`.
#'
#' @param rtype Read layout: `"single"` or `"paired"`.
#' @param files For `"single"`, the FASTQ path(s); for `"paired"`, a list with
#'   elements `f1` and `f2`. Several paths are joined with spaces so that
#'   `cat` concatenates them.
#' @param salmondir Directory holding one output sub-directory per sample.
#' @param smp Sample name; the output is written to `<salmondir>/<smp>`.
#' @param salmonbin Path to the salmon executable.
#' @param libtype Library type, passed to `-l`.
#' @param salmonindex Path to the salmon index, passed to `-i`.
#' @param bias If `TRUE`, `--seqBias` is appended to the command.
#' @param threads Value passed to `-p` (default 10, the value that used to be
#'   hard-coded).
#' @return The exit status returned by `system()` when salmon was run;
#'   `NULL` (invisibly) when the sample was skipped or `rtype` is unknown.
quantify_salmon <- function(rtype, files, salmondir, smp, salmonbin, libtype,
                            salmonindex, bias = FALSE, threads = 10) {
  outdir <- paste0(salmondir, "/", smp)

  # Either location of meta_info.json counts as "already quantified".
  done_markers <- file.path(outdir, c("aux_info", "aux"), "meta_info.json")
  if (any(file.exists(done_markers))) {
    message("Salmon has already been run for ", smp)
    return(invisible(NULL))
  }

  # `bias` is a scalar switch, so use plain if/else instead of ifelse().
  bias_flag <- if (isTRUE(as.logical(bias))) "--seqBias" else ""

  if (identical(rtype, "single")) {
    reads <- sprintf("-r <(cat %s)", paste(files, collapse = " "))
  } else if (identical(rtype, "paired")) {
    reads <- sprintf(
      "-1 <(cat %s) -2 <(cat %s)",
      paste(files$f1, collapse = " "),
      paste(files$f2, collapse = " ")
    )
  } else {
    # Previously an unknown layout was skipped without any notice.
    warning("Unknown rtype '", rtype, "' for ", smp, "; salmon was not run",
            call. = FALSE)
    return(invisible(NULL))
  }

  salmon <- sprintf(
    "bash -c '%s quant -p %d -l %s -i %s %s -o %s %s'",
    salmonbin, as.integer(threads), libtype, salmonindex, reads, outdir,
    bias_flag
  )

  status <- system(salmon)
  if (!identical(as.integer(status), 0L)) {
    warning("salmon exited with status ", status, " for ", smp,
            call. = FALSE)
  }
  status
}

# Usage (placed after the definition so the function exists when called).
# NOTE(review): rtype, files, salmondir, smp, salmonbin, libtype, salmonindex
# and bias are not defined in this part of the file - confirm where they are
# set.
quantify_salmon(rtype = rtype, files = files, salmondir = salmondir,
                smp = smp, salmonbin = salmonbin, libtype = libtype,
                salmonindex = salmonindex, bias = bias)
SRA535016 GSE94384
# Map the samples of GEO series GSE94384 to rows of the local, runs-only
# snapshot of the SRA accession table.
mydf <- data.table::fread(
  "_db/SRA_Accessions_RUNSonly_SNAPSHOT_2018-01-31.tab"
)

# Fetch the series, then fetch each of its samples one by one.
mygeo_gse <- getGEO("GSE94384")[[1]]
mygeo_runs <- rownames(phenoData(mygeo_gse))
mygeo_runslist <- lapply(mygeo_runs, getGEO)

# Pull the GEO accession out of every sample header.
mygeo_allGSM <- unlist(
  lapply(mygeo_runslist, function(gsm_record) gsm_record@header$geo_accession)
)

# For every GSM keep the snapshot rows whose Alias matches it, then stack them.
mygeo_allSRR <- lapply(
  mygeo_allGSM,
  function(gsm_id) mydf[grep(gsm_id, mydf$Alias), ]
)
mygeo_SRAoverview <- do.call(rbind, mygeo_allSRR)

# reverse checks via SRAdb
library(SRAdb)
sqlfile <- "_db/SRAmetadb.sqlite"
sra_con <- dbConnect(SQLite(), sqlfile)
myohmy <- getFASTQinfo(mygeo_SRAoverview$Accession, sra_con, srcType = "ftp")
myohmy2 <- getSRAinfo(mygeo_SRAoverview$Accession, sra_con)
getSRAfile(myohmy$run, sra_con, fileType = "fastq", destDir = "_fastq")

# Look up a single sample in the snapshot.
mydf[grep("GSM2474455", mydf$Alias), ]

# Shell commands (not R; run in a terminal) - .sra files from NCBI:
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223045/SRR5223045.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223046/SRR5223046.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223047/SRR5223047.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223048/SRR5223048.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223049/SRR5223049.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223050/SRR5223050.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223051/SRR5223051.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223052/SRR5223052.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223053/SRR5223053.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223054/SRR5223054.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223055/SRR5223055.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223056/SRR5223056.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223057/SRR5223057.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR522/SRR5223058/SRR5223058.sra
#
# Shell commands (not R) - gzipped FASTQ files from the EBI mirror:
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/006/SRR5223046/SRR5223046.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/007/SRR5223047/SRR5223047.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/008/SRR5223048/SRR5223048.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/009/SRR5223049/SRR5223049.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/000/SRR5223050/SRR5223050.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/001/SRR5223051/SRR5223051.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/002/SRR5223052/SRR5223052.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/003/SRR5223053/SRR5223053.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/004/SRR5223054/SRR5223054.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/005/SRR5223055/SRR5223055.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/006/SRR5223056/SRR5223056.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/007/SRR5223057/SRR5223057.fastq.gz
# wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR522/008/SRR5223058/SRR5223058.fastq.gz
# GSE74246 - https://www.ncbi.nlm.nih.gov//geo/query/acc.cgi?acc=GSE74246
# Corces MR, Buenrostro JD, Wu B, Greenside PG et al. Lineage-specific and
# single-cell chromatin accessibility charts human hematopoiesis and leukemia
# evolution. Nat Genet 2016 Oct;48(10):1193-203. PMID: 27526324

# SRA062032
# bray - The complex transcriptional landscape of the anucleate human platelet
rs <- getSRAinfo("SRA062032", sra_con, sraType = "sra")
rs2 <- getFASTQinfo("SRA062032", sra_con)
getSRAfile(rs2$run, sra_con, fileType = "fastq", destDir = "_fastq")

# Shell commands (not R; run in a terminal):
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR627/SRR627520/SRR627520.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628222/SRR628222.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628233/SRR628233.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628234/SRR628234.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628235/SRR628235.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628237/SRR628237.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628238/SRR628238.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628239/SRR628239.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628240/SRR628240.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628241/SRR628241.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628243/SRR628243.sra
# wget -c ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR628/SRR628244/SRR628244.sra
#
# Numbered list of FASTQ URLs for the same runs (pasted output):
#  1 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR627/SRR627520/SRR627520.fastq.gz
#  2 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628222/SRR628222.fastq.gz
#  3 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628233/SRR628233.fastq.gz
#  4 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628234/SRR628234.fastq.gz
#  5 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628235/SRR628235.fastq.gz
#  6 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628237/SRR628237.fastq.gz
#  7 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628238/SRR628238.fastq.gz
#  8 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628239/SRR628239.fastq.gz
#  9 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628240/SRR628240.fastq.gz
# 10 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628241/SRR628241.fastq.gz
# 11 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628243/SRR628243.fastq.gz
# 12 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR628/SRR628244/SRR628244.fastq.gz
#
# -> add this too?
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE50858

# best 2015
# RNA-seq of tumor-educated platelets enables blood-based pan-cancer,
# multiclass and molecular pathway cancer diagnostics
# Identifiers: SRA: SRP057500 BioProject: PRJNA281708 GEO: GSE68086
# SRA260904

# best 2017
# GEO: GSE89843
# SRP093349 Study Type: Transcriptome Analysis Submission: SRA493919
getSRAinfo("SRP093349", sra_con)

# https://www.thieme-connect.com/products/ejournals/pdf/10.1160/TH16-11-0873.pdf
# data not provided!
# http://heart.bmj.com/content/101/Suppl_4/A102.2

# probably to REDO ?????
# https://trace.ncbi.nlm.nih.gov/Traces/sra?study=SRP092389
# We here systematically studied the interaction network of bone marrow
# cells. To this end, we micro-dissected many small interacting structures
# (cell doublets, triplets etc.) into single cells, and sequenced their mRNAs,
# to infer cell identity. After grouping the cells into cell types (based on
# the single-cell transcriptomes), we identified actual physical interactions
# that occurred more, or less, than what would be expected by chance.
# Overall design: After mild dissociation of the bone marrow or fetal liver,
# we micro-dissected many small interacting structures (cell doublets,
# triplets etc.) into single cells, and sequenced their mRNAs. We also sorted
# single bone marrow cells. Finally we sorted 2000 or single neutrophils
# (Gr1+ CD115-) from control or megakaryocyte depleted mice.
myohmy <- getFASTQinfo("SRP092389", sra_con, srcType = "ftp")
getSRAinfo("SRP092389", sra_con, sraType = "sra")
getFASTQinfo("SRP092389", sra_con)

# chipseq?
# https://www.ncbi.nlm.nih.gov//geo/query/acc.cgi?acc=GSE42249
# https://www.ncbi.nlm.nih.gov//geo/query/acc.cgi?acc=GSE24674
# https://platelets.group.cam.ac.uk/publications ?
# http://www.bloodjournal.org/content/121/19/3908.long?sso-checked=true
# http://www.bloodjournal.org/content/118/7/1903.long
# https://www.sciencedirect.com/science/article/pii/S1873506115000665?via%3Dihub
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE60941
# http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0183042
# Data Availability: Data have been deposited at Gene Expression Omnibus (GEO)
# under accession number GSE94292. -> microarray!
# Build a local, queryable copy of the SRA accession table.
# as in https://www.biostars.org/p/244150/
accessions_SRA_file <- "ftp://ftp.ncbi.nlm.nih.gov/sra/reports/Metadata/SRA_Accessions.tab"
# The snapshot is stamped with today's date; the steps below hard-code the
# 2018-01-31 snapshot.
download.file(
  accessions_SRA_file,
  paste0("_db/SRA_Accessions_SNAPSHOT_", Sys.Date(), ".tab")
)
# accessions_SRA <- readr::read_tsv(accessions_SRA_file)

# Shell command (not R; run in a terminal) - keep only SRR rows mentioning GSM:
# grep '^SRR' _db/SRA_Accessions_SNAPSHOT_2018-01-31.tab | grep 'GSM' > _db/SRA_Accessions_RUNSonly_SNAPSHOT_2018-01-31.tab
# insert header as first line
# Accession Submission Status Updated Published Received Type Center Visibility Alias Experiment Sample Study Loaded Spots Bases Md5sum BioSample BioProject ReplacedBy
# Shell command (not R):
# sed -i "1i\ $(head -n 1 _db/SRA_Accessions_SNAPSHOT_2018-01-31.tab)" _db/SRA_Accessions_RUNSonly_SNAPSHOT_2018-01-31.tab

mydf <- data.table::fread("_db/SRA_Accessions_RUNSonly_SNAPSHOT_2018-01-31.tab")
# colnames(mydf) <- dbListFields(sraacc_con,name = "SRAaccs")

# to be done once
# NOTE(review): this path has no "_db/" prefix, unlike every other snapshot
# path in this section - confirm the intended working directory.
mydf <- data.table::fread("SRA_Accessions_SNAPSHOT_2018-01-31.tab")
library(DBI)
# Compute the database path once so the writer and the reader below are
# guaranteed to open the same file.
sra_acc_sqlite <- paste0("_db/SRA_Accessions_SNAPSHOT_", Sys.Date(), ".sqlite")
mySRAaccessiondb <- dbConnect(RSQLite::SQLite(), sra_acc_sqlite)
dbWriteTable(mySRAaccessiondb, "SRAaccs", mydf)
# Close the write connection before the file is reopened for querying.
dbDisconnect(mySRAaccessiondb)

# to use it:
# SQLite() is namespace-qualified: only DBI is attached in this section.
sraacc_con <- dbConnect(RSQLite::SQLite(), sra_acc_sqlite)
dbListTables(sraacc_con)
dbListFields(sraacc_con, name = "SRAaccs")
dbGetQuery(
  sraacc_con,
  'SELECT * FROM SRAaccs WHERE "Alias" == :x',
  params = list(x = "GSM2474455")
)
# Exploratory SRAdb session: open the metadata database, look up FASTQ
# locations for a few accessions and download the corresponding files.
library(SRAdb)
sqlfile <- "_db/SRAmetadb.sqlite"
# Call getSRAdbFile() only when no local copy exists. Plain `<-` replaces
# `<<-`; at top level both assign the same global variable.
if (!file.exists(sqlfile)) {
  sqlfile <- getSRAdbFile()
}
sra_con <- dbConnect(SQLite(), sqlfile)

getFASTQinfo(c("SRR000648", "SRR000657"), sra_con, srcType = "ftp")
getFASTQinfo(c("SRX2532222"), sra_con, srcType = "ftp")
getFASTQinfo(c("SRR343051"), sra_con, srcType = "ftp")
getSRAfile(c("SRR343051"), sra_con, fileType = "fastq")
getSRAfile(c("SRR343051"), sra_con, fileType = "sra", destDir = "_sra")

# Manual download of the `_1` / `_2` files derived from each listed URL.
sraFiles <- getFASTQinfo(c("SRR343051"), sra_con, srcType = "ftp")
method <- "curl"
fnames <- sraFiles$ftp
fileinfo <- NULL
# `destDir` is used below but is never assigned in this section. Fall back to
# the working directory unless it has been defined elsewhere.
# NOTE(review): confirm the intended download directory.
if (!exists("destDir")) {
  destDir <- getwd()
}
for (i in fnames) {
  # fixed = TRUE: match the literal ".fastq.gz", not "." as a regex wildcard.
  f_reads <- gsub(".fastq.gz", "_1.fastq.gz", i, fixed = TRUE)
  r_reads <- gsub(".fastq.gz", "_2.fastq.gz", i, fixed = TRUE)
  download.file(f_reads,
                destfile = file.path(destDir, basename(f_reads)),
                method = method)
  download.file(r_reads,
                destfile = file.path(destDir, basename(r_reads)),
                method = method)
}

#' Build EBI FTP URLs for the gzipped FASTQ file of each run.
#'
#' Runs shorter than 10 characters map to
#' `<base>/<first 6 chars>/<run>/<run>.fastq.gz`. Longer runs get an extra
#' directory level between prefix and run, made of the characters from
#' position 10 onwards, left-padded with zeros (`12 - nchar(run)` of them).
#'
#' @param df A data frame (or list) with a `run` element holding run
#'   accessions.
#' @return A character vector of URLs, one per run; `NA` where the run is
#'   `NA`, and `character(0)` when there are no runs.
get.fastq.urls <- function(df) {
  runs <- as.character(df$run)
  base_url <- "ftp://ftp.sra.ebi.ac.uk/vol1/fastq"

  url <- rep(NA_character_, length(runs))
  n_chars <- nchar(runs)
  prefix <- substring(runs, 1, 6)
  filename <- paste0(runs, ".fastq.gz")

  is_short <- !is.na(runs) & n_chars < 10
  is_long <- !is.na(runs) & n_chars >= 10

  url[is_short] <- file.path(
    base_url, prefix[is_short], runs[is_short], filename[is_short]
  )

  # Zero-padded sub-directory taken from the tail of the accession.
  dir2 <- paste0(
    strrep("0", pmax(12L - n_chars[is_long], 0L)),
    substring(runs[is_long], 10)
  )
  url[is_long] <- file.path(
    base_url, prefix[is_long], dir2, runs[is_long], filename[is_long]
  )

  url
}
# Query the SRAdb connection for "platelets", requesting the run and study
# output types.
rs <- getSRA(
  search_terms = "platelets",
  out_types = c("run", "study"),
  sra_con = sra_con
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.