library(devtools);
source_url("https://raw.githubusercontent.com/zhezhangsh/rchive/master/load.r");
path=paste(RCHIVE_HOME, 'data/gene/public/gtf', sep='/');
if (!file.exists(path)) dir.create(path, recursive=TRUE);
if(!file.exists(paste(path, 'r', sep='/'))) dir.create(paste(path, 'r', sep='/'), recursive=TRUE);
if(!file.exists(paste(path, 'src', sep='/'))) dir.create(paste(path, 'src', sep='/'), recursive=TRUE);
library(rtracklayer);
##############################################################################################################
# Files to be downloaed directly from sources
fn<-c(
#'human_GRCh38_NCBI_Scaffold' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p2_scaffolds.gff3.gz",
'human_GRCh38_NCBI' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p2_top_level.gff3.gz",
#'human_GRCh37_NCBI_Scaffold' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.3/GFF/ref_GRCh37.p5_scaffolds.gff3.gz",
'human_GRCh37_NCBI' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.3/GFF/ref_GRCh37.p5_top_level.gff3.gz",
'human_GRCh38_Gencode' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_22/gencode.v22.annotation.gtf.gz",
#'human_GRCh38_Gencode_Scaffold' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_22/gencode.v22.chr_patch_hapl_scaff.annotation.gtf.gz",
'human_GRCh38_Gencode_PolyA' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_22/gencode.v22.polyAs.gtf.gz",
'human_GRCh38_Gencode_Pseudo' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_22/gencode.v22.2wayconspseudos.gtf.gz",
'human_GRCh38_Gencode_tRNA' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_22/gencode.v22.tRNAs.gtf.gz",
'human_GRCh37_Gencode' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz",
#'human_GRCh37_Gencode_Scaffold' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.chr_patch_hapl_scaff.annotation.gtf.gz",
'human_GRCh37_Gencode_PolyA' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.polyAs.gtf.gz",
'human_GRCh37_Gencode_Pseudo' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.2wayconspseudos.gtf.gz",
'human_GRCh37_Gencode_tRNA' = "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.tRNAs.gtf.gz",
'human_GRCh38_Ensembl' = "ftp://ftp.ensembl.org/pub/release-79/gtf/homo_sapiens//Homo_sapiens.GRCh38.79.gtf.gz",
'human_GRCh37_Ensembl' = "ftp://ftp.ensembl.org/pub//release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf.gz",
'human_GRCh37_AceView' = "ftp://ftp.ncbi.nih.gov/repository/acedb/ncbi_37_Aug10.human.genes/AceView.ncbi_37.genes_gff.gff.gz",
#'mouse_GRCm38_NCBI_Scaffold' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/GFF/ref_GRCm38.p3_scaffolds.gff3.gz",
'mouse_GRCm38_NCBI' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz",
#'mouse_MGSCv37_NCBI_Scaffold' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/ARCHIVE/BUILD.37.2/GFF/ref_MGSCv37_scaffolds.gff3.gz",
'mouse_MGSCv37_NCBI' = "ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/ARCHIVE/BUILD.37.2/GFF/ref_MGSCv37_top_level.gff3.gz",
#'mouse_MGSCv37_AceView' = "ftp://ftp.ncbi.nih.gov/repository/acedb/ncbi_37_Sep07.mouse.genes/AceView.mm_37.genes_gff.tar.gz",
'mouse_GRCm38_Ensembl' = "ftp://ftp.ensembl.org/pub/release-79/gtf//mus_musculus/Mus_musculus.GRCm38.79.gtf.gz",
'worm_WB235_Ensembl' = "ftp://ftp.ensembl.org/pub/release-79/gtf//caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.79.gtf.gz"
)
fn0<-sapply(strsplit(fn, '/'), function(x) x[length(x)]);
fn0<-paste(path, 'src', fn0, sep='/');
names(fn0)<-names(fn);
fn1<-sapply(names(fn), function(nm) if(!file.exists(fn0[nm])) download.file(fn[nm], fn0[nm]));
##############################################################################################################
# GTF files previously downloaded from UCSC using Table Browser
fn2<-dir(paste(path, 'src', sep='/'));
fn2<-fn2[grep('UCSC', fn2)];
nm<-sapply(strsplit(fn2, '\\.'), function(x) x[1]);
fn2<-paste(path, 'src', fn2, sep='/');
names(fn2)<-nm;
fn.gtf<-c(fn0, fn2); # All source gtf files
##############################################################################################################
# Create parsed data and save to file
fn<-sapply(names(fn.gtf), function(nm) {
cat(nm, '\n');
ParseGtf(fn.gtf[nm], nm);
});
##############################################################################################################
tm<-strsplit(as.character(Sys.time()), ' ')[[1]][1];
fn0<-paste(RCHIVE_HOME, 'source/update/gene/UpdateGtf.r', sep='/');
fn1<-paste(RCHIVE_HOME, '/source/update/gene/log/', tm, '_UpdateGtf.r' , sep='');
file.copy(fn0, fn1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.