library(ezRun)
This is helpful in the case of a host and pathogen. E.g. a virus. It is demonstrated using the example of a human sample infected with the human adenovirus
https://www.ncbi.nlm.nih.gov/genome/10271?genome_assembly_id=891476
If not available download the fasta and GTF
library(ezRun) setwd("/srv/GT/analysis/p23963-Maarit/reference") dir() ezSystem("wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/857/865/GCF_000857865.1_ViralProj15107/GCF_000857865.1_ViralProj15107_genomic.gff.gz") ezSystem("wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/857/865/GCF_000857865.1_ViralProj15107/GCF_000857865.1_ViralProj15107_genomic.fna.gz")
Copy the files to /srv/GT/databases/Human_adenovirus_5/
Convert the gff to gtf
gff = ezRead.table("GCF_000857865.1_ViralProj15107_genomic.gff", comment.char = "#", row.names=NULL, header = FALSE) colnames(gff) = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes") gff = gff[ gff$type == "CDS", ] gff$type = sub("CDS", "exon", gff$type) gff$gene_name = ezGffAttributeField(gff$attributes, "Name", attrsep = ";", valuesep="=") gff$gene_id = gff$gene_name gff$transcript_id = gff$gene_name gff$transcript_type = "protein_coding" gff$gene_biotype = "protein_coding" gff$attributes = ezBuildAttributeField(gff[ , c("gene_name", "gene_id", "transcript_id", "transcript_type", "gene_biotype")]) ezWriteGff(gff, file="/srv/GT/databases/Human_adenovirus_5/Human_adenovirus_5.gtf")
Run STAR alignment with the option specialOptions
set to spikeInSet=Human_adenovirus_5
.
Build a combined annotation file
seqAnno = data.frame(row.names=unique(gff$gene_id)) seqAnno$gene_name = rownames(seqAnno) seqAnno$seqid = tapply(gff$seqid, gff$gene_id, ezCollapse, empty.rm=TRUE, uniqueOnly=TRUE)[rownames(seqAnno)] seqAnno$strand = tapply(gff$strand, gff$gene_id, ezCollapse, empty.rm=TRUE, uniqueOnly=TRUE)[rownames(seqAnno)] seqAnno$start = tapply(gff$start, gff$gene_id, min)[rownames(seqAnno)] seqAnno$end = tapply(gff$end, gff$gene_id, max)[rownames(seqAnno)] seqAnno$featWidth = tapply(gff$end - gff$start + 1, gff$gene_id, sum)[rownames(seqAnno)] seqAnno$gc = 0.5 seqAnno$type = "protein_coding" humAnno = ezRead.table("/srv/GT/reference/Homo_sapiens/GENCODE/GRCh38.p13/Annotation/Release_37-2021-05-04/Genes/genes_annotation_byGene.txt") colnames(humAnno) colnames(seqAnno) %in% colnames(humAnno) useCols = intersect(colnames(seqAnno), colnames(humAnno)) humAnno[ rownames(seqAnno), useCols] = seqAnno[ , useCols] humAnno[is.na(humAnno)] = "" ezWrite.table(humAnno, file="/srv/GT/reference/Homo_sapiens/GENCODE/GRCh38.p13/Annotation/Release_37-2021-05-04/Genes/genes_STARIndex_Human_adenovirus_5/genesAndSpikes_annotation_byGene.txt", head="gene_id")
Run FeatureCounts
Run CountQC
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.