Compiled date: `r Sys.Date()`

Last edited: 2021-07-14

License: `r packageDescription("plateletopedia")[["License"]]`

# Document-wide knitr chunk defaults.
knitr::opts_chunk$set(
  # Execution: chunks are shown but not run by default (eval = FALSE).
  eval      = FALSE,
  error     = FALSE,
  # Console noise: hide warnings and messages in the rendered output.
  warning   = FALSE,
  message   = FALSE,
  # Presentation: merge source and output blocks, prefix output with "#>".
  collapse  = TRUE,
  comment   = "#>",
  fig.width = 10
)

# Wider console output than the R default, so long lines wrap less often.
options(width = 100)

# htmltools must be installed: it is used right below to attach the
# Font Awesome HTML dependency to the rendered document.
stopifnot(requireNamespace("htmltools"))
htmltools::tagList(rmarkdown::html_dependency_font_awesome())

Setup {#setup}

Retrieval of references

# Prepare the project folders. The shell steps later in this document read
# and write below `plateletopedia_ref/`.
# NOTE(review): presumably setup_folders() creates that directory -- confirm
# against the function's documentation.
setup_folders(project_name = "plateletopedia")

# Ensembl references: releases 102 and 104, each for human and mouse.
# NOTE(review): `just_check = FALSE` presumably performs the actual retrieval
# instead of only checking what is available -- confirm.
retrieve_ensembl_refs(
  version_number = 102,
  species = "human",
  just_check = FALSE
)
retrieve_ensembl_refs(
  version_number = 102,
  species = "mouse",
  just_check = FALSE
)

retrieve_ensembl_refs(
  version_number = 104,
  species = "human",
  just_check = FALSE
)
retrieve_ensembl_refs(
  version_number = 104,
  species = "mouse",
  just_check = FALSE
)

# GENCODE references: numeric releases (38, 37) and "M"-prefixed releases
# (M27, M25). The paths used further down pair 37/38 with GRCh38 genomes and
# M25/M27 with GRCm38/GRCm39 genomes.
retrieve_gencode_refs(version_number = "38", just_check = FALSE)
retrieve_gencode_refs(version_number = "M27", just_check = FALSE)

retrieve_gencode_refs(version_number = "37", just_check = FALSE)
retrieve_gencode_refs(version_number = "M25", just_check = FALSE)

Creating indices for alignment and quantifications

Quantifications with Salmon

# Print the Salmon version, so the release used to build the indices below is on record.
salmon --version

## for Ensembl
# For every genome FASTA: collect the sequence names from the ">" header
# lines (first space-delimited field only) into a decoys.txt placed next to
# the FASTA, then strip the leading ">" in place. `sed -i.bak` leaves a
# decoys.txt.bak copy of the unstripped list behind.
ensembl_genomes=(
  plateletopedia_ref/Ensembl.GRCh38.102/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
  plateletopedia_ref/Ensembl.GRCh38.104/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
  plateletopedia_ref/Ensembl.GRCm38.102/Mus_musculus.GRCm38.dna.primary_assembly.fa.gz
  plateletopedia_ref/Ensembl.GRCm39.104/Mus_musculus.GRCm39.dna.primary_assembly.fa.gz
)

for genome_fa in "${ensembl_genomes[@]}"; do
  # Same directory as the FASTA, fixed file name.
  decoy_list="${genome_fa%/*}/decoys.txt"
  grep "^>" <(gunzip -c "${genome_fa}") | cut -d " " -f 1 > "${decoy_list}"
  sed -i.bak -e 's/>//g' "${decoy_list}"
done



## for Gencode
# For every genome FASTA: collect the sequence names from the ">" header
# lines (first space-delimited field only) into a decoys.txt placed next to
# the FASTA, then strip the leading ">" in place. `sed -i.bak` leaves a
# decoys.txt.bak copy of the unstripped list behind.
gencode_genomes=(
  plateletopedia_ref/GENCODE_37/GRCh38.primary_assembly.genome.fa.gz
  plateletopedia_ref/GENCODE_38/GRCh38.primary_assembly.genome.fa.gz
  plateletopedia_ref/GENCODE_M25/GRCm38.primary_assembly.genome.fa.gz
  plateletopedia_ref/GENCODE_M27/GRCm39.primary_assembly.genome.fa.gz
)

for genome_fa in "${gencode_genomes[@]}"; do
  # Same directory as the FASTA, fixed file name.
  decoy_list="${genome_fa%/*}/decoys.txt"
  grep "^>" <(gunzip -c "${genome_fa}") | cut -d " " -f 1 > "${decoy_list}"
  sed -i.bak -e 's/>//g' "${decoy_list}"
done





# Build one "gentrome" per GENCODE release: the transcript sequences first,
# immediately followed by the genome. The inputs are concatenated while still
# gzip-compressed (a sequence of gzip members is itself a valid gzip file).
# Each entry is <release>:<genome assembly used by that release>.
for pair in 37:GRCh38 38:GRCh38 M25:GRCm38 M27:GRCm39; do
  release="${pair%%:*}"
  assembly="${pair#*:}"
  ref_dir="plateletopedia_ref/GENCODE_${release}"

  cat "${ref_dir}/gencode.v${release}.transcripts.fa.gz" \
      "${ref_dir}/${assembly}.primary_assembly.genome.fa.gz" \
    > "${ref_dir}/gentrome.fa.gz"
done



# Build a Salmon index per GENCODE release from its gentrome, passing the
# matching decoys.txt via -d. 12 threads are used; --gencode is set because
# the transcript FASTA files come from GENCODE.
for release in 37 38 M25 M27; do
  ref_dir="plateletopedia_ref/GENCODE_${release}"

  salmon index \
    -t "${ref_dir}/gentrome.fa.gz" \
    -d "${ref_dir}/decoys.txt" \
    -p 12 \
    -i "plateletopedia_ref/salmon_index_gentrome_GENCODE_${release}" \
    --gencode
done

Alignments with STAR

# Print the STAR version, so the release used to build the indices below is on record.
STAR --version

# Create the output directories for the four STAR indices.
# -p makes this step safe to re-run: an already existing directory is not an
# error, and a missing parent directory is created as well.
mkdir -p \
  plateletopedia_ref/STAR_index_GENCODE_37 \
  plateletopedia_ref/STAR_index_GENCODE_38 \
  plateletopedia_ref/STAR_index_GENCODE_M25 \
  plateletopedia_ref/STAR_index_GENCODE_M27

# The STAR commands further down reference the uncompressed .fa and .gtf
# files, so decompress them here. --keep leaves the original .gz files in
# place (they are also used, still compressed, for the Salmon gentromes).
star_inputs=(
  # genome sequences
  plateletopedia_ref/GENCODE_37/GRCh38.primary_assembly.genome.fa.gz
  plateletopedia_ref/GENCODE_38/GRCh38.primary_assembly.genome.fa.gz
  plateletopedia_ref/GENCODE_M25/GRCm38.primary_assembly.genome.fa.gz
  plateletopedia_ref/GENCODE_M27/GRCm39.primary_assembly.genome.fa.gz
  # gene annotations
  plateletopedia_ref/GENCODE_37/gencode.v37.annotation.gtf.gz
  plateletopedia_ref/GENCODE_38/gencode.v38.annotation.gtf.gz
  plateletopedia_ref/GENCODE_M25/gencode.vM25.annotation.gtf.gz
  plateletopedia_ref/GENCODE_M27/gencode.vM27.annotation.gtf.gz
)

for gz_file in "${star_inputs[@]}"; do
  gunzip --keep "${gz_file}"
done


# Generate one STAR genome index per GENCODE release, using 16 threads, the
# uncompressed genome FASTA and the matching annotation GTF.
# Each entry is <release>:<genome assembly used by that release>.
# NOTE(review): --sjdbOverhang 100 is the value the STAR manual describes as
# working well in most cases; confirm it suits the read lengths in use.
for pair in 37:GRCh38 38:GRCh38 M25:GRCm38 M27:GRCm39; do
  release="${pair%%:*}"
  assembly="${pair#*:}"
  ref_dir="plateletopedia_ref/GENCODE_${release}"

  STAR --runThreadN 16 --runMode genomeGenerate \
    --genomeDir "plateletopedia_ref/STAR_index_GENCODE_${release}" \
    --genomeFastaFiles "${ref_dir}/${assembly}.primary_assembly.genome.fa" \
    --sjdbGTFfile "${ref_dir}/gencode.v${release}.annotation.gtf" \
    --sjdbOverhang 100
done


federicomarini/plateletopedia documentation built on Dec. 20, 2021, 7:49 a.m.