inst/doc/tapseq_primer_design.R

## ---- include=FALSE-----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- eval=FALSE--------------------------------------------------------------
#  Sys.setenv(PATH = paste(Sys.getenv("PATH"), "/full/path/to/primer3-x.x.x/src", sep = ":"))
#  
#  Sys.setenv(PATH = paste(Sys.getenv("PATH"), "/full/path/to/blast+/ncbi-blast-x.x.x+/bin",
#                          sep = ":"))

## ----message=FALSE, warning=FALSE---------------------------------------------
library(TAPseq)
library(GenomicRanges)
library(BiocParallel)

# gene annotations for chromosome 11 genomic region
data("chr11_genes")

# convert to GRangesList containing annotated exons per gene. for the sake of time we only use
# a small subset of the chr11 genes. use the full set for a more realistic example
target_genes <- split(chr11_genes, f = chr11_genes$gene_name)
target_genes <- target_genes[18:27]

# K562 Drop-seq read data (this is just a small example file within the R package)
dropseq_bam <- system.file("extdata", "chr11_k562_dropseq.bam", package = "TAPseq")

# register backend for parallelization
register(MulticoreParam(workers = 5))

# infer polyA sites from Drop-seq data
polyA_sites <- inferPolyASites(target_genes, bam = dropseq_bam, polyA_downstream = 50,
                               wdsize = 100, min_cvrg = 1, parallel = TRUE)

## ---- eval=FALSE--------------------------------------------------------------
#  library(rtracklayer)
#  export(polyA_sites, con = "path/to/polyA_sites.bed", format = "bed")
#  export(unlist(target_genes), con = "path/to/target_genes.gtf", format = "gtf")

## ---- message=FALSE-----------------------------------------------------------
# truncate transcripts at inferred polyA sites
truncated_txs <- truncateTxsPolyA(target_genes, polyA_sites = polyA_sites, parallel = TRUE)

## ---- message=FALSE-----------------------------------------------------------
library(BSgenome)

# human genome BSgenome object (needs to be istalled from Bioconductor)
hg38 <- getBSgenome("BSgenome.Hsapiens.UCSC.hg38")

# get sequence for all truncated transcripts
txs_seqs <- getTxsSeq(truncated_txs, genome = hg38)

## -----------------------------------------------------------------------------
# create TAPseq IO for outer forward primers from truncated transcript sequences
outer_primers <- TAPseqInput(txs_seqs, target_annot = truncated_txs,
                             product_size_range = c(350, 500), primer_num_return = 5)

## ---- eval=FALSE--------------------------------------------------------------
#  # design 5 outer primers for each target gene
#  outer_primers <- designPrimers(outer_primers)

## ---- include=FALSE-----------------------------------------------------------
# Primer3 and BLAST are not run in this vignette, because they might be missing on the build system.
# already designed primers are loaded from internal data
outer_primers <- TAPseq:::vignette_data$outer_primers

## ---- results="hide"----------------------------------------------------------
# get primers and pcr products for specific genes
tapseq_primers(outer_primers$HBE1)
pcr_products(outer_primers$HBE1)

# these can also be accessed for all genes
tapseq_primers(outer_primers)
pcr_products(outer_primers)

## ---- eval=FALSE--------------------------------------------------------------
#  # chromosome 11 sequence
#  chr11_genome <- DNAStringSet(getSeq(hg38, "chr11"))
#  names(chr11_genome) <- "chr11"
#  
#  # create blast database
#  blastdb <- file.path(tempdir(), "blastdb")
#  createBLASTDb(genome = chr11_genome, annot = unlist(target_genes), blastdb = blastdb)

## ---- eval=FALSE--------------------------------------------------------------
#  library(BSgenome)
#  
#  # human genome BSgenome object (needs to be istalled from Bioconductor)
#  hg38 <- getBSgenome("BSgenome.Hsapiens.UCSC.hg38")
#  
#  # download and import gencode hg38 annotations
#  url <- "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_32/gencode.v32.annotation.gtf.gz"
#  annot <- import(url, format = "gtf")
#  
#  # extract exon annotations for protein-coding genes to build transcripts
#  tx_exons <- annot[annot$type == "exon" & annot$gene_type == "protein_coding"]
#  
#  # create blast database
#  blastdb <- file.path(tempdir(), "blastdb")
#  createBLASTDb(genome = hg38, annot = tx_exons, blastdb = blastdb)

## ---- eval=FALSE--------------------------------------------------------------
#  # now we can blast our outer primers against the create database
#  outer_primers <- blastPrimers(outer_primers, blastdb = blastdb, max_mismatch = 0,
#                                min_aligned = 0.75)
#  
#  # the primers now contain the number of estimated off-targets
#  tapseq_primers(outer_primers$IFITM1)

## ---- results="hide"----------------------------------------------------------
# select best primer per target gene based on the number of potential off-targets
best_outer_primers <- pickPrimers(outer_primers, n = 1, by = "off_targets")

# each object now only contains the best primer
tapseq_primers(best_outer_primers$IFITM1)

## ---- eval=FALSE--------------------------------------------------------------
#  # create new TsIO objects for inner primers, note the different product size
#  inner_primers <- TAPseqInput(txs_seqs, target_annot = truncated_txs,
#                               product_size_range = c(150, 300), primer_num_return = 5)
#  
#  # design inner primers
#  inner_primers <- designPrimers(inner_primers)
#  
#  # blast inner primers
#  inner_primers <- blastPrimers(inner_primers, blastdb = blastdb, max_mismatch = 0,
#                                min_aligned = 0.75)
#  
#  # pick best primer per target gene
#  best_inner_primers <- pickPrimers(inner_primers, n = 1, by = "off_targets")

## ---- include=FALSE-----------------------------------------------------------
best_inner_primers <- TAPseq:::vignette_data$best_inner_primers

## ---- eval=FALSE--------------------------------------------------------------
#  # use checkPrimers to run Primer3's "check_primers" functionality for every possible primer pair
#  outer_comp <- checkPrimers(best_outer_primers)
#  inner_comp <- checkPrimers(best_inner_primers)

## ---- include=FALSE-----------------------------------------------------------
outer_comp <- TAPseq:::vignette_data$outer_comp
inner_comp <- TAPseq:::vignette_data$inner_comp

## ---- message=FALSE, fig.height=3.4, fig.width=7.15---------------------------
library(dplyr)
library(ggplot2)

# merge outer and inner complementarity scores into one data.frame
comp <- bind_rows(outer = outer_comp, inner = inner_comp, .id = "set")

# add variable for pairs with any complemetarity score higher than 47
comp <- comp %>%
  mutate(high_compl = if_else(primer_pair_compl_any_th > 47 | primer_pair_compl_end_th > 47,
                              true = "high", false = "ok")) %>% 
  mutate(high_compl = factor(high_compl, levels = c("ok", "high")))
                              
# plot complementarity scores
ggplot(comp, aes(x = primer_pair_compl_any_th, y = primer_pair_compl_end_th)) +
  facet_wrap(~set, ncol = 2) +
  geom_point(aes(color = high_compl), alpha = 0.25) +
  scale_color_manual(values = c("black", "red"), drop = FALSE) +
  geom_hline(aes(yintercept = 47), colour = "darkgray", linetype = "dashed") +
  geom_vline(aes(xintercept = 47), colour = "darkgray", linetype = "dashed") +
  labs(title = "Complementarity scores TAP-seq primer combinations",
       color = "Complementarity") +
  theme_bw()

## ---- results="hide"----------------------------------------------------------
# create data.frames for outer and inner primers
outer_primers_df <- primerDataFrame(best_outer_primers)
inner_primers_df <- primerDataFrame(best_inner_primers)

# the resulting data.frames contain all relevant primer data
head(outer_primers_df)

## ---- results="hide"----------------------------------------------------------
# create BED tracks for outer and inner primers with custom colors
outer_primers_track <- createPrimerTrack(best_outer_primers, color = "steelblue3")
inner_primers_track <- createPrimerTrack(best_inner_primers, color = "goldenrod1")

# the output data.frames contain lines in BED format for every primer
head(outer_primers_track)
head(inner_primers_track)

# export tracks to .bed files ("" writes to the standard output, replace by a file name)
exportPrimerTrack(outer_primers_track, con = "")
exportPrimerTrack(inner_primers_track, con = "")

## ---- eval=FALSE--------------------------------------------------------------
#  exportPrimerTrack(createPrimerTrack(best_outer_primers, color = "steelblue3"),
#                    createPrimerTrack(best_inner_primers, color = "goldenrod1"),
#                    con = "path/to/primers.bed")

Try the TAPseq package in your browser

Any scripts or data that you put into this service are public.

TAPseq documentation built on Nov. 8, 2020, 7:51 p.m.