Chebuu
The first infectious cause of cancer was identified in 1911 by Peyton Rous who demonstrated the transmissibility of a tumor in fowl through injection of sub 0.2 micron filtrate of the tumor .
devtools::install_github('Chebuu/HPV-Serovars')
library(HPVSerovars)
(Not run) This vignette is packaged and accessible via:
browseVignettes('HPVSerovars')
.
Download OligoArrayAux and see the following vignette for installation instructions.
library(devtools)
devtools::install_github('chebuu/Design-Group-Specific-Primers')
library(Design_Group_Specific_Primers)
vignette('Installing-OligoArrayAux', package = 'Design_Group_Specific_Primers')
# Install from Bioconductor
if (!requireNamespace("BiocManager", quietly = T))
install.packages("BiocManager")
BiocManager::install("Biostrings")
BiocManager::install("DECIPHER")
# Open Biostrings vignettes
library(Biostrings)
browseVignettes("Biostrings")
# Open DECIPHER docs and vignettes
library(DECIPHER)
help("DECIPHER")
browseVignettes("DECIPHER")
library(dplyr)
library(tidyr)
library(stringr)
library(Biostrings)
library(DECIPHER)
library(HPVSerovars)
doAlignment <- function(xset) {
useqs <- unique(xset)
uidxs <- match(xset, useqs)
aseqs <- AlignSeqs(useqs, verbose=F)
aseqs[uidxs]
}
I copied the tables from Wiki:
Human_papillomavirus_infection
and included them in the R package assoc. w/ this document
github.com/Chebuu/HPV-Serovars
.
data(HPV.Sequelae)
data(HPV.GenitalCancerRisk)
The Wiki data contains phenotypic categorizations of several HPV strains. It’s useful for testing.
HPV <- c(HPV.Sequelae, HPV.GenitalCancerRisk)
p.1 <- unique(unlist(HPV))
knitr::kable(
sprintf('HPV-%s', sort(p.1)), col.names = '',
caption = sprintf(
'Unique HPV serotypes (N=%s) (Total=%s): ',
length(p.1), length(HPV)
)
)
hist(unlist(HPV), max(unlist(HPV)), col='blue'); head(HPV, 4)
I included GenBank queries for all the HPV DNA in this repo. If you want to run them just paste in the search bar at: https://www.ncbi.nlm.nih.gov/nuccore
https://www.ncbi.nlm.nih.gov/nuccore
"Human papillomavirus"[Primary Organism]
AND viruses[filter]
NOT Polyamides[All Fields]
NOT Method[All Fields]
NOT Patent[All Fields]
https://www.ncbi.nlm.nih.gov/nuccore
"Human papillomavirus"[Primary Organism]
AND "complete genome"[All Fields]
NOT Isolate[Title]
https://www.ncbi.nlm.nih.gov/nuccore
"Human papillomavirus"[Primary Organism]
AND viruses[filter]
NOT Polyamides[All Fields]
NOT Method[All Fields]
NOT Patent[All Fields]
AND Oral[All Fields]
A collection of l1 DNA sequences (avg. \~600bp) from various oral isolates of HPV (extracted from results of GenBank query above) are aligned and loaded in SQLite (RAM) along with annotations from GenBank that describe the serotype of each isolate.
DECIPHER
utilities.data(Oral.L1.seqs)
data(Oral.L1.vars)
dbConn <- dbConnect(RSQLite::SQLite(), ':memory:')
Seqs2DB(Oral.L1.seqs, 'XStringSet', dbConn, '')
Add2DB(Oral.L1.vars %>% mutate(identifier = SVAR), dbConn)
dbGetQuery(dbConn, "select * from Seqs") %>% head(4)
tiles.L1 <- TileSeqs(
dbFile = dbConn,
minLength = 18,
maxLength = 29,
minCoverage = 0.8
)
print(
tiles.L1[,c(6,11)] %>% sample_n(6)
)
oligos.L1 <- DesignPrimers(
tiles = tiles.L1,
identifier = 'HPV4',
worstScore = -1E3,
maxPermutations = 5,
minGroupCoverage = 0.85,
minCoverage = 0.85,
minLength = 20,
maxLength = 28
)
head(oligos.L1)
Plot hybridization curves for the reverse compliment of each primer pair (5) in each primer set (7).
plotMeltCurves <- function(temps, effs) {
plot(
temps, effs[,1], ylim=c(0,1),
ylab="Hybridization Efficiency",
xlab=expression(paste("Temperature (", degree, "C)", sep="")),
type="l", lwd=2, col="Blue", main="Denaturation Plot"
)
lines(temps, effs[,2], col="Red", lwd=2)
abline(h=0.5, lty=2, lwd=2, col="Orange")
abline(v=64, lty=2, lwd=2, col="Green")
legend(
"topright",
legend = c(
"Forward Primer",
"Reverse Primer",
"50% Efficiency",
"Annealing Temperature"
),
col = c("Blue", "Red", "Orange", "Green"),
lwd = c(2, 2, 2, 2), lty = c(1, 1, 2, 2)
)
}
meltCurves <-
function(primers, target=reverseComplement(DNAStringSet(primers)), temps=60:75, P=4e-7, ions=.225, doPlot=TRUE, ...) {
fxn <- function(temp) CalculateEfficiencyPCR(primers, target, temp, P=P, ions=ions, ...)
effs <- matrix(unlist(lapply(temps, fxn)), ncol=2, byrow=TRUE)
if (doPlot) plotMeltCurves(temps, effs)
list(temps = temps, effs = effs)
}
nsets <- nrow(oligos.L1)
npair <- ncol(oligos.L1)
for (i in 1:nsets)
for (j in 1:npair)
tryCatch(
{
c(
oligos.L1$forward_primer[i,j],
oligos.L1$reverse_primer[i,j]
) %>% meltCurves
},
error = function(e) NULL
# # ¿ --- It's supposed to be 7o x 17p --- ?
# warning(sprintf('%s [iteration i=%s j=%s]', e, i, j))
)
More primer designs (RFLP, sequencing, etc.):
TYPE <- 'sequence'
MIN_LENGTH <- 15
MAX_LENGTH <- 25
MIN_SIZE <- 60
MAX_SIZE <- 100
LEVELS <- 2
RESOLUTION <- 3
ENZYMES <- NULL
DesignSignatures(
dbConn,
type = TYPE,
minLength = MIN_LENGTH,
maxLength = MAX_LENGTH,
minProductSize = MIN_SIZE,
maxProductSize = MAX_SIZE,
resolution = RESOLUTION,
levels = LEVELS,
enzymes = ENZYMES
)
data(Oral.L1.seqs)
data(Oral.L1.vars)
dbConn <- dbConnect(SQLite(), ':memory:')
print(Oral.L1.seqs)
Seqs2DB(Oral.L1.seqs, 'XStringSet', dbConn, '')
Add2DB(Oral.L1.vars %>% mutate(identifier = SVAR), dbConn)
dbGetQuery(dbConn, "select * from Seqs") %>% head(4)
data(RESTRICTION_ENZYMES)
FOCUS_ID <- 'HPV11'
TYPE <- 'melt'
LEVELS <- 4
MIN_LENGTH <- 15
MAX_LENGTH <- 25
MIN_SIZE <- 60
MAX_SIZE <- 200
RESOLUTION <- seq(75, 300, 15)
RFLP.Oal.L1 <- lapply(
rep(sample(1:3, 3), 3), # Random RE combinations
function(i)
DesignSignatures(
dbConn,
type = TYPE,
identifier = '',
focusID = FOCUS_ID,
enzymes = RESTRICTION_ENZYMES[i],
minProductSize = MIN_SIZE,
maxProductSize = MAX_SIZE,
resolution = RESOLUTION,
levels = LEVELS
)
)
head(RFLP.Oal.L1)
# TYPE <- 'length'
# LEVELS <- 2
# MIN_SIZE <- 200
# MAX_SIZE <- 1400
# RESOLUTION <- c(
# seq(200, 700, 3),
# seq(705, 1000, 5),
# seq(1010, 1400, 10)
# )
#
# TYPE <- 'melt'
# MIN_SIZE <- 55
# MAX_SIZE <- 400
FOCUS_ID <- 'HPV18'
params <- expand.grid(
enzymes = lapply(1:3, function(x)
RESTRICTION_ENZYMES[
sample(1:length(RESTRICTION_ENZYMES), 3)
]
),
minProductSize = seq(80, 150, length.out = 3),
maxProductSize = seq(500, 5000, length.out = 3),
resolution = seq(1, 2, length.out = 3),
levels = seq(5, 20, length.out = 3)
) %>% mutate(type = rep('melt', 243))
gridsearch <- apply(params, 1, function(...)
list(
arg = as.list(...),
out = do.call(
DesignSignatures,
append(
list(
dbFile = dbConn,
tblName = 'Seqs',
identifier = '',
focusID = FOCUS_ID
),
as.list(...)
)
)
)
) %>% bind_rows
objective <- function(x) {
print(x)
}
https://www.ncbi.nlm.nih.gov/nuccore
"Human papillomavirus"[Primary Organism]
AND viruses[filter]
NOT Polyamides[All Fields]
NOT Method[All Fields]
NOT Patent[All Fields]
AND Anal[All Fields]
https://www.ncbi.nlm.nih.gov/nuccore
"Human papillomavirus"[Primary Organism]
NOT Polyamides[All Fields]
NOT Method[All Fields]
NOT Patent[All Fields]
AND Anal[All Fields]
AND "Complete Genome"[All Fields]
Reference genomes for all serovars listed in data(HPV.Sequelae)
and
data(HPV.GenitalCancerRisk)
were selected from among the results of the
following PaVEPaVE query:
https://pave.niaid.nih.gov/#search/search_database/kw
?dbNamespace=Genomes
&includeNR=true
&refCloneOnly=false
&sort=Locus_ID
&sortType=true
&page=600&start=1
&text=Human&showTable=1
&
# data(PaVE.Wikiv)
data(HPV.REF.PaVE.seqs)
data(HPV.REF.PaVE.vars)
testVars <- lapply(grep('[1-9]*', HPV.REF.PaVE.vars, value = T), function(x) {
sapply(HPV.Sequelae, function(y) any(x %in% y))
})
dbConn <- dbConnect(SQLite(), ':memory:')
DECIPHER::Seqs2DB(HPV.REF.PaVE.seqs, 'XStringSet', dbConn, '')
DECIPHER::Add2DB(
data.frame(
# identifier = testVars
identifier = lapply(HPV.REF.PaVE.vars, function(x) grep('[1-9]', x, perl = T))
),
dbConn, verbose = F
)
HPV reference genomes from PaVE categorized by risk of genital (but not anal) cancer.
library(HPVSerovars)
data(gA) # High risk
data(gB) # Med risk
data(gC) # Some risk
dbDisconnect(dbConn)
dbConn <- dbConnect(RSQLite::SQLite(), ':memory:')
Seqs2DB(
c(gA,gB,gC),
type = 'XStringSet',
dbFile = dbConn,
identifier = ''
)
Add2DB(
data.frame(
identifier = c(
rep('A', length(gA)),
rep('B', length(gB)),
rep('C', length(gC))
)
),
dbConn
)
gABC.synt <- FindSynteny(dbConn)
summary(gABC.synt)
pairs(gABC.synt, boxBlocks=TRUE)
unlist(AlignSynteny(gABC.synt, dbConn)) %>% head(4)
From the openPimer vignette:
openPrimeR requires external programs for some features, particularly for computing the physicochemical properties of primers. Please make sure you have the following tools installed on your system such that they are in your system’s path:
MELTING (>= 5.1.1): For melting temperature computations. ViennaRNA (>= 2.2.4): For secondary structure prediction. OligoArrayAux (>= 3.8): For primer efficiency computations as performed by DECIPHER. MAFFT (>= 7.305): For computing multiple sequence alignments. Pandoc (>= 1.19.1): For creating PDF reports.
If you would like to be able to access the immunoglobulin repository IMGT from the openPrimeR Shiny app, you should additionally fulfill the following dependencies:
PhantomJS (>= 2.1): For headless website calls. Python (>=2.7.9) and the selenium (>=3.0.1) module: For data extraction scripts. openPrimeR will automatically check for all dependencies and inform you about any missing dependencies when the package is attached.
Note that the tool is still functional if there are missing external programs. However, we recommend that all dependencies are fulfilled to guarantee the best user experience.
library(openPrimeR)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.