## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -----------------------------------------------------------------------------
## load vignette specific libraries
library(CRBHits)
suppressPackageStartupMessages(library(Biostrings))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(curl))
## compile LAST and DAGchainer for the vignette
vignette.paths <- CRBHits::make_vignette()
## -----------------------------------------------------------------------------
## example how to check coding sequences if all are a mutiple of three
## load CDS file
cdsfile <- system.file("fasta", "ath.cds.fasta.gz", package="CRBHits")
cds <- Biostrings::readDNAStringSet(cdsfile)
## the following statement should return TRUE,
## if all sequences are a mutiple of three
all(Biostrings::width(cds) %% 3==0)
## -----------------------------------------------------------------------------
## example how to get CRBHit pairs from two CDS using classical RBH
## define CDS file1
cdsfile1 <- system.file("fasta", "ath.cds.fasta.gz", package="CRBHits")
## define CDS file2
cdsfile2 <- system.file("fasta", "aly.cds.fasta.gz", package="CRBHits")
## get CDS1
cds1 <- Biostrings::readDNAStringSet(cdsfile1)
## get CDS2
cds2 <- Biostrings::readDNAStringSet(cdsfile2)
## example how to perform classical RBH
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
crbh=FALSE,
lastpath=vignette.paths[1])
## show summary
summary(ath_aly_crbh)
## get help
#?cds2rbh
## -----------------------------------------------------------------------------
## example how to get CRBHit pairs using one thread
## and plot CRBHit algorithm fitting curve
## example how to perform CRBH
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
plotCurve=TRUE,
lastpath=vignette.paths[1])
## show summary
summary(ath_aly_crbh)
## get help
#?cds2rbh
## -----------------------------------------------------------------------------
## example showing cds2rbh() results
## show dimension and first parts of retained hit pairs
dim(ath_aly_crbh$crbh.pairs)
head(ath_aly_crbh$crbh.pairs)
## show first retained hit pairs for the query > target matrix
head(ath_aly_crbh$crbh1)
## get the number of CRBHit classified as rbh and sec hit pairs
table(ath_aly_crbh$crbh1$rbh_class)
table(ath_aly_crbh$crbh2$rbh_class)
## -----------------------------------------------------------------------------
## example how to use the fitting function for manual plotting
## plot fitting function
curve(ath_aly_crbh$rbh1_rbh2_fit(x),
from=1,
to=1000,
xlab="alnlength",
ylab="-log10(evalue)",
main="CRBH fitting")
## -----------------------------------------------------------------------------
## example how to retain single direction secondary homologs
## get CRBHit pairs with keepSingleDirection = TRUE
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
plotCurve=TRUE,
keepSingleDirection=TRUE,
lastpath=vignette.paths[1])
## get the number of CRBHit classified as rbh and sec hit pairs
table(ath_aly_crbh$crbh1$rbh_class)
table(ath_aly_crbh$crbh2$rbh_class)
dim(ath_aly_crbh)
## get help
#?cds2rbh
## -----------------------------------------------------------------------------
## example how to filter prior crbh for query coverage of 50%
## get CRBHit pairs with direct query coverage filter
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
plotCurve=TRUE,
qcov=0.5,
lastpath=vignette.paths[1])
dim(ath_aly_crbh$crbh.pairs)
## get help
#?cds2rbh
## -----------------------------------------------------------------------------
## detailed explanation for the Rost (1999) twilight-zone filter
## define eq2 from Rost (1999)
get_pident_by_length <- function(x){
eq2 <- function(L){
if(L<=11){return(100)}
if(L<=450){return(480*(L^(-0.32*(1+(exp(-L/1000))))))}
if(L>450){return(19.5)}
}
return(unlist(lapply(x, eq2)))
}
## plot expected pident by alignment length using eq2 from Rost (1999)
curve(get_pident_by_length,
11,
500,
pch=20,
xlab="alignment length",
ylab="pident",
main="expected protein identity (eq2; Rost B. 1999)")
## -----------------------------------------------------------------------------
## example how to filter prior crbh for eq2 from Rost (1999)
## get CRBHit pairs with direct twilight-zone filter
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
plotCurve=TRUE,
rost1999=TRUE,
lastpath=vignette.paths[1])
dim(ath_aly_crbh$crbh.pairs)
## get help
#?cds2rbh
## get help
#?filter.alnlen
## get help
#?filter.eval
## get help
#?filter.pident
## get help
#?filter.qcov
## get help
#?filter.rost1999
## get help
#?filter.tcov
## -----------------------------------------------------------------------------
## example for a custom filters
## define custom filter for e.g. bit score (column 12)
myfilter1 <- function(rbh, value=500.0){
return(dplyr::filter(rbh, bit_score>=value))
}
## define custom filter for e.g. corrected query_coverage
myfilter2 <- function(rbh, value=0.5){
return(dplyr::filter(rbh,
((alignment_length-mismatches-gap_opens) / query_length)>=value))
}
## get CRBHit pairs with custom filter list
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
plotCurve=TRUE,
filter=list(myfilter1, myfilter2),
lastpath=vignette.paths[1])
dim(ath_aly_crbh$crbh.pairs)
## -----------------------------------------------------------------------------
## example to extract CRBHit pairs classified as rbh
## reduce to rbh_class rbh
data("ath_aly_crbh", package = "CRBHits")
head(dplyr::filter(ath_aly_crbh$crbh1, rbh_class=="rbh"))
head(dplyr::filter(ath_aly_crbh$crbh2, rbh_class=="rbh"))
## -----------------------------------------------------------------------------
## example how to get crbh from two coding fasta files using median fitting
## get CRBHit pairs with median fitting
ath_aly_crbh <- cds2rbh(
cds1=cds1,
cds2=cds2,
plotCurve=TRUE,
fit.type="median",
lastpath=vignette.paths[1])
## get help
#?cds2rbh
## -----------------------------------------------------------------------------
## example to get a codon alignment
## define two CDS
cds1 <- Biostrings::DNAString("ATGCAACATTGC")
cds2 <- Biostrings::DNAString("ATGCATTGC")
## get codon alignment
MSA2dist::cds2codonaln(
cds1=cds1,
cds2=cds2)
## get help
#?MSA2dist::cds2codonaln
## -----------------------------------------------------------------------------
## example to alter the substitionMatrix and use the BLOSUM45 cost matrix
## for the codon alignment
## get codon alignemnt with BLOSUM45 cost matrix
MSA2dist::cds2codonaln(
cds1=cds1,
cds2=cds2,
substitutionMatrix="BLOSUM45")
## -----------------------------------------------------------------------------
## example to remove codon gaps
## get codon alignment with gaps removed
MSA2dist::cds2codonaln(
cds1=cds1,
cds2=cds2,
remove.gaps=TRUE)
## -----------------------------------------------------------------------------
## calculate Ka/Ks on two CDS
## load example sequence data
data("ath", package="CRBHits")
data("aly", package="CRBHits")
## select a sequence pair according to a best hit pair (done for you)
cds1 <- ath[1]
cds2 <- aly[282]
## calculate Ka/Ks values on two CDS using Li model
MSA2dist::dnastring2kaks(
c(cds1,
cds2),
model="Li",
isMSA=FALSE)
## get help
#?MSA2dist::dnastring2kaks
## -----------------------------------------------------------------------------
## example to use an alternative substitutionMatrix for the codon alignment
## and obtain Ka/Ks
## calculate Ka/Ks values on two CDS using Li model and BLOSUM45 cost matrix
MSA2dist::dnastring2kaks(
c(cds1,
cds2),
model="Li",
isMSA=FALSE,
substitutionMatrix="BLOSUM45")
## -----------------------------------------------------------------------------
## example how to get CRBHit pairs from two coding fasta files
cdsfile1 <- system.file("fasta", "ath.cds.fasta.gz", package="CRBHits")
cdsfile2 <- system.file("fasta", "aly.cds.fasta.gz", package="CRBHits")
ath <- Biostrings::readDNAStringSet(cdsfile1)
aly <- Biostrings::readDNAStringSet(cdsfile2)
## the following function calculates CRBHit pairs
## using one thread and plots the fitted curve
ath_aly_crbh <- cds2rbh(
cds1=ath,
cds2=aly,
lastpath=vignette.paths[1])
## calculate Ka/Ks using the CRBHit pairs
ath_aly_crbh$crbh.pairs <- head(ath_aly_crbh$crbh.pairs)
ath_aly_crbh.kaks <- rbh2kaks(
rbhpairs=ath_aly_crbh,
cds1=ath,
cds2=aly,
model="Li")
head(ath_aly_crbh.kaks)
## get help
#?rbh2kaks
## -----------------------------------------------------------------------------
## calculate Ka/Ks using the CRBHit pairs and multiple threads
ath_aly_crbh.kaks <- rbh2kaks(
rbhpairs=ath_aly_crbh,
cds1=ath,
cds2=aly,
model="Li",
threads=2)
head(ath_aly_crbh.kaks)
## get help
#?rbh2kaks
## -----------------------------------------------------------------------------
## calculate Ka/Ks using the CRBHit pairs and multiple threads
ath_aly_crbh.kaks <- rbh2kaks(
rbhpairs=ath_aly_crbh,
cds1=ath,
cds2=aly,
model="Li",
threads=2,
substitutionMatrix="BLOSUM45")
head(ath_aly_crbh.kaks)
## get help
#?rbh2kaks
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.