First, install the Biostrings package from Bioconductor.
# Install the BiocManager helper only when it is not already available,
# then use it to install Biostrings.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("Biostrings")
Second, install additional packages from CRAN.
In most cases you need to first install the following system-wide packages to be able to compile the R dependencies.
Ubuntu/Debian
# Install the libcurl, OpenSSL and libxml2 development packages with apt-get
sudo apt-get install libcurl4-openssl-dev libssl-dev libxml2-dev
CentOS
# Install the libcurl, OpenSSL and libxml2 development packages with yum
sudo yum install libcurl-devel openssl-devel libxml2-devel
# Install all CRAN dependencies in one vectorized call instead of issuing
# a separate install.packages() call per package.
install.packages(c(
  "devtools",
  "Rcpp",
  "RcppThread",
  "ape",
  "foreach",
  "doMC",
  "magrittr",
  "rlist"
))
Third, install the distIUPAC
package from GitHub or the GWDG GitLab using the devtools package.
# Attach devtools so that install_github() is available
library(devtools)
# Install distIUPAC from GitHub, building its vignettes and skipping
# automatic dependency installation
install_github(
  "kullrich/distIUPAC",
  build_vignettes = TRUE,
  dependencies = FALSE
)
# Alternative source (GitLab), left disabled:
#install_git("https://gwdg.gitlab.de/evolgen/distIUPAC.git", build_vignettes = TRUE, dependencies = FALSE)
These vignettes introduce distIUPAC
# Attach the distIUPAC package
library(distIUPAC)
# Browse the vignettes of distIUPAC
browseVignettes(package = "distIUPAC")
# Load IUPAC-encoded nucleotide sequences with Biostrings.
# Change the path to point to your own input file; the path built here
# points into the installed distIUPAC package directory.
# readDNAStringSet() is namespace-qualified so the call does not depend on
# Biostrings being attached to the search path.
input.fasta <- Biostrings::readDNAStringSet(
  file.path(find.package("distIUPAC"), "data", "seqIUPAC.fasta.gz")
)
# Load the example sequences from the distIUPAC package and display them
data(list = "MySequences", package = "distIUPAC")
MySequences
# Consider only a subset of all sequences (positions 5 to 34)
CAS.pos <- 5:34
# Get IUPAC distances using a pre-defined distance matrix
CAS.distIUPAC <- distIUPAC(as.character(MySequences[CAS.pos]))
# Pairwise IUPAC distances as a distance matrix
as.dist(CAS.distIUPAC$distIUPAC)
# Pairwise used sites as a distance matrix
as.dist(CAS.distIUPAC$sitesUsed)
# Plot a bionj tree based on the IUPAC distances; bionj() is
# namespace-qualified so the call works even when ape is not attached
plot(ape::bionj(as.dist(CAS.distIUPAC$distIUPAC)))
# Get IUPAC distances using your own distance matrix:
# start from the matrix returned by scoreMatrix() and set the C/Y entry to 1
MyScoreMatrix <- scoreMatrix()
MyScoreMatrix["C", "Y"] <- 1
distIUPACmatrix(
  as.character(MySequences[CAS.pos]),
  MyScoreMatrix
)
# Mean IUPAC distances using a pre-defined distance matrix on sliding
# windows (xStats); the population is selected via x.pos
xStats(MySequences, x.pos = CAS.pos)
# Mean IUPAC distances using a pre-defined distance matrix on sliding
# windows using multiple threads (xStats); the sequences are subset up front
xStats(MySequences[CAS.pos], threads = 4)
# Get dXY calculations for three populations ((P1:i,P2:x),P3:y) on sliding
# windows (xyiStats)
AFG.pos <- 82:87
SPRE.pos <- 106:113
xyiStats(
  MySequences,
  x.pos = AFG.pos,
  y.pos = SPRE.pos,
  i.pos = CAS.pos,
  threads = 4
)
# Calculate ABBA-BABA statistics for the four-taxon scenario
# (((P1:i,P2:x),P3:y),P4:o); the outgroup is the first sequence
APO.pos <- 1
# NOTE(review): confirm that xyiStats() accepts `o.pos` and `do.ABBA`;
# only the missing comma before `threads` was repaired here.
xyiStats(
  MySequences,
  x.pos = AFG.pos,
  y.pos = SPRE.pos,
  i.pos = CAS.pos,
  o.pos = APO.pos,
  threads = 4,
  do.ABBA = TRUE
)
# Get bi-allelic sites for the population selected by CAS.pos
CAS.biSites <- biSites(MySequences, x.pos = CAS.pos)
# Show the alignment columns of the first few bi-allelic sites
as.matrix(MySequences[CAS.pos])[, head(CAS.biSites)]
MIT (see LICENSE)
Please report any errors or requests regarding distIUPAC
to Kristian Ullrich (ullrich@evolbio.mpg.de)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.