inst/doc/QSutils-Diversity.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----install,message=FALSE----------------------------------------------------
BiocManager::install("QSutils")
library(QSutils)

## ----load-ex------------------------------------------------------------------
filepath<-system.file("extdata","ToyData_10_50_1000.fna", package="QSutils")
lst <- ReadAmplSeqs(filepath,type="DNA")
lst

## ----numbhaplo----------------------------------------------------------------
length(lst$hseqs)

## ----segsites-----------------------------------------------------------------
SegSites(lst$hseqs)

## ----nummutations-------------------------------------------------------------
TotalMutations(lst$hseqs)

## ----shannon------------------------------------------------------------------
Shannon(lst$nr)
NormShannon(lst$nr)

## ----shannon var--------------------------------------------------------------
ShannonVar(lst$nr)
NormShannonVar(lst$nr)

## ----ginnisimpsion------------------------------------------------------------
GiniSimpson(lst$nr)
GiniSimpsonMVUE(lst$nr)

## ----ginivar------------------------------------------------------------------
GiniSimpsonVar(lst$nr)

## ----hill---------------------------------------------------------------------
Hill(lst$nr,q=1)

## ----hillplot, fig.cap="Hill numbers profile", eval=FALSE---------------------
#  HillProfile(lst$nr)
#  plot(HillProfile(lst$nr),type ="b", main="Hill numbers obtained
#      with HillProfile")

## ----hillprofile,echo=FALSE---------------------------------------------------
HillProfile(lst$nr)

## ----plothill, fig.cap="Hill numbers profile", echo=FALSE---------------------
plot(HillProfile(lst$nr),type ="b", main="Hill numbers obtained
    with HillProfile")

## ----renyi--------------------------------------------------------------------
Renyi(lst$nr,q=3)

## ----reniyprofile,fig.cap="Rényi entropy profile",eval=FALSE------------------
#  RenyiProfile(lst$nr)
#  plot(RenyiProfile(lst$nr),type ="b", main="Rényi entropy obtained with
#      RenyiProfile")

## ----plotreniy,echo=FALSE-----------------------------------------------------
RenyiProfile(lst$nr)

## ----reniyplot,fig.cap="Rényi entropy profile",echo=FALSE---------------------
plot(RenyiProfile(lst$nr),type ="b", main="Rényi entropy obtained with
    RenyiProfile")

## ----hcq----------------------------------------------------------------------
HCq(lst$nr, q= 4)

## ----hcqvar-------------------------------------------------------------------
HCqVar(lst$nr, q= 4)

## ----hcprofile,fig.cap="Havrda-Charvat entropy profile",eval=FALSE------------
#  HCqProfile(lst$nr)
#  plot(HCqProfile(lst$nr),type ="b", main="Havrda-Charvat entropy obtained
#      with HCqProfile")

## ----hcqplot,echo=FALSE-------------------------------------------------------
HCqProfile(lst$nr)

## ----plothcq,fig.cap="Havrda-Charvat entropy profile",echo=FALSE--------------
plot(HCqProfile(lst$nr),type ="b", main="Havrda-Charvat entropy obtained 
    with HCqProfile")

## ----dist ,message=FALSE,fig.cap="Correlation among haplotype distances"------
dst <- DNA.dist(lst$hseqs,model="raw")
library(psych)
D <- as.matrix(dst)
rownames(D) <- sapply(rownames(D),function(str) strsplit(str,
                split="\\|")[[1]][1])
colnames(D) <- rownames(D)
D

## ----mfe----------------------------------------------------------------------
MutationFreq(dst) 

## ----fad----------------------------------------------------------------------
FAD(dst)

## ----pi_e---------------------------------------------------------------------
NucleotideDiversity(dst)  

## ----mfm----------------------------------------------------------------------
nm <- nmismatch(pairwiseAlignment(lst$hseqs,lst$hseqs[1]))
MutationFreq(nm=nm,nr=lst$nr,len=width(lst$hseqs)[1])

## ----mfvar--------------------------------------------------------------------
MutationFreqVar(nm,lst$nr,len=width(lst$hseqs)[1])

## ----pi-----------------------------------------------------------------------
NucleotideDiversity(dst,lst$nr)

## ----rao----------------------------------------------------------------------
RaoPow(dst,4,lst$nr)

## ----raovar-------------------------------------------------------------------
RaoVar(dst,lst$nr)

## ----raoprofile,fig.cap="Havrda-Charvat entropy profile",eval=FALSE-----------
#  RaoPowProfile(dst,lst$nr)
#  plot(RaoPowProfile(dst,lst$nr),type ="b", main="Rao entropy obtained
#      with RaoPowProfile")

## ----raoplot,echo=FALSE-------------------------------------------------------
RaoPowProfile(dst,lst$nr)

## ----plotrao,fig.cap="Rao entropy profile",echo=FALSE-------------------------
plot(RaoPowProfile(dst,lst$nr),type ="b", main="Rao entropy obtained 
        with RaoPowProfile")

## ----downsampling,fig.cap="Diversity index variations due to sample size"-----
set.seed(123)
n <- 2000
y <- geom.series(n,0.8)+geom.series(n,0.00025)
nr.pop <- round(y*1e7)

thr <- 0.1
sz1 <- 5000
sz2 <- 10000

qs.sample <- function(nr.pop,sz1,sz2){ 
    div <- numeric(6)
    nr.sz1 <- table(sample(length(nr.pop),size=sz1,replace=TRUE,p=nr.pop))
    div[1] <- Shannon(nr.sz1)
    nr.sz1 <- nr.sz1[nr.sz1>=sz1*thr/100]
    div[3] <- Shannon(nr.sz1)
    div[5] <- Shannon(nr.sz1[DSFT(nr.sz1,sz1)])
    
    nr.sz2 <- table(sample(length(nr.pop),size=sz2,replace=TRUE,p=nr.pop))
    div[2] <- Shannon(nr.sz2)
    nr.sz2 <- nr.sz2[nr.sz2>=sz2*thr/100]
    div[4] <- Shannon(nr.sz2)
    div[6] <- Shannon(nr.sz2[DSFT(nr.sz2,sz1)])
    div
}

hpl.sim <- replicate(2000,qs.sample(nr.pop,sz1,sz2))
nms <- paste(c(sz1,sz2))

par(mfrow=c(1,3))

boxplot(t(hpl.sim[1:2,]),names=nms,col="lavender",las=2,
        ylab="Shannon entropy",main="raw")
boxplot(t(hpl.sim[3:4,]),names=nms,col="lavender",las=2,
        ylab="Shannon entropy",main="filt")
boxplot(t(hpl.sim[5:6,]),names=nms,col="lavender",las=2,
        ylab="Shannon entropy",main="DSFT")

## ----ex-dsft------------------------------------------------------------------
set.seed(123)
n <- 2000
y <- geom.series(n,0.8)+geom.series(n,0.0002)
nr.pop <- round(y*1e7)
rare <- nr.pop/sum(nr.pop) < 0.01
RHL <- sum(nr.pop[rare])/sum(nr.pop)
round(RHL,4)

## ----ex2-dsft-----------------------------------------------------------------
thr <- 0.1
sz1 <- 5000
sz2 <- 10000
qs.sample <- function(nr.pop,sz1,sz2){
    div <- numeric(2)
    nr.sz1 <- table(sample(length(nr.pop),size=sz1,replace=TRUE,p=nr.pop))
    rare <- nr.sz1/sum(nr.sz1) < 0.01
    div[1] <- sum(nr.sz1[rare])/sum(nr.sz1)
    nr.sz2 <- table(sample(length(nr.pop),size=sz2,replace=TRUE,p=nr.pop))
    rare <- nr.sz1/sum(nr.sz2) < 0.01
    div[2] <- sum(nr.sz2[rare])/sum(nr.sz2)
    div
}

hpl.sim <- replicate(2000,qs.sample(nr.pop,sz1,sz2))
nms <- paste(c(sz1,sz2))
boxplot(t(hpl.sim),names=nms,col="lavender",las=2,ylab="RHL")
abline(h=RHL,lty=4,col="navy")

## ----echo=FALSE---------------------------------------------------------------
sessionInfo()

Try the QSutils package in your browser

Any scripts or data that you put into this service are public.

QSutils documentation built on Nov. 8, 2020, 7:42 p.m.