Nothing
## ----Auxillary functions, echo=FALSE-----------------------------------------
# Here we provide some auxillary functions which are needed below.
#'Numerical Approximation of characteristic function
#'
#'\code{ApproxCDF} approximates the cdf F when given a characteristic function phi of a centered random variable, using the formula found in Waller (1995) with
#'original reference to Bohman (1974). The procedure can be numerically unstable in the tails of the distribution, so
#'only the center of the approximation is returned. Some simplifying approximations explained in "Numerical inversion of Laplace transform and characteristic function"
#'are used. Note that phi should have a finite first moment.
#'
#'@param phi the characteristic function to be inverted
#'@param H A total of 2H+1 values of F are approximated. By default H of these values are returned unless an interval is provided.
#'@param eta A scaling paramter. By default equidistant points in the interval (-2*phi/eta,2*phi/(eta)) are approximated.
#'@param xlim (optional) limits on the x-axis
#'@param smoothe (optional) Should smoothing be used? If TRUE default weights of the function \code{simple_smoothe} are used. If an object of length > 1 is provided,
#'this will be passed to \code{simple_smoothe}
#'
#'@examples
#'phi <- function(t) exp(-t^2/2)
#'appvals=ApproxCDF(phi,H=1000,eta=0.5,xlim=c(-3,3))
#'plot(appvals[[1]],appvals[[2]],type="l",lwd=2)
#'lines(appvals[[1]],pnorm(appvals[[1]]),type="l",col="red")
#'
#'phi <- function(t) sqrt(2)*abs(t)*besselK(sqrt(2)*abs(t),1)
#'appvals=ApproxCDF(phi,H=10000,eta=0.1,xlim=c(-3,3))
#'plot(appvals[[1]],appvals[[2]],type="l",lwd=2)
#'lines(appvals[[1]],pt(appvals[[1]],df=2),type="l",col="red")
#'
#'@export
ApproxCDF = function(phi,H=2000,eta=0.5,xlim=NULL,smoothe=FALSE) {
z_vals = rep(0,H)
co = 1
for(n in 1:(H-1)) {
z_vals[co+1]= phi(eta*n)/(pi*1i*(n)) #start at index 2 - the first value of z is 0
co = co + 1
}
yvals_pos=0.5+(1:H)/H-Re(fft(z_vals,inverse=FALSE))
yvals_neg=0.5-(1:H)/H-Re(fft(z_vals,inverse=TRUE))
xvals_pos = 2*pi*(1:H)/(eta*H)
xvals_neg = -xvals_pos
xvals_neg = rev(xvals_neg)
yvals_neg = rev(yvals_neg)
xvals = c(xvals_neg,xvals_pos)
yvals = c(yvals_neg,yvals_pos)
if(!is.null(xlim)) {
indexes = intersect(which(xvals>xlim[1]),which(xvals<xlim[2]))
}
else {
indexes = (1:(H+1))+floor( (H-1)/2)
}
xvals = xvals[indexes]
yvals = yvals[indexes]
if(smoothe) {
if(length(smoothe)>1) {
yvals = simple_smoothe(yvals,smoothe)
}
else {
yvals = simple_smoothe(yvals)
}
}
return(list(xvals,yvals))
}
#' Simple smoothing
#'
#' \code{simple_smoothe} computes a simple moving weighted average of a input vector \code{x}. The weight vector must have an odd number of entries, and defaults to
#' \code{c(0.1,0.20,0.4,0.20,0.1)}
#'
#' @param x input to be smoothed
#' @param svec smoothing vector
#'
#'@examples
#'smoothed_yvals = simple_smoothe(yvals)
#'smoothed_yvals = simple_smoothe(yvals,c(0.2,0.6,0.2))
#'
#'@export
simple_smoothe <- function(x,svec= c(0.1,0.20,0.4,0.20,0.1)) {
if ((length(svec) %% 2) == 0) {stop("Please provide an odd number of smoothing weigths")}
out = x
offset = floor(length(svec)/2)
for(i in (1+offset):(length(x)-offset)) {
out[i] = sum(x[(i-offset):(i+offset)]*svec)
}
return(out)
}
#' Construction of DPH-representation
#'
#'
#' \code{DPHrep} computes the representation of of a integer-linear-combination of the Site Frequency as a discrete phase-type distribution.
#' The construction is described in Section 7.1 of Hobolth (2020).
#'
#' @param bM Subtransition probabilities un the underlying discrete Markov chain (cf. Figure 6).
#' @param bA Statespace of the underlying block-counting process
#' @param ba Vector of integer coeffcients
#'
#' @return List consisting of
#' bMt: The constructed matrix subtransition probabilities.
#' sizes_of_blocks: The sizes of the constructed blocks.
#'
#' @examples
#' ba = c(1,2,3)
#' ph_bcp = block_counting_process(4)
#' subintensity_matrix = ph_bcp$subint_mat
#' bA = ph_bcp$reward_mat
#' ph = PH(subintensity_matrix)
#' ph_rew_obj = reward_phase_type(ph, rowSums(rew_mat))
#' bS = ph_rew_obj$subint_mat
#' bM = solve(diag(dim(bS)[1])-(2/theta)*bS)
#' DPHrep(ba,bM)
#'
#'
#' @export
DPHrep <- function(bM,bA,ba) {
m = nrow(bA) #this is p in the paper
sizes_of_blocks = rep(0,m) #obtain the sizes of the blocks using formula XX
for(i in 1:m) {
sizes_of_blocks[i]=max(ba*(bA[i,] > 0))
}
bMt = matrix(0,sum(sizes_of_blocks),sum(sizes_of_blocks)) #bold-Mtilde
for(i in 1:m) {
for(j in 1:m) {
if(i <= j) { #off-diagonal blocks
bmvec = rep(0,sizes_of_blocks[j])
# The bottom row is calculated using formula DD
for(k in 1:sizes_of_blocks[j]) {
bmvec[sizes_of_blocks[j]-k+1]=sum(bA[j,]*(ba == k))
}
bmvec = bM[i,j]*bmvec/sum(bmvec)
cur_i = sum(sizes_of_blocks[1:i])
if(j == 1) {
cur_j = 1
}
else {
cur_j = sum(sizes_of_blocks[1:(j-1)]) + 1
}
bMt[cur_i,cur_j:(cur_j+sizes_of_blocks[j]-1)] = bmvec
}
# The diagonal-blocks are treated as a separate case
if((i == j) && sizes_of_blocks[i] > 1) {
size_of_current_block = sizes_of_blocks[i]
cur_i = sum(sizes_of_blocks[1:i]) - size_of_current_block + 1
cur_j = sum(sizes_of_blocks[1:j]) - size_of_current_block + 2
bMt[cur_i:(cur_i + size_of_current_block - 2),cur_j:(cur_j + size_of_current_block - 2)] = diag(size_of_current_block-1) #add identity-matrix of appropriate size
}
}
}
return(list(bMt,sizes_of_blocks))
}
#' Rate matrix and state space of the block counting process
#'
#'
#' \code{RateMatAndStateSpace} finds the state space and corresponding rate matrix
#' for the block counting process for a number of samples n in the
#' standard coalescent.
#'
#' @param n Number of samples
#'
#' @return List consisting of
#' RateM: Rate matrix
#' StSpM: Matrix with rows corresponding to the states
#' A state is a n-dimensional row vector (a1,...,an).
#' For example the beginning state is (n,0,...,0),
#' the next state is (n-2,1,0,...,0),
#' and the ending state is (0,...,0,1)
#'
#' @examples
#' RateMAndStateSpace(8)
#'
#'
#'
#' @export
RateMAndStateSpace <- function(n){
##----------------------------------------------------
## State space
##----------------------------------------------------
## Size of the state space (number of states)
nSt <- partitions::P(n)
## Definition of the state space
StSpM <- matrix(ncol=n,nrow=nSt)
## Set of partitions of [n]
x <- partitions::parts(n)
## Rewriting the partitions as (a1,...,an)
for (i in 1:nSt) {
st <- x[,i]
StSpM[i,] <- tabulate(x[,i],nbins=n)
}
## Reordering
StSpM <- StSpM[order(rowSums(StSpM),decreasing=TRUE),]
## Because of this ordering we can't 'go back', i.e.
## below the diagonal the entries are always zero
##----------------------------------------------------
## Intensity matrix
##----------------------------------------------------
RateM <- matrix(0,ncol=nSt,nrow=nSt)
## Following Algorithm 4.2
for (i in 1:(nSt-1)){
for (j in (i+1):nSt){
# cat(i," state i",StSpM[i,])
# cat(" ",j," state j",StSpM[j,])
cvec <- StSpM[i,]-StSpM[j,]
# cat(" cvec",cvec)
## Two branches are merged, i.e. removed from state i
check1 <- sum(cvec[cvec>0])==2
# cat(" check1",check1)
## One new branch is created, i.e. added in state from j
check2 <- sum(cvec[cvec<0])==-1
# cat(" check2",check2)
if (check1 & check2){
## Size(s) of the block(s) and the corresponding rates
tmp <- StSpM[i,which(cvec>0)]
RateM[i,j] <- ifelse(length(tmp)==1,tmp*(tmp-1)/2,prod(tmp))
}
}
}
## Diagonal part of the rate matrix
for (i in 1:nSt){
RateM[i,i] <- -sum(RateM[i,])
}
return(list(RateM=RateM,StSpM=StSpM))
}
#' \code{block_counting_process} return a the block counting process for a given sample size
#' as a \code{mult_cont_phase_type} object.
#'
#' @param n Number of samples
#'
#' @return ph_rew_ob
#' A \code{mult_cont_phase_type} representation of the block counting process of size n
#'
#' @examples
#' block_counting_process(8)
#'
#' @export
block_counting_process <- function(n){
RMASS = RateMAndStateSpace(n)
m = dim(RMASS$RateM)[1] #(m should be equal to partitions::P(n))
# Obtain subintensity matrix
ph = PH(RMASS$RateM[1:(m-1),1:(m-1)])
# The reward matrix is the state space matrix of the block counting process, except the row & column related to the absorbing state.
rew_mat = RMASS$StSpM[1:(m-1),1:(n-1)]
ph_rew_obj = MPH(ph$subint_mat, ph$init_probs, reward_mat = rew_mat)
return(ph_rew_obj)
}
## ----setup, message=FALSE, warning=FALSE-------------------------------------
set.seed(0)
library(PhaseTypeR)
library(expm)
# source('auxiliary_functions.R')
## ---- message=FALSE, warning=FALSE--------------------------------------------
n = 10
# Generate the block counting process for n = 10 as a
# phase-type distribution. The resulting object will be of class
# "mult_cont_phase_type" with subintensity matrix "subint_mat"
# and state space/reward matrix "reward_mat".
ph_bcp <- block_counting_process(n)
bmu <- mean(ph_bcp) #bmu = bold mu
bSigma <- var(ph_bcp) #bSigma = bold Sigma
## ---- message=FALSE, warning=FALSE--------------------------------------------
thetaVec <- c(0.1,1,5,10,100) #Vector theta values for Figure 1
bv <- 1/(1:(n-1)) #The vector bold v defined above
##BLUE Estimators
coef_matrix = matrix(0,length(thetaVec),n-1)
for(i in 1:length(thetaVec)) {
theta = thetaVec[i]
bLambda=(theta/2)^2*bSigma + (theta/2)*diag( bmu )
coef_matrix[i,]=solve(bLambda)%*%bv/c(bv%*%solve(bLambda)%*%bv)
}
## Watterson's estimator
xWatt <- rep(1,length(bv))/sum(bv)
##-------------------------------------------------------------
## Singleton estimator
xsngltns <- c(1,rep(0,(length(bv)-1)))
##-------------------------------------------------------------
## Pairwise difference estimator
xpair <- ( 1:(n-1) )*( (n-1):1 )/n/(n-1)*2
##-------------------------------------------------------------
## H estimator
xH <- ( 1:(n-1) )^2 *2/n/(n-1)
##-------------------------------------------------------------
## L estimator
xL <- ( 1:(n-1) )/(n-1)
##---------------------------------------------------------------
## Plot the coefficients of the 5 different estimators (W,S,P,H,L)
plot(1:(n-1),xWatt,ylim=c(-0.5,2),col="black",lwd=2,type="l",xlim=c(1,n),
xlab=bquote(i),ylab=bquote(c[i]),cex.lab=1.5,lty=2,las=1,cex.axis=1.5)
abline(v=1:9,col="gray")
points(1:(n-1),xsngltns,col="black",type="l",lty=3,lwd=2)
points(1:(n-1),xpair,col="black",type="l",lty=4,lwd=2)
points(1:(n-1),xH,col="black",type="l",lty=5,lwd=2)
points(1:(n-1),xL,col="black",type="l",lty=6,lwd=2)
for (i in 1:length(thetaVec)){
xhat = coef_matrix[i,]
points(1:(n-1),xhat,type="l",lwd=2)
text(n-1,xhat[n-1],bquote(theta==.(thetaVec[i])),pos=4,cex=1.0)
}
txtVec <- c("Watterson","Singleton","Pairwise","H","L","BLUE")
ltyVec <- c(2,3,4,5,6,1)
indx <- c(4,5,1,3,2,6)
legend(2,2,txtVec[indx],lty=ltyVec[indx],lwd=2,bty="n",cex=1.2)
## -----------------------------------------------------------------------------
thetaVec <- seq(0.01,2.5,by=0.1)
ntheta <- length(thetaVec)
vrW <- rep(0,ntheta) ; vrS <- rep(0,ntheta) ; vrP <- rep(0,ntheta)
vrH <- rep(0,ntheta) ; vrL <- rep(0,ntheta) ; vrMVUE <- rep(0,ntheta)
for (i in 1:ntheta){
tht <- thetaVec[i]
bLambda=(tht^2/4)*bSigma + (tht/2)*diag( bmu )
xhat <- (solve(bLambda) %*% bv)/as.numeric(bv %*% solve(bLambda) %*% bv)
vrMVUE[i] <- t(xhat) %*% bLambda %*% xhat
vrW[i] <- t(xWatt) %*% bLambda %*% xWatt
vrS[i] <- t(xsngltns) %*% bLambda %*% xsngltns
vrP[i] <- t(xpair) %*% bLambda %*% xpair
vrH[i] <- t(xH) %*% bLambda %*% xH
vrL[i] <- t(xL) %*% bLambda %*% xL
}
plot(thetaVec,vrMVUE,type="l",lty=1,lwd=2,xlim=c(0,max(thetaVec)),
xlab=bquote(theta),ylab="Variance for estimator",cex.lab=1.5,las=1,cex.axis=1.5)
points(thetaVec,vrW,type="l",lty=2,lwd=2)
points(thetaVec,vrS,type="l",lty=3,lwd=2)
points(thetaVec,vrP,type="l",lty=4,lwd=2)
points(thetaVec,vrH,type="l",lty=5,lwd=2)
points(thetaVec,vrL,type="l",lty=6,lwd=2)
indx <- c(4,5,2,3,1,6)
legend("topleft",txtVec[indx],lty=ltyVec[indx],lwd=2,bty="n",cex=1.2)
## ---- warning=FALSE-----------------------------------------------------------
n <- 4
# create rate-matrix and state space for block counting process
ph_bcp <- block_counting_process(n)
# Obtain sub-intensity matrix
subintensity_matrix <- ph_bcp$subint_mat
rew_mat <- ph_bcp$reward_mat
## ---- warning=FALSE-----------------------------------------------------------
R <- 1e4
ph_mv_sim_obj <- t(rMPH(R,ph_bcp))
lambda <- 0.5 #lambda = theta/2
ph_counts = matrix(0,dim(ph_mv_sim_obj)[2],dim(ph_mv_sim_obj)[1])
for(i in 1:R) {
ph_counts[i,] = rpois(n-1,lambda*ph_mv_sim_obj[,i])
}
## -----------------------------------------------------------------------------
bc = (2*((1:(n-1))*( (n-1):1))/(n*(n-1)) - 1/sum(1/(1:(n-1))))
## -----------------------------------------------------------------------------
res <- 1000
bc <- res*bc
## ---- warning=FALSE-----------------------------------------------------------
themean <- sum(mean(ph_bcp)*bc) #the mean of the linear combination (should be 0)
bT <- subintensity_matrix #bold T
bA <- rew_mat #bold A
bS <- bT - lambda*diag(rowSums(bA)) #bold S
be <- matrix(1,dim(bT)[1],1) #be = (1,1,...,1)^T
beone <- matrix(0,1,dim(bT)[1]);beone[1]=1 #(1,0,...,0)
phi <- function(t) (exp(-1i*themean*t))*beone%*%solve(bS+lambda*diag(c(bA%*%(exp(1i*t)^bc))) )%*%bT%*%be
## ---- warning=FALSE-----------------------------------------------------------
appvals <- ApproxCDF(phi,H = 1e5,eta=0.0001,xlim=c(-0.5*res,1.5*res),smoothe=TRUE)
xvals <- appvals[[1]]
yvals <- appvals[[2]]
#rescale
bc2 <- (1/res)*bc
xvals2 <- (1/res)*xvals
themean2 <- (1/res)*themean
centered_sim2 <- ph_counts%*%bc2-c(themean2)
ecdfobj2 <- ecdf(centered_sim2)
plot(xvals2,yvals,type="l",ylim=c(0,1),xlim=c(-1,1.5),xlab="",las=1,ylab="Cumulative distribution function",cex.axis=1.5,cex.lab=1.5)
lines(ecdfobj2,col="blue",col.01line = NULL)
lines(xvals2,yvals,lwd=2)
segments(-1.25, 0.025, x1 = xvals2[min(which(0.025<yvals))], y1 = 0.025,lty = 2)
segments(xvals2[min(which(0.025<yvals))], -1, x1 = xvals2[min(which(0.025<yvals))], y1 = 0.025,lty = 2)
segments(-1.25, 0.975, x1 = xvals2[max(which(0.975>yvals))], y1 = 0.975,lty = 2)
segments(xvals2[min(which(0.975<yvals))], -1, x1 = xvals2[max(which(0.975>yvals))], y1 = 0.975,lty = 2)
segments(xvals2[min(which(0.025<yvals))], 0, x1 = xvals2[max(which(0.975>yvals))], y1 = 0,lwd = 3)
points(xvals2[min(which(0.025<yvals))],-0.035,pch=17)
points(xvals2[max(which(0.975>yvals))],-0.035,pch=17)
## ----echo = FALSE, warning=FALSE----------------------------------------------
#Define reward process
n <- 8
# create rate-matrix and state space for block counting process
ph_bcp <- block_counting_process(n)
# Obtain sub-intensity matrix
subintensity_matrix <- ph_bcp$subint_mat
rew_mat <- ph_bcp$reward_mat
be <- matrix(1,dim(bT)[1],1)
alpha <- matrix(0,1,dim(bT)[1]);alpha[1]=1
ph_mv_sim_obj <- t(rMPH(R,ph_bcp))
rew_dim <- dim(ph_mv_sim_obj)[1]
ph_counts <- matrix(0,dim(ph_mv_sim_obj)[2],dim(ph_mv_sim_obj)[1])
for(i in 1:R) {
ph_counts[i,] <- rpois(n-1,lambda*ph_mv_sim_obj[,i])
}
res <- 1000
bc <- res*(2*((1:(n-1))*( (n-1):1))/(n*(n-1)) - 1/sum(1/(1:(n-1))))
#compute characteristic function
lambda <- 0.5
themean <- sum(mean(ph_bcp)*bc) #the mean of the linear combination
bT <- subintensity_matrix #bold T
bR <- rew_mat #bold R
bS <- bT - lambda*diag(rowSums(bR)) #bold S
be <- matrix(1,dim(bT)[1],1) #bold one = (1,1,...,1)^T
beone <- matrix(0,1,dim(bT)[1]);beone[1]=1 #(1,0,...,0)
phi <- function(t) (exp(-1i*themean*t))*beone%*%solve(bS+lambda*diag(c(bR%*%(exp(1i*t)^bc))) )%*%bT%*%be
#Invert numerically
appvals <- ApproxCDF(phi,H = 1e5,eta=0.0001,xlim=c(-1*res,2*res),smoothe=TRUE)
xvals <- appvals[[1]]
yvals <- appvals[[2]]
bc2 <- (1/res)*bc
xvals2 <- (1/res)*xvals
themean2 <- (1/res)*themean
centered_sim2 <- ph_counts%*%bc2-c(themean2)
ecdfobj2 <- ecdf(centered_sim2)
plot(xvals2,yvals,type="l",ylim=c(0,1),xlim=c(-1,1.5),xlab="",ylab="Cumulative distribution function",cex.lab=1.5,las = 1,cex.axis=1.5)
lines(ecdfobj2,col="blue",col.01line = NULL)
lines(xvals2,yvals,lwd=2)
segments(-1, 0.025, x1 = xvals2[min(which(0.025<yvals))], y1 = 0.025,lty = 2)
segments(xvals2[min(which(0.025<yvals))], 0, x1 = xvals2[min(which(0.025<yvals))], y1 = 0.025,lty = 2)
segments(-1, 0.975, x1 = xvals2[max(which(0.975>yvals))], y1 = 0.975,lty = 2)
segments(xvals2[min(which(0.975<yvals))], 0, x1 = xvals2[max(which(0.975>yvals))], y1 = 0.975,lty = 2)
segments(xvals2[min(which(0.025<yvals))], 0, x1 = xvals2[max(which(0.975>yvals))], y1 = 0,lwd = 3)
points(xvals2[min(which(0.025<yvals))],-0.035,pch=17)
points(xvals2[max(which(0.975>yvals))],-0.035,pch=17)
## ---- warning=FALSE-----------------------------------------------------------
n <- 5
ph_bcp <- block_counting_process(n)
subintensity_matrix <- ph_bcp$subint_mat
rew_mat <- ph_bcp$reward_mat
## -----------------------------------------------------------------------------
rew_mat
## ---- warning=FALSE-----------------------------------------------------------
ph = PH(subintensity_matrix)
ph_rew_obj <- reward_phase_type(ph, rew_mat[,1])
## -----------------------------------------------------------------------------
plot(1, type="n",xlim=c(0,4),ylim=c(0,1),
xlab="t",ylab="Cumulative distribution function",cex.lab=1.5,
main=bquote("Branch length distributions for i-tons with sample size"~n==.(n)),las=1,cex.axis=1.5,cex.main=1.4)
for(i in 1:(n-1)) {
ph_rew_obj <- reward_phase_type(ph,rew_mat[,i])
be <- matrix(1,length(ph_rew_obj$init_probs),1)
abstime <- function(u) {
1 - ph_rew_obj$init_probs%*%expm(ph_rew_obj$subint_mat*u)%*%be
}
abstime <- Vectorize(abstime)
curve(abstime,lty=i,add=TRUE,lwd=2)
}
legend("bottomright",c("singleton","doubleton","tripleton","quardrupleton"),
lty=1:4,bty="n",cex=1.6,lwd=2)
## -----------------------------------------------------------------------------
plot(1, type="n", xlim=c(0, 4), ylim=c(0, 1),las=1,xlab="Number of mutations",ylab="Probability",
main=bquote("Probability of mutations for"~theta==1),cex.lab=1.5,cex.main=1.4,cex.axis=1.5)
for(i in 1:(n-1)) {
ph_rew_obj=reward_phase_type(ph, rew_mat[,i])
bS <- ph_rew_obj$subint_mat
bM <- solve(diag(dim(bS)[1])-2*bS)
bpi <- ph_rew_obj$init_probs
be <- matrix(1,diag(dim(bS)[1],1))
bm <- be - bM%*%be
probs <- apply(matrix(0:5),1,function(i) bpi%*%(bM%^%i)%*%bm)
probs[1] <- probs[1] + ph_rew_obj$defect #note we have to take the possible defect into account
points(0:5,probs,pch=16)
lines(0:5,probs,lty=i,lwd=2)
}
legend("topright",c("singleton","doubleton","tripleton","quardrupleton"),
lty=1:4,bty="n",cex=1.5,lwd=2)
## ---- warning=FALSE-----------------------------------------------------------
n <- 4
theta <- 1
ph_bcp <- block_counting_process(n)
subintensity_matrix <- ph_bcp$subint_mat
rew_mat <- ph_bcp$reward_mat
ph = PH(subintensity_matrix)
# The reward vector is the rows sums of the state space matrix
ph_rew_obj <- reward_phase_type(ph, rowSums(rew_mat))
bS <- ph_rew_obj$subint_mat
bM <- solve(diag(dim(bS)[1])-(2/theta)*bS)
## -----------------------------------------------------------------------------
baMat <- matrix(c(1:(n-1),(1:(n-1))*((n-1):1),(1:(n-1))^2),n-1,n-1,byrow=TRUE)
baMat
## ---- warning=FALSE-----------------------------------------------------------
len <- n*(n-1)+1
probsMat <- matrix(0,3,len)
for(i_outer in 1:3) {
ba <- baMat[i_outer,] #This is the current a-vector
DPH_obj <- DPHrep(bM,ph_bcp$reward_mat,ba)
bMt <- DPH_obj[[1]]
sizes_of_blocks <- DPH_obj[[2]]
beone <- rep(0,dim(bMt)[1])
beone[sizes_of_blocks[1]] <- 1
be <- matrix(rep(1,dim(bMt)[1]))
bmt <- be - bMt%*%be
probs <- rep(0,len)
for(i in 1:len) {
probs[i] <- beone%*%(bMt%^%(i-1))%*%bmt
}
probsMat[i_outer,] <- probs
}
#Finally, plot the figures
main.txt <- bquote("Integer-weighted SFS distributions for"~n == .(n))
xs <- c(0,(1:(2*(n-1)))/(n-1))
plot(xs,probsMat[1,1:(2*(n-1)+1)],type="l",lty="dashed",pch=19,ylim=c(0,probsMat[1,1]),xlab="Weighted SFS",ylab="Probability",
main=main.txt,cex.main=1.4,cex.lab=1.5,las=1,lwd=2,cex.axis=1.20)
points(xs,probsMat[1,1:(2*(n-1)+1)],type="p",pch=19)
xs <- c(0,(1:((n*(n-1))))/(n*(n-1)/2))
points(xs,probsMat[2,],type="l",lty="dotted",lwd=2)
points(xs,probsMat[2,],type="p",pch=19)
points(xs,probsMat[3,],type="l",lty="solid",lwd=2)
points(xs,probsMat[3,],type="p",pch=19)
abline(v=1,col="gray")
txt <- bquote("mean"~theta==1)
text(1,probsMat[1,1],txt,pos=2,cex=1.5)
legend("topright",c("Pairwise","H","L"),lwd=2,
lty=c("dotted","solid","dashed"),bty="n",cex=1.5)
## ---- warning=FALSE-----------------------------------------------------------
n <- 6
theta <- 1
ph_bcp <- block_counting_process(n)
subintensity_matrix <- ph_bcp$subint_mat
rew_mat <- ph_bcp$reward_mat
ph <- PH(subintensity_matrix)
# The reward vector is the rows sums of the state space matrix
ph_rew_obj <- reward_phase_type(ph, rowSums(rew_mat))
bS <- ph_rew_obj$subint_mat
bM <- solve(diag(dim(bS)[1])-(2/theta)*bS)
baMat <- matrix(c(1:(n-1),(1:(n-1))*((n-1):1),(1:(n-1))^2),n-3,n-1,byrow=TRUE)
len <- n*(n-1)+1
probsMat <- matrix(0,3,len)
for(i_outer in 1:3) {
ba <- baMat[i_outer,] #This is the current a-vector
DPH_obj <- DPHrep(bM,ph_bcp$reward_mat,ba)
bMt <- DPH_obj[[1]]
sizes_of_blocks <- DPH_obj[[2]]
beone <- rep(0,dim(bMt)[1])
beone[sizes_of_blocks[1]] <- 1
be <- matrix(rep(1,dim(bMt)[1]))
bmt <- be - bMt%*%be
probs <- rep(0,len)
for(i in 1:len) {
probs[i] <- beone%*%(bMt%^%(i-1))%*%bmt
}
probsMat[i_outer,] <- probs
}
xs <- c(0,(1:(2*(n-1)))/(n-1))
main.txt <- bquote("Integer-weighted SFS distributions for"~n==.(n))
plot(xs,probsMat[1,1:(2*(n-1)+1)],type="l",lty="dashed",pch=19,ylim=c(0,probsMat[1,1]),xlab="Weighted SFS",ylab="Probability",
main=main.txt,cex.main=1.4,cex.lab=1.5,las=1,lwd=2,cex.axis=1.20)
points(xs,probsMat[1,1:(2*(n-1)+1)],type="p",pch=19)
xs <- c(0,(1:((n*(n-1))))/(n*(n-1)/2))
points(xs,probsMat[2,],type="l",lty="dotted",lwd=2)
points(xs,probsMat[2,],type="p",pch=19)
points(xs,probsMat[3,],type="l",lty="solid",lwd=2)
points(xs,probsMat[3,],type="p",pch=19)
xs <- c(0,(1:(2*(n-1)))/(n-1))
abline(v=1,col="gray")
txt <- bquote("mean"~theta==1)
text(1,probsMat[1,1],txt,pos=2,cex=1.5)
legend("topright",c("Pairwise","H","L"),lwd=2,
lty=c("dotted","solid","dashed"),bty="n",cex=1.5)
## ---- warning=FALSE, error=TRUE-----------------------------------------------
n <- 10
lambda <- 5
ph_bcp <- block_counting_process(n)
set.seed(0)
ph_mv_sim_obj <- t(rMPH(R,ph_bcp))
set.seed(19)
ph_counts <- matrix(0,dim(ph_mv_sim_obj)[2],dim(ph_mv_sim_obj)[1])
for(i in 1:R) {
ph_counts[i,] <- rpois(n-1,lambda*ph_mv_sim_obj[,i])
}
subintensity_matrix <- ph_bcp$subint_mat
rew_mat <- ph_bcp$reward_mat
bc <- coef_matrix[4,] #coef_matrix was generated above, its rows correspond to the entries of thetavec, fourth entry is theta = 10
res <- 1000
bc <- res*bc
#compute characteristic function
themean <- lambda*sum(mean(ph_bcp)*bc) #the mean of the linear combination
bT <- subintensity_matrix #bold T
bA <- rew_mat #bold A
bS <- bT - lambda*diag(rowSums(bA)) #bold S
be <- matrix(1,dim(bT)[1],1) #bold one = (1,1,...,1)^T
beone <- matrix(0,1,dim(bT)[1]);beone[1]=1 #(1,0,...,0)
phi <- function(t) (exp(-1i*themean*t))*beone%*%solve(bS+lambda*diag(c(bA%*%(exp(1i*t)^bc))) )%*%bT%*%be
#Invert numerically
appvals <- ApproxCDF(phi,H = 1e5,eta=0.0001,xlim=c(-10*res,10*res),smoothe=TRUE)
xvals <- appvals[[1]]
yvals <- appvals[[2]]
bc2 <- (1/res)*bc
xvals2 <- (1/res)*xvals
themean2 <- (1/res)*themean
#Compute point probabilities of Watterson's Theta for theta = 10
ph <- PH(ph_bcp$subint_mat)
watter <- reward_phase_type(ph, rowSums(ph_bcp$reward_mat))
lambda <- 5
bM <- solve( diag(dim(watter$subint_mat)[1])-(1/lambda)*watter$subint_mat)
bm <- rowSums(diag(dim(bM)[1]) - bM)
a1 <- 1/sum(1/1:(n-1)) #"normalizing" constant, in Watterson's theta
len <- 250 #number of points to include
out <- rep(0,len)
for(i in 0:(len-1)) {
out[i+1] <- ph$init_probs%*%(bM%^%i)%*%bm
}
wxt <- -10+a1*(0:(len-1)) # Possible values for Watterson's Theta
# Pairwise estimator for theta = 10
ba <- ( 1:(n-1) )*( (n-1):1 ) #Coefficients of pair-wise estimator
DPH_obj <- DPHrep(bM,ph_bcp$reward_mat,ba)
bMt <- DPH_obj[[1]]
sizes_of_blocks <- DPH_obj[[2]]
beone <- rep(0,dim(bMt)[1])
beone[sizes_of_blocks[1]] <- 1
be <- matrix(rep(1,dim(bMt)[1]))
bmt <- be - bMt%*%be
len = 1e3
# Running bMt - the bMt is pretty large, computing its matrix-powers in the naive way as above is time-consuming.
probs2 <- rep(0,len)
run_bMT <- beone
for(i in 1:len) {
probs2[i] <- run_bMT%*%bmt
run_bMT <- run_bMT%*%bMt
}
a1 <- n*(n-1)/2 #Normalizing constant of the pairwise estimator
wx <- -10+(1/a1)*(0:(len-1))
plot(xvals2,yvals,type="l",ylim=c(0,1),xlab="",ylab="Cumulative distribution function",cex.lab=1.5,las = 1,cex.axis=1.5)
lines(stepfun(wx,cumsum(c(0,probs2))),col="red",do.points=FALSE,lwd=2)
lines(xvals2,yvals,type="l",ylim=c(0,1),xlab="",ylab="",lwd=2)
lines(stepfun(wxt,cumsum(c(0,out))),col="blue",do.points=FALSE,lwd=2)
legend("bottomright",c("BLUE","Watterson","Pairwise"),
col=c("black","blue","red"),bty="n",cex=1.6,lwd=2)
## ----Aux_1--------------------------------------------------------------------
#'Numerical Approximation of characteristic function
#'
#'\code{ApproxCDF} approximates the cdf F when given a characteristic function phi of a centered random variable, using the formula found in Waller (1995) with
#'original reference to Bohman (1974). The procedure can be numerically unstable in the tails of the distribution, so
#'only the center of the approximation is returned. Some simplifying approximations explained in "Numerical inversion of Laplace transform and characteristic function"
#'are used. Note that phi should have a finite first moment.
#'
#'@param phi the characteristic function to be inverted
#'@param H A total of 2H+1 values of F are approximated. By default H of these values are returned unless an interval is provided.
#'@param eta A scaling paramter. By default equidistant points in the interval (-2*phi/eta,2*phi/(eta)) are approximated.
#'@param xlim (optional) limits on the x-axis
#'@param smoothe (optional) Should smoothing be used? If TRUE default weights of the function \code{simple_smoothe} are used. If an object of length > 1 is provided,
#'this will be passed to \code{simple_smoothe}
#'
#'@examples
#'phi <- function(t) exp(-t^2/2)
#'appvals=ApproxCDF(phi,H=1000,eta=0.5,xlim=c(-3,3))
#'plot(appvals[[1]],appvals[[2]],type="l",lwd=2)
#'lines(appvals[[1]],pnorm(appvals[[1]]),type="l",col="red")
#'
#'phi <- function(t) sqrt(2)*abs(t)*besselK(sqrt(2)*abs(t),1)
#'appvals=ApproxCDF(phi,H=10000,eta=0.1,xlim=c(-3,3))
#'plot(appvals[[1]],appvals[[2]],type="l",lwd=2)
#'lines(appvals[[1]],pt(appvals[[1]],df=2),type="l",col="red")
#'
#'@export
ApproxCDF = function(phi,H=2000,eta=0.5,xlim=NULL,smoothe=FALSE) {
z_vals = rep(0,H)
co = 1
for(n in 1:(H-1)) {
z_vals[co+1]= phi(eta*n)/(pi*1i*(n)) #start at index 2 - the first value of z is 0
co = co + 1
}
yvals_pos=0.5+(1:H)/H-Re(fft(z_vals,inverse=FALSE))
yvals_neg=0.5-(1:H)/H-Re(fft(z_vals,inverse=TRUE))
xvals_pos = 2*pi*(1:H)/(eta*H)
xvals_neg = -xvals_pos
xvals_neg = rev(xvals_neg)
yvals_neg = rev(yvals_neg)
xvals = c(xvals_neg,xvals_pos)
yvals = c(yvals_neg,yvals_pos)
if(!is.null(xlim)) {
indexes = intersect(which(xvals>xlim[1]),which(xvals<xlim[2]))
}
else {
indexes = (1:(H+1))+floor( (H-1)/2)
}
xvals = xvals[indexes]
yvals = yvals[indexes]
if(smoothe) {
if(length(smoothe)>1) {
yvals = simple_smoothe(yvals,smoothe)
}
else {
yvals = simple_smoothe(yvals)
}
}
return(list(xvals,yvals))
}
## ----Aux_2--------------------------------------------------------------------
#' Simple smoothing
#'
#' \code{simple_smoothe} computes a simple moving weighted average of a input vector \code{x}. The weight vector must have an odd number of entries, and defaults to
#' \code{c(0.1,0.20,0.4,0.20,0.1)}
#'
#' @param x input to be smoothed
#' @param svec smoothing vector
#'
#'@examples
#'smoothed_yvals = simple_smoothe(yvals)
#'smoothed_yvals = simple_smoothe(yvals,c(0.2,0.6,0.2))
#'
#'@export
simple_smoothe <- function(x,svec= c(0.1,0.20,0.4,0.20,0.1)) {
if ((length(svec) %% 2) == 0) {stop("Please provide an odd number of smoothing weigths")}
out = x
offset = floor(length(svec)/2)
for(i in (1+offset):(length(x)-offset)) {
out[i] = sum(x[(i-offset):(i+offset)]*svec)
}
return(out)
}
## ----Aux_3--------------------------------------------------------------------
#' Construction of DPH-representation
#'
#'
#' \code{DPHrep} computes the representation of of a integer-linear-combination of the Site Frequency as a discrete phase-type distribution.
#' The construction is described in Section 7.1 of Hobolth (2020).
#'
#' @param bM Subtransition probabilities un the underlying discrete Markov chain (cf. Figure 6).
#' @param bA Statespace of the underlying block-counting process
#' @param ba Vector of integer coeffcients
#'
#' @return List consisting of
#' bMt: The constructed matrix subtransition probabilities.
#' sizes_of_blocks: The sizes of the constructed blocks.
#'
#' @examples
#' ba = c(1,2,3)
#' ph_bcp = block_counting_process(4)
#' subintensity_matrix = ph_bcp$subint_mat
#' bA = ph_bcp$reward_mat
#' ph = PH(subintensity_matrix)
#' ph_rew_obj = reward_phase_type(ph, rowSums(rew_mat))
#' bS = ph_rew_obj$subint_mat
#' bM = solve(diag(dim(bS)[1])-(2/theta)*bS)
#' DPHrep(ba,bM)
#'
#'
#' @export
DPHrep <- function(bM,bA,ba) {
m = nrow(bA) #this is p in the paper
sizes_of_blocks = rep(0,m) #obtain the sizes of the blocks using formula XX
for(i in 1:m) {
sizes_of_blocks[i]=max(ba*(bA[i,] > 0))
}
bMt = matrix(0,sum(sizes_of_blocks),sum(sizes_of_blocks)) #bold-Mtilde
for(i in 1:m) {
for(j in 1:m) {
if(i <= j) { #off-diagonal blocks
bmvec = rep(0,sizes_of_blocks[j])
# The bottom row is calculated using formula DD
for(k in 1:sizes_of_blocks[j]) {
bmvec[sizes_of_blocks[j]-k+1]=sum(bA[j,]*(ba == k))
}
bmvec = bM[i,j]*bmvec/sum(bmvec)
cur_i = sum(sizes_of_blocks[1:i])
if(j == 1) {
cur_j = 1
}
else {
cur_j = sum(sizes_of_blocks[1:(j-1)]) + 1
}
bMt[cur_i,cur_j:(cur_j+sizes_of_blocks[j]-1)] = bmvec
}
# The diagonal-blocks are treated as a separate case
if((i == j) && sizes_of_blocks[i] > 1) {
size_of_current_block = sizes_of_blocks[i]
cur_i = sum(sizes_of_blocks[1:i]) - size_of_current_block + 1
cur_j = sum(sizes_of_blocks[1:j]) - size_of_current_block + 2
bMt[cur_i:(cur_i + size_of_current_block - 2),cur_j:(cur_j + size_of_current_block - 2)] = diag(size_of_current_block-1) #add identity-matrix of appropriate size
}
}
}
return(list(bMt,sizes_of_blocks))
}
## ----Aux_4--------------------------------------------------------------------
#' Rate matrix and state space of the block counting process
#'
#'
#' \code{RateMatAndStateSpace} finds the state space and corresponding rate matrix
#' for the block counting process for a number of samples n in the
#' standard coalescent.
#'
#' @param n Number of samples
#'
#' @return List consisting of
#' RateM: Rate matrix
#' StSpM: Matrix with rows corresponding to the states
#' A state is a n-dimensional row vector (a1,...,an).
#' For example the beginning state is (n,0,...,0),
#' the next state is (n-2,1,0,...,0),
#' and the ending state is (0,...,0,1)
#'
#' @examples
#' RateMAndStateSpace(8)
#'
#'
#'
#' @export
RateMAndStateSpace <- function(n){
##----------------------------------------------------
## State space
##----------------------------------------------------
## Size of the state space (number of states)
nSt <- partitions::P(n)
## Definition of the state space
StSpM <- matrix(ncol=n,nrow=nSt)
## Set of partitions of [n]
x <- partitions::parts(n)
## Rewriting the partitions as (a1,...,an)
for (i in 1:nSt) {
st <- x[,i]
StSpM[i,] <- tabulate(x[,i],nbins=n)
}
## Reordering
StSpM <- StSpM[order(rowSums(StSpM),decreasing=TRUE),]
## Because of this ordering we can't 'go back', i.e.
## below the diagonal the entries are always zero
##----------------------------------------------------
## Intensity matrix
##----------------------------------------------------
RateM <- matrix(0,ncol=nSt,nrow=nSt)
## Following Algorithm 4.2
for (i in 1:(nSt-1)){
for (j in (i+1):nSt){
# cat(i," state i",StSpM[i,])
# cat(" ",j," state j",StSpM[j,])
cvec <- StSpM[i,]-StSpM[j,]
# cat(" cvec",cvec)
## Two branches are merged, i.e. removed from state i
check1 <- sum(cvec[cvec>0])==2
# cat(" check1",check1)
## One new branch is created, i.e. added in state from j
check2 <- sum(cvec[cvec<0])==-1
# cat(" check2",check2)
if (check1 & check2){
## Size(s) of the block(s) and the corresponding rates
tmp <- StSpM[i,which(cvec>0)]
RateM[i,j] <- ifelse(length(tmp)==1,tmp*(tmp-1)/2,prod(tmp))
}
}
}
## Diagonal part of the rate matrix
for (i in 1:nSt){
RateM[i,i] <- -sum(RateM[i,])
}
return(list(RateM=RateM,StSpM=StSpM))
}
## ----Aux_5--------------------------------------------------------------------
#' \code{block_counting_process} return a the block counting process for a given sample size
#' as a \code{mult_cont_phase_type} object.
#'
#' @param n Number of samples
#'
#' @return ph_rew_ob
#' A \code{mult_cont_phase_type} representation of the block counting process of size n
#'
#' @examples
#' block_counting_process(8)
#'
#' @export
block_counting_process <- function(n){
RMASS = RateMAndStateSpace(n)
m = dim(RMASS$RateM)[1] #(m should be equal to partitions::P(n))
# Obtain subintensity matrix
ph = PH(RMASS$RateM[1:(m-1),1:(m-1)])
# The reward matrix is the state space matrix of the block counting process, except the row & column related to the absorbing state.
rew_mat = RMASS$StSpM[1:(m-1),1:(n-1)]
ph_rew_obj = MPH(ph$subint_mat, ph$init_probs, reward_mat = rew_mat)
return(ph_rew_obj)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.