We first set the stage.

library(tidyverse)
library(HotBirdHMM)
set.seed(1723)

Step 1: sampling

We consider for the moment parameters appropriate for human populations i.e.

Ne <- 1e4
r <- 1e-8
m <- 1.2e-8
rho <- 4 * Ne * r
theta <- 2 * Ne * m

M <- 10
L <- 2e7

We (for memmory resons) may prefer to consider a shorter segment with a number of events on the same order of magnitude:

convenience_factor <- 20
L <- L / convenience_factor
rho <- rho * convenience_factor
theta <- theta * convenience_factor
sim <- sample_XandY(rho,theta,M,L)
#X <- sim$X
#Y <- sim$Y
#rm(sim) #halve memmory use by removing the sim-object
sim$Index <- 1:L
head(sim)

Step 2: rudimentary analysis:

2.1 Basic exploration

print(count_changepoints(sim$X))
print(sum(sim$Y))

Generating a basic plot:

plt <- ggplot(data = sim) +
  geom_line(mapping = aes(x = Index, y = X), linetype = 1, color = 'gray') +
  geom_point(data = filter(sim, Y == 1),mapping = aes(x = Index, y = Y*X), size = 1, shape = '|') +
  scale_y_continuous(breaks = round(seq(1,M,length.out = min(M,M))), limits = c(0,M))+
  scale_x_continuous(breaks = round(seq(0,L,length.out = min(21,L+1))), limits = c(0,L))+
  theme_minimal(base_size = 11) +
  theme(panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = -45,vjust = 1, hjust = 0),
        axis.line = element_line(size = 0.3, linetype = "solid")) +
  ggtitle('Evolution of HMM', subtitle = 'Ticks indicate emissions/mutations') +
  xlab('Position') +
  ylab('Hidden state')
print(plt)

2.2 Maximum likelihood estimation (of $\rho$)

We examine the recombination rate across two windows: one with a recombination point in it, and one without.

win1 <- 1:1e4
win2 <- 6e4:7e4

#see what is going on in each window
print(c(count_changepoints(sim$X[win1]), sum(sim$Y[win1])))
print(c(count_changepoints(sim$X[win2]), sum(sim$Y[win2])))
theta_global_estimate <- sum(sim$Y == 1)/L #theta-value to be used in ML-estimation
ML_rho_1 <- compute_ML_rho(sim$Y[win1],M,theta = theta_global_estimate)
ML_rho_2 <- compute_ML_rho(sim$Y[win2],M,theta = theta_global_estimate)

We plot the likelihood functions in either window

print(ML_rho_1)
print(ML_rho_2)
rhos1 <- seq(0, 2*ML_rho_1,length.out = 21)
rhos2 <- seq(0, 2*ML_rho_2,length.out = 21)

f1 <- function(x) log_likelihood_Y(sim$Y[win1],M,rho = x, theta = theta_global_estimate)
f2 <- function(x) log_likelihood_Y(sim$Y[win2],M,rho = x, theta = theta_global_estimate)

logL1 <- sapply(X = rhos1, FUN = f1)
logL2 <- sapply(X = rhos2, FUN = f2)
pl_lik1 <- qplot(x = rhos1, y = logL1, geom = 'line')
print(pl_lik1)
pl_lik2 <- qplot(x = rhos2, y = logL2, geom = 'line')
print(pl_lik2)


Cronjaeger/HotBirdHMM documentation built on May 9, 2019, 6:01 p.m.