We first set the stage.
library(tidyverse) library(HotBirdHMM) set.seed(1723)
We consider for the moment parameters appropriate for human populations i.e.
Ne <- 1e4 r <- 1e-8 m <- 1.2e-8 rho <- 4 * Ne * r theta <- 2 * Ne * m M <- 10 L <- 2e7
We (for memmory resons) may prefer to consider a shorter segment with a number of events on the same order of magnitude:
convenience_factor <- 20 L <- L / convenience_factor rho <- rho * convenience_factor theta <- theta * convenience_factor
sim <- sample_XandY(rho,theta,M,L) #X <- sim$X #Y <- sim$Y #rm(sim) #halve memmory use by removing the sim-object sim$Index <- 1:L head(sim)
print(count_changepoints(sim$X)) print(sum(sim$Y))
Generating a basic plot:
plt <- ggplot(data = sim) + geom_line(mapping = aes(x = Index, y = X), linetype = 1, color = 'gray') + geom_point(data = filter(sim, Y == 1),mapping = aes(x = Index, y = Y*X), size = 1, shape = '|') + scale_y_continuous(breaks = round(seq(1,M,length.out = min(M,M))), limits = c(0,M))+ scale_x_continuous(breaks = round(seq(0,L,length.out = min(21,L+1))), limits = c(0,L))+ theme_minimal(base_size = 11) + theme(panel.grid.minor = element_blank(), axis.text.x = element_text(angle = -45,vjust = 1, hjust = 0), axis.line = element_line(size = 0.3, linetype = "solid")) + ggtitle('Evolution of HMM', subtitle = 'Ticks indicate emissions/mutations') + xlab('Position') + ylab('Hidden state') print(plt)
We examine the recombination rate across two windows: one with a recombination point in it, and one without.
win1 <- 1:1e4 win2 <- 6e4:7e4 #see what is going on in each window print(c(count_changepoints(sim$X[win1]), sum(sim$Y[win1]))) print(c(count_changepoints(sim$X[win2]), sum(sim$Y[win2])))
theta_global_estimate <- sum(sim$Y == 1)/L #theta-value to be used in ML-estimation ML_rho_1 <- compute_ML_rho(sim$Y[win1],M,theta = theta_global_estimate) ML_rho_2 <- compute_ML_rho(sim$Y[win2],M,theta = theta_global_estimate)
We plot the likelihood functions in either window
print(ML_rho_1) print(ML_rho_2) rhos1 <- seq(0, 2*ML_rho_1,length.out = 21) rhos2 <- seq(0, 2*ML_rho_2,length.out = 21) f1 <- function(x) log_likelihood_Y(sim$Y[win1],M,rho = x, theta = theta_global_estimate) f2 <- function(x) log_likelihood_Y(sim$Y[win2],M,rho = x, theta = theta_global_estimate) logL1 <- sapply(X = rhos1, FUN = f1) logL2 <- sapply(X = rhos2, FUN = f2)
pl_lik1 <- qplot(x = rhos1, y = logL1, geom = 'line') print(pl_lik1)
pl_lik2 <- qplot(x = rhos2, y = logL2, geom = 'line') print(pl_lik2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.