WHhowto.R
In DCLEAR: Distance Based Cell Lineage Reconstruction

## ----knitr_options, echo=FALSE, results=FALSE---------------------------------
library(knitr)
opts_chunk$set(fig.width = 12)

## ----loading, include=FALSE---------------------------------------------------
library(DCLEAR)
library(phangorn)
library(dplyr)
library(purrr)
library(ape)


## ----simulate1----------------------------------------------------------------
m = 30  # number of targets
acell = as.integer(rep(1,m)) ## mother cell with an unmutated state
mu_d = 0.1  ## mutation rate (ex 0.01 to 0.3)
d = 3  ## number of cell division
n_s = 5 ##  the number of possible mutational outcomes (1:n_s)
p_d = 0.05 ## dropout probability

nmstrings = c( '0', '-', LETTERS[1:n_s] ) 
sim_tree = list()
sim_tree[[1]] = acell

k = 2
while(k < 2^d) {
    ## codes for point mutation
    mother_cell = sim_tree[[k%/%2]]
    mu_loc = runif(m) < mu_d
    mutation_cites = (mother_cell == 1) &  mu_loc
    n_mut = sum(mutation_cites)
    if (n_mut > 0) {
        mother_cell[mutation_cites] = as.integer(sample(n_s, n_mut, replace = T)+2)
    }
    
    ## codes for dropout
    dropout_cites = runif(m) < p_d
    if (sum(dropout_cites) > 2 ) {
        dropout_between = sample(which(dropout_cites), 2 )
        mother_cell[dropout_between[1]:dropout_between[2]] = as.integer(2)
    }
    
    sim_tree[[k]] = mother_cell
    k = k+1
}

## ----simulate2----------------------------------------------------------------
1:7 %>% map(~paste(nmstrings[sim_tree[[.]]], collapse=""))

## ----parameters---------------------------------------------------------------
set.seed(1)
mu_d1 = c( 30, 20, 10, 5, 5, 1, 0.01, 0.001)
mu_d1 = mu_d1/sum(mu_d1)
simn = 100 # number of cell samples
m = 200  ## number of targets
mu_d = 0.03 # mutation rate
d = 12 # number of cell division
p_d = 0.005 # dropout probability

## ----simulatedata-------------------------------------------------------------
sD = sim_seqdata(sim_n = simn, m = m, mu_d = mu_d, d = d, n_s = length(mu_d1), outcome_prob = mu_d1, p_d = p_d )

## ----simulatedata_seq---------------------------------------------------------
sD$seqs

## ----simulatedata_tree--------------------------------------------------------
class(sD$tree)

## ----hamming------------------------------------------------------------------
distH = dist.hamming(sD$seqs)

## ----treeconst----------------------------------------------------------------
TreeNJ = NJ(distH)
TreeFM = fastme.ols(distH)

## ----eval---------------------------------------------------------------------
print( RF.dist(TreeNJ, sD$tree, normalize = TRUE) )
print( RF.dist(TreeFM, sD$tree, normalize = TRUE) )

## ----initial_wh1--------------------------------------------------------------
InfoW = -log(mu_d1)  ## entropy as their weights
InfoW[1:2] = 1   ## weight for initial state is 1 , weight for '-' is set to 1. 
InfoW[3:7] = 4.5 ## Constant weight for outcome states
dist_wh1 = WH(sD$seqs, InfoW)
#dist_wh1 = dist_weighted_hamming(sD$seqs, InfoW, dropout = FALSE)


## ----treeconst_wh1------------------------------------------------------------
TreeNJ_wh1 = NJ(dist_wh1)
TreeFM_wh1 = fastme.ols(dist_wh1)

## ----eval_wh1-----------------------------------------------------------------
print( RF.dist(TreeNJ_wh1, sD$tree, normalize = TRUE) )
print( RF.dist(TreeFM_wh1, sD$tree, normalize = TRUE) )

## ----initial_wh2--------------------------------------------------------------
InfoW = -log(mu_d1)  ## entropy as their weights
InfoW[1] = 1  # weight for initial state is 1
InfoW[2] = 12  # weight for interval dropout, '----', is set to 12. 
InfoW[3:7] = 3 # Constant weight for outcome states
dist_wh2 = WH(sD$seqs, InfoW, dropout=TRUE)

## ----treeconst_wh2------------------------------------------------------------
TreeNJ_wh2 = NJ(dist_wh2)
TreeFM_wh2 = fastme.ols(dist_wh2)

## ----eval_wh2-----------------------------------------------------------------
print( RF.dist(TreeNJ_wh2, sD$tree, normalize = TRUE) )
print( RF.dist(TreeFM_wh2, sD$tree, normalize = TRUE) )