## Yuanlong LIU
## 12/04/2018
## 23/05/2018
## 29/05/2018
## THINGS LEFT TO DO (MUST)
## 1. CHANGE A_final / ws. A_final should be A_whole, in if(adjust_segmentss_topdom==TRUE) segmentss = segmentss_adjust_topdom(A_final, segmentss, n2one, ws=5:20)
## A should be already named
## A should be symmetric
## Does not remove 0-rows/columns of A
HRG_MLE_each_compartment_fun <- function(pA_sym, seg, allowed_max_nbins_seq=500:1000, max_nbins_fine=max(allowed_max_nbins_seq), ncores, res_dir, fast, adjust_segmentss_topdom=TRUE)
{
min_n_bins_outer=2
min_n_bins_inner=2
distr = 'lnorm'
A = pA_sym[seg, seg]
if( is.null(rownames(A)) | is.null(colnames(A))) stop('A should be named by the bin indices')
arg_list = as.list(environment())
res_folder = file.path(res_dir)
dir.create(res_folder, recursive=TRUE, showWarnings = TRUE)
total_execution_time_file = file.path(res_dir, 'total_execution.time')
cat('n_core used:', ncores, '\n\n', file=total_execution_time_file, append=FALSE)
time_begin = Sys.time()
cat('Execution begins:', as.character(time_begin), '\n', file=total_execution_time_file, append=TRUE)
## max_nbins is defined as the one from allowed_max_nbins_seq has the smallest residue, from which, the smallest is taken
n_A = nrow(A)
## IF THE SEGMENT IS ALREADY SMALL ENOUGH
if( n_A <= max(allowed_max_nbins_seq) )
{
## This will be modified
{
dists = (2*min_n_bins_outer - 1):(nrow(A)-1)
counts = sapply(dists, function(v) n_cells2compute( A, v, min_n_bins_outer ))
split_info = split2BanlancedChunks_min_max(vec=counts, K=min(length(counts)-1, ncores))
chunks = c(dists[1]-1, dists[split_info$break_points])
res_outer = HRG_core( A=A, chunks=chunks, res_folder=NULL, min_n_bins=min_n_bins_outer, distr=distr, fast=fast )
}
## this part gets the hi_tree
{
hi_tree = get_tree_v0( res_outer )
V(hi_tree)$left_rescaled = V(hi_tree)$left
V(hi_tree)$right_rescaled = V(hi_tree)$right
V(hi_tree)$width_rescaled = V(hi_tree)$right - V(hi_tree)$left + 1
V(hi_tree)$name = paste('(',V(hi_tree)$left_rescaled, ',', V(hi_tree)$right_rescaled, ')', sep='')
}
full_tree = hi_tree
res_info = list( arg_list=arg_list, trunk=NULL, segmentss_nadj=NULL, segmentss=NULL, res_outer=NULL, res_inner=list(res_outer) )
res_info$full_tree = full_tree
res_folder_final = file.path(res_dir, 'final')
dir.create(res_folder_final, recursive=TRUE, showWarnings = TRUE)
save(res_info, file=file.path(res_folder_final, 'res_info.Rdata'))
time_finish = Sys.time()
cat('Execution finishes:', as.character(time_finish), '\n\n', file=total_execution_time_file, append=TRUE)
cat('Total execution time:', capture.output( time_finish - time_begin ), '\n\n', file=total_execution_time_file, append=TRUE)
return(res_info)
}
## which one leads to the least residue
max_nbins = allowed_max_nbins_seq[ min(which(n_A%%allowed_max_nbins_seq==min(n_A%%allowed_max_nbins_seq))) ]
residue = n_A %% max_nbins
residue_mat_info = get_least_residue_matrix(A, max_nbins=max_nbins, allowed_shifts=0)
A_final = residue_mat_info$A_final
n2one = residue_mat_info$n2one
## A_low has no row/col name
A_low = HighResolution2Low_k( A_final, k=max_nbins ) ## k is the dimension of A_final_low
## Starting from here, the original row/col names are no longer used
## this part run HRG_core on the outer part
{
dists = (2*min_n_bins_outer - 1):(nrow(A_low)-1)
counts = sapply(dists, function(v) n_cells2compute( A_low, v, min_n_bins_outer ))
split_info = split2BanlancedChunks_min_max(vec=counts, K=min(length(counts)-1, ncores))
chunks = c(dists[1]-1, dists[split_info$break_points])
res_outer = HRG_core( A=A_low, chunks=chunks, res_folder=NULL, min_n_bins=min_n_bins_outer, distr=distr, fast=fast )
}
## this part gets the hi_tree
{
hi_tree = get_tree_v0( res_outer )
V(hi_tree)$left_rescaled = n2one*(V(hi_tree)$left - 1) + 1
V(hi_tree)$right_rescaled = n2one*V(hi_tree)$right
node2adjust = which(V(hi_tree)$right_rescaled==max(V(hi_tree)$right_rescaled)) ## append the residue bins here
V(hi_tree)[node2adjust]$right_rescaled = n_A
V(hi_tree)$width_rescaled = V(hi_tree)$right_rescaled - V(hi_tree)$left_rescaled + 1
V(hi_tree)$name = paste('(',V(hi_tree)$left_rescaled, ',', V(hi_tree)$right_rescaled, ')', sep='')
}
# segmentss = get_segments(hi_tree, binsize_thresh=max_nbins_bp)
segmentss_nadj = get_segments(hi_tree, binsize_thresh=max_nbins_fine)
## THIS PARAT ADJUST THE segmentss BASED ON TOPDOM BIN_SIGNAL
## 14-05-2018
## Also adjust the tree
if(adjust_segmentss_topdom==TRUE)
{
shift = seg[1]-1
segmentss = segmentss_adjust_topdom(pA_sym, segmentss_nadj+shift, n2one, ws=5:20) - shift
for( i in 1:nrow(segmentss) )
{
index_left2adj = which(V(hi_tree)$left_rescaled == segmentss_nadj[i,1])
index_right2adj = which(V(hi_tree)$right_rescaled == segmentss_nadj[i,2])
V(hi_tree)[index_left2adj]$left_rescaled = segmentss[i,1]
V(hi_tree)[index_right2adj]$right_rescaled = segmentss[i,2]
V(hi_tree)$width_rescaled = V(hi_tree)$right_rescaled - V(hi_tree)$left_rescaled + 1
V(hi_tree)$name = paste('(',V(hi_tree)$left_rescaled, ',', V(hi_tree)$right_rescaled, ')', sep='')
}
}
trunk = hi_tree
# save(segmentss, file=file.path(res_dir, 'outer', 'segmentss.Rdata'))
res_inner = rep(list(list()), nrow(segmentss))
for( i in 1:nrow(segmentss) )
{
cat(i,'\n')
# if(i==2) while(as.numeric(substr(as.character(Sys.time()),12,13))!=20) {}
index = segmentss[i,1]:segmentss[i,2]
A_part = A[index, index]
dists = (2*min_n_bins_inner - 1):(nrow(A_part)-1)
counts = sapply(dists, function(v) n_cells2compute( A_part, v, min_n_bins_inner ))
split_info = split2BanlancedChunks_min_max(vec=counts, K=min(length(counts)-1, ncores))
chunks = c(dists[1]-1, dists[split_info$break_points])
## A_part is named
res_info_inner = HRG_core( A=A_part, chunks=chunks, res_folder=NULL, min_n_bins=min_n_bins_inner, distr=distr, fast=fast )
res_inner[[i]] = res_info_inner #minimum size filtering
cat('finished', '\n')
}
res_info = list( arg_list=arg_list, trunk=trunk, segmentss_nadj=segmentss_nadj, segmentss=segmentss, res_outer=res_outer, res_inner=res_inner )
res_folder_final = file.path(res_dir, 'final')
dir.create(res_folder_final, recursive=TRUE, showWarnings = TRUE)
save(res_info, file=file.path(res_folder_final, 'res_info.Rdata'))
time_finish = Sys.time()
cat('Execution finishes:', as.character(time_finish), '\n\n', file=total_execution_time_file, append=TRUE)
cat('Total execution time:', capture.output( time_finish - time_begin ), '\n\n', file=total_execution_time_file, append=TRUE)
## xenocfraf:
# res_inner = res_inner[sapply(res_inner, length) > 0]
branches = lapply( res_inner, get_tree_v0 )
for( i in 1:length(branches) ) branches[[i]] = update_branch_name(branches[[i]], root_start=segmentss[i,1])
full_tree = xenocraft( trunk, branches )
names_tmp = do.call(rbind, strsplit(V(full_tree)$name, ','))
V(full_tree)$left = substring(names_tmp[,1], 2)
V(full_tree)$right = substring(names_tmp[,2], 1, nchar(names_tmp[,2])-1)
if(!is_binary_tree(full_tree)) stop("Trunk + branches do not produce a binary tree")
res_info$full_tree = full_tree
save(res_info, file=file.path(res_folder_final, 'res_info.Rdata'))
return( res_info )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.