## These functions try to aggregate bin-wise contact matrix into a domain-wise contact matrix
HighResolution2Low_k_complete <-function( A, max_nbins )
{
nbins = nrow( A )
max_nbins_bp = max_nbins
max_nbinss = max_nbins + (-5:20)
expand_fold_info = t(sapply(max_nbinss, fold_expand_rate, K=nbins))
index = max(which(expand_fold_info[,'rescale_rate']==min(expand_fold_info[,'rescale_rate'])))
max_nbins = max_nbinss[ index ]
n2one_head = expand_fold_info[index, 'n2one_head']
n2one_last = expand_fold_info[index, 'n2one_last']
if(n2one_head==1) return(list())
rescale_rate = n2one_last / n2one_head
if(max_nbins!=max_nbins_bp) warning('Value of max_nbins has been adjusted from ', max_nbins_bp, ' to ', max_nbins, '. This results in rescale_rate as: ', rescale_rate, '\n')
if(rescale_rate!=1) ## only do when rescale is not 1
{
aa_end = n2one_head*(max_nbins-1)
bb_end = nrow(A)
A_aa = A[1:aa_end, 1:aa_end]
A_bb = A[(aa_end+1):bb_end, (aa_end+1):bb_end]
A_ab = A[1:aa_end, (aa_end+1):bb_end, drop=FALSE]
A_aa_low = HighResolution2Low_k(mat=A_aa, k=max_nbins-1)
A_bb_low = sum( A_bb )
## https://stackoverflow.com/questions/15265512/summing-every-n-points-in-r
v = apply(A_ab, 1, sum)
A_ab_low = unname(tapply(v, (seq_along(v)-1) %/% n2one_head, sum))
A_ab_low_new = A_ab_low / n2one_last*n2one_head
A_bb_low_new = A_bb_low / n2one_last^2*n2one_head^2
A_low = rbind(cbind(A_aa_low, A_ab_low_new), c( A_ab_low_new, A_bb_low_new ))
colnames(A_low) = NULL
}
if(rescale_rate==1) A_low = HighResolution2Low_k(mat=A, k=max_nbins) ## k is the dimension of A_final_low
## A_low has no row/col name
res = list(A_low=A_low, n2one_head=n2one_head, n2one_last=n2one_last)
return(res)
}
split_info <-function( K, max_nbins )
{
x = floor( K/max_nbins ) + 1
n2 = max_nbins*x - K
n1 = max_nbins - n2
vec = c(rep(x, n1), rep(x-1, n2))
split_vec = split(1:K, rep(1:max_nbins, vec))
expand_fold_ratio = min(n1/n2, n2/n1)
res = list(vec=vec, expand_fold_ratio=expand_fold_ratio, split_vec=split_vec)
return(res)
}
## diagonal values are summed twice
HighResolution2Low <-function( A, rescale, which=2 )
{
nbins = nrow(A)
nbins_low = floor(nbins/rescale)
A_low = matrix( , nbins_low, nbins_low)
keep_rows = nbins - nbins%%nbins_low
A_truncated = A[ 1:keep_rows, 1:keep_rows ]
if(which==1) A_low = matsplitter_sum(A_truncated, keep_rows/nbins_low, keep_rows/nbins_low)
if(which==2) A_low = mat_split_sum(A_truncated, keep_rows/nbins_low, keep_rows/nbins_low)
return( A_low )
}
## https://stackoverflow.com/questions/24299171/function-to-split-a-matrix-into-sub-matrices-in-r
## Function to split a matrix into sub-matrices in R
matsplitter_sum <- function(M, r, c)
{
rg <- (row(M)-1)%/%r + 1
cg <- (col(M)-1)%/%c + 1
rci <- (rg-1)*max(cg) + cg
N <- prod(dim(M))/r/c
cv <- unlist(lapply(1:N, function(x) M[rci==x]))
dim(cv)<-c(r, c, N)
res = matrix(apply(cv, 3, sum), nrow(M)/r, byrow=TRUE)
return(res)
}
mat_split_sum <- function(M, r, c)
{
nr <- ceiling(nrow(M)/r)
nc <- ceiling(ncol(M)/c)
newM <- matrix(NA, nr*r, nc*c)
newM[1:nrow(M), 1:ncol(M)] <- M
div_k <- kronecker(matrix(seq_len(nr*nc), nr, byrow = TRUE), matrix(1, r, c))
matlist <- split(newM, div_k)
res = matrix(sapply(matlist, sum), nrow(M)/r, byrow=TRUE)
return(res)
}
## this is much faster
HighResolution2Low_k <- function(mat, k)
{
chunk2 <- function(x, n) split(x, cut(seq_along(x), n, labels = FALSE))
n = nrow(mat)
A_low = matrix( , k, k )
indices = chunk2( 1:n, k )
for(row_index in 1:k)
{
A_sub = mat[indices[[row_index]], ]
for(col_index in 1:k) A_low[row_index, col_index] = sum( A_sub[ , indices[[col_index]]] )
}
return( A_low )
}
HighResolution2Low_k_rectangle <- function(mat, row_split, col_split, sum_or_mean=c('sum', 'mean', 'median', 'p_above_one'))
{
# if(remove_zero==TRUE) mat[mat==0] = NA
k_row = length(row_split)
k_col = length(col_split)
A_low = matrix( , k_row, k_col )
if(sum_or_mean=='sum')
{
for(row_index in 1:k_row)
{
A_sub = mat[row_split[[row_index]], , drop=FALSE ]
for(col_index in 1:k_col) A_low[row_index, col_index] = sum( A_sub[ , col_split[[col_index]]] )
}
}
if(sum_or_mean=='mean')
{
for(row_index in 1:k_row)
{
A_sub = mat[row_split[[row_index]], , drop=FALSE ]
for(col_index in 1:k_col) A_low[row_index, col_index] = mean( A_sub[ , col_split[[col_index]]], na.rm=TRUE )
}
}
if(sum_or_mean=='median')
{
for(row_index in 1:k_row)
{
A_sub = mat[row_split[[row_index]], , drop=FALSE ]
for(col_index in 1:k_col) A_low[row_index, col_index] = median( A_sub[ , col_split[[col_index]]], na.rm=TRUE )
}
}
if(sum_or_mean=='p_above_one')
{
for(row_index in 1:k_row)
{
A_sub = mat[row_split[[row_index]], , drop=FALSE ]
for(col_index in 1:k_col) A_low[row_index, col_index] = mean( A_sub[ , col_split[[col_index]]] > 1 )
}
}
# if(remove_zero==TRUE) A_low[is.na(A_low)] = 0
return( A_low )
}
## for a nxn matrix, this function generate a nxm matrix, with m*compress_size = n
## this function is used for generating the nxm matrix, where the i,jth value is the contact value
## 08-08-2018
compress_mat_fast = function(input_mat, compress_size)
{
mat_block <- function(n, r) suppressWarnings( matrix(c(rep(1, r), rep(0, n)), n, n/r) )
n = ncol(input_mat)
mat2prod = mat_block(n, compress_size)
# return(t(input_mat%*%mat2prod / compress_size))
return(t(matrix_multiplication_cpp(input_mat, mat2prod)) / compress_size)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.