get_ccA_atanh = function(contact_mat_file, compress_size, bin_size)
{
contact_mat_raw = fread(contact_mat_file)
contact_mat_raw = subset(contact_mat_raw, !is.na(V3))
contact_mat_raw[,1] = contact_mat_raw[,1]/bin_size
contact_mat_raw[,2] = contact_mat_raw[,2]/bin_size
contact_mat = contact_mat_raw
colnames(contact_mat) = c('pos_1', 'pos_2', 'val')
if(!all(contact_mat[[2]] >= contact_mat[[1]])) stop('\nYour provided matrix does not represent an upper triangular matrix!\n\n')
n_bins_whole = max(max(contact_mat[[1]]), max(contact_mat[[2]])) + 1 ## should +1 because contact_mat index starts from 0 (bin 0 represents: 0-10E3, checked by looking at the juicebox map, 2018-11-19)
contact_mat_sparse = Matrix(0, nrow=n_bins_whole, ncol=n_bins_whole)
contact_mat_sparse[cbind(contact_mat[[1]]+1, contact_mat[[2]]+1)] <- contact_mat[[3]]
rownames(contact_mat_sparse) = colnames(contact_mat_sparse) = as.character( 1:nrow(contact_mat_sparse) )
contact_mat_sparse = forceSymmetric(contact_mat_sparse, uplo='U')
# if(bin_size!=bin_size_initial) contact_mat_sparse = mat_10to40kb( contact_mat_sparse, bin_size, bin_size_initial )
A = remove_blank_cols(contact_mat_sparse, sparse=TRUE, ratio=zero_ratio) ## has the same rows/cols as A
if(nrow(A) < 100) A = remove_blank_cols(contact_mat_sparse, sparse=TRUE, ratio=0) ## when all are dense
while(min(apply(A, 1, sd))==0) ## sometimes after removing the cols / rows, the remained part will all be 0
{
A = remove_blank_cols(A, sparse=TRUE, ratio=1E-7) ## has the same rows/cols as A
if(nrow(A) < 1) stop('ERROR IN GENERATING MEANINGFUL A at the data generating step')
}
# #########################################################
n_bins_A = nrow(A) - nrow(A)%%compress_size
A_2_compress = A[, 1:n_bins_A]
bin_names = rownames(A)
A_compressed = compress_mat_fast( as.matrix(A_2_compress), compress_size=compress_size )
colnames(A_compressed) = bin_names
rm(A_2_compress); gc()
A_compressed_sparse = A_compressed
A_compressed = as.matrix(A_compressed)
A_compressed_log = log2(A_compressed + 1)
# #########################################################
cat('Compute correlation matrix\n')
cA_compressed_log = fast_cor(A_compressed_log)
ccA_compressed_log = fast_cor(cA_compressed_log)
cat('Finished computing correlation matrix\n')
# #########################################################
# # ccA_compressed_atanh = atanh(ccA_compressed - 1E-7)
ccA_atanh = atanh(ccA_compressed_log / (1+1E-7))
A = as.matrix(A)
rm(A_compressed, A_compressed_sparse, cA_compressed_log, A_compressed_log); gc()
# #########################################################
cat('Finished data generation\n')
return(list(A=A, ccA_atanh=ccA_atanh))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.