#! bin/R
## 加载包
library("tidyverse")
library("kcaoplot")
library("GenomicRanges")
library("parallel")
## 参数解析
singel_TF_mutation_dir <- "/public/home/kcao/Desktop/2018_NC_GS/Jasper_tf_somatic/"
paired_TF_mutation_dir <- "/public/home/kcao/Desktop/2018_NC_GS/Jasper_paired_tf_somatic/"
FIMO_TF_scan_dir <- "/public/home/kcao/genome_human/hg19_fimo/motif_dir/"
## 1.读取mutation_score,选取mutation score阈值
# 返回目录 2018_NC_GS
setwd(paste0(singel_TF_mutation_dir, "/.."))
data <- read_delim("tmp_merge_score.txt", delim = "\t", col_names = FALSE)
colnames(data) <- c("TF_name", "mutation_score")
cutoff <- as.vector(quantile(data$mutation_score, 0.8)) # 1.7
# TF filter vector
filter_tf <- data[data$mutation_score > cutoff,][["TF_name"]]
# 测试3个tf
filter_tf <- head(filter_tf, 10)
## 2.进行paired_TF_mutation_score calculate
# 排列组合
Permute <- function(TF_vector) {
TF_combind_matrix <- combn(TF_vector, 2)
TF_Permute_matrix <- cbind(TF_combind_matrix, apply(TF_combind_matrix, 2, rev))
return(TF_Permute_matrix)
}
TF_Permute_matrix <- Permute(filter_tf)
# r$> Permute(filter_tf)
# [,1] [,2] [,3]
# [1,] "HOXC12_MA0906.1.meme" "HOXC12_MA0906.1.meme" "Arid5a_MA0602.1.meme"
# [2,] "Arid5a_MA0602.1.meme" "E2F3_MA0469.3.meme" "E2F3_MA0469.3.meme"
# [,4] [,5] [,6]
# [1,] "Arid5a_MA0602.1.meme" "E2F3_MA0469.3.meme" "E2F3_MA0469.3.meme"
# [2,] "HOXC12_MA0906.1.meme" "HOXC12_MA0906.1.meme" "Arid5a_MA0602.1.meme"
## 3.循环进行计算
# 改写成函数,进行mclapply计算
#RunPairsTF <- function(i) {
# 4.2 开放区域合并
dnase.peak <- bed2GRanges("/public/home/kcao/Desktop/2018_NC_GS/TESTDATA/E094-DNase.all.peaks.v2.bed")
dnase.fdr <- bed2GRanges("/public/home/kcao/Desktop/2018_NC_GS/TESTDATA/E094-DNase.fdr0.01.peaks.v2.bed")
dnase.mac <- bed2GRanges("/public/home/kcao/Desktop/2018_NC_GS/TESTDATA/E094-DNase.macs2.narrowPeak")
dnase <- c(dnase.peak, dnase.fdr, dnase.mac)
dnase <- GenomicRanges::reduce(dnase)
# 4.3 读入snp信息
somatic_gastric <- bed2GRanges("/public/home/kcao/Desktop/2018_NC_GS/TESTDATA/41467_2018_3828_MOESM6_ESM.txt", header = TRUE)
names(mcols(somatic_gastric))[1:2] <- c("Ref", "Alt")
somatic_gastric$id = c(1:length(somatic_gastric))
RunPairsTF <- function(i) {
# for (i in seq(ncol(TF_Permute_matrix))) {
target_tf <- TF_Permute_matrix[1, i]
co_factor <- TF_Permute_matrix[2, i]
work_dir=paste0(paired_TF_mutation_dir, target_tf,"/",co_factor)
if (!dir.exists(work_dir)) {
dir.create(work_dir, recursive = TRUE)
# system(paste0("mkdir -p ",work_dir))
}
setwd(work_dir)
# print(c(target_tf,co_factor))
# 4.1 读入数据target_tf,co_factor
path_target_tf_fimo_dir <- paste0(FIMO_TF_scan_dir, str_replace(target_tf, ".meme", ""))
path_co_factor_fimo_dir <- paste0(FIMO_TF_scan_dir, str_replace(co_factor, ".meme", ""))
name_target.fimo <- list.files(path = path_target_tf_fimo_dir, pattern = "*.tsv")
name_co_factor.fimo <- list.files(path = path_co_factor_fimo_dir, pattern = "*.tsv")
# 转换格式GRanges ,cofactor.motif 换成全局变量
target.motif <- Fimo2GRanges(paste0(path_target_tf_fimo_dir, "/", name_target.fimo),"GRanges" )
cofactor.motif <<- Fimo2GRanges(paste0( path_co_factor_fimo_dir,"/",name_co_factor.fimo), "GRanges")
# warning(print(c(paste0(path_target_tf_fimo_dir, "/", name_target.fimo),"\n",paste0( path_co_factor_fimo_dir,"/",name_co_factor.fimo))))
# warning(print(c(length(target.motif), "\t", length(cofactor.motif)))) # ok
# 4.4 过滤target_fimo_motif
motif.ovl <- filter_fimo(dnase, target.motif, cofactor_GRanges = "cofactor.motif" , extend_size = 1000)
# warning(length(get("target.motif")))
# warning(length(get("cofactor.motif")))
# warning(nrow(get("motif.ovl")))
# 拓展到1kb
site <- filter_fimo_to_1000bp_gr(motif.ovl)
# 4.5 与突变位置取交集
# 与突变位置取交集
mut.table<-mut_pos_tb(site,somatic_gastric)
# warning(nrow(mut.table))
# 画1k附近突变分布图
x.labs <- as.character(mapply(paste0, str_replace(target_tf, ".meme", ""), "(", str_replace(co_factor, ".meme", ""), ")"))
p_1 <- plot_mutation_loci(mut.table, center_expend_each = 10,
tf_name = str_replace(target_tf, ".meme", "") ,
mutationScore_outdir="./")
p_1 <- p_1 + xlab(paste0(x.labs," + flanki n g region (bp)"))
p_1
ggsave("mutation_distribution_1k.pdf",p_1)
# 画flank_5bp 区间分布图
p_2 <- plot_flank_5bp(motif.ovl,somatic_gastric)+ # labs(caption=str_replace(target_tf,".meme", ""))+
xlab(paste0(x.labs," + flanking region (bp)"))
ggsave("mutation_distribution_5bp.pdf",p_2)
# 回到主目录
# setwd(paste0(singel_TF_mutation_dir, "/.."))
# 清理环境变量中“cofactor.motif”
# rm(list=c("cofactor.motif","motif.ovl"))
#}
}
########################################### 运行
require(parallel)
# 单核计算
# system.time({
# res <- lapply(seq(ncol(TF_Permute_matrix)), RunPairsTF);
# })
# lapply(1:3,RunPairsTF)
# 多核并行计算
# detectCores(logical = F) # 8
mc <- getOption("mc.cores", 23)
system.time({
res <- mclapply(seq(ncol(TF_Permute_matrix)), RunPairsTF)
})
print("all work is finished")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.