######################################################################
####
#### functions specifically for files generated by CO pipeline
####
######################################################################
snv_indel_rainfall = function(pid, type = c("all", "intragenic"),
..., species = "hg19", chromosome = c(paste0("chr", 1:22), c("chrX", "chrY")),
title = paste0("snv and indel for ", pid)) {
type = match.arg(type)[1]
vcf_list = list(...)
###################
## rainfall plot for snv
vcf_list = lapply(vcf_list, function(f) read.table(f, sep = "\t", header = TRUE, comment.char = "", stringsAsFactor = FALSE, check.names=FALSE))
vcf_density_list = vector("list", length(vcf_list))
names(vcf_density_list) = names(vcf_list)
for(i in seq_along(vcf_list)) {
vcf = vcf_list[[i]]
if(type == "intragenic") {
vcf = vcf[vcf$ANNOVAR_FUNCTION != "intergenic", ]
}
annotated_to_db = rep(FALSE, nrow(vcf))
db_column = grep("DBSNP|1K_GENOMES|ENSEMBL_SNP|SANGER", colnames(vcf))
for(cn in db_column) {
annotated_to_db = annotated_to_db | vcf[[cn]] != "."
}
col = ifelse(annotated_to_db, "#FF000080", "#00000080")
vcf = vcf[c(1, 2, 2)]; vcf[[3]] = vcf[[3]] + 1;
vcf[[1]] = ifelse(grepl("^chr", vcf[[1]]), vcf[[1]], paste("chr", vcf[[1]], sep = ""))
vcf = cbind(vcf, col, stringsAsFactor = FALSE)
vcf_list[[i]] = vcf
vcf_density = lapply(split(vcf, vcf[[1]]), function(gr) {
gr2 = circlize::genomicDensity(gr[2:3], window.size = 1e6)
cbind(chr = rep(gr[1,1], nrow(gr2)), gr2)
})
vcf_density_list[[i]] = do.call("rbind", vcf_density)
}
n = length(vcf_list)
gtrellis_layout(n_track = n*2, ncol = 5, byrow = FALSE, species = species, category = chromosome,
track_height = rep(c(2, 1), n),
track_ylim = unlist(lapply(vcf_density_list, function(df) c(0, 8, 0, max(df[[4]])))),
track_ylab = rep(names(vcf_list), times = 2),
add_name_track = TRUE, add_ideogram_track = TRUE,
title = title)
for(i in seq_along(vcf_list)) {
# track for rainfall plots
add_track(vcf_list[[i]], panel.fun = function(gr) {
if(nrow(gr) > 2) {
df = circlize::rainfallTransform(gr[2:3])
x = (df[[1]] + df[[2]])/2
y = log10(df[[3]])
col = as.vector(gr[[4]])
grid.points(x, y, pch = 16, size = unit(4, "bigpts"), gp = gpar(col = col))
}
})
# track for genomic density
add_track(vcf_density_list[[i]], panel.fun = function(gr) {
x = (gr[[3]] + gr[[2]])/2
y = gr[[4]]
grid.polygon(c(x[1], x, x[length(x)]),
c(0, y, 0), default.units = "native", gp = gpar(fill = "red"))
})
}
lg = legendGrob(c("Annotated", "others"), pch = 16, vgap = 0, gp = gpar(col = c("red", "black")))
pushViewport(viewport(0.99, 0.01, width = grobWidth(lg), height = grobHeight(lg), just = c(1, 0)))
grid.draw(lg)
upViewport()
}
# for(pid in dir("/icgc/dkfzlsdf/analysis/hipo/hipo_047/results_per_pid_pcawg")) {
# png(qq("@{pid}_snv_indel.png"), width = 1200, height = 1200)
# snv_indel_rainfall(pid = pid,
# snv_vcf = qq("/icgc/dkfzlsdf/analysis/hipo/hipo_047/results_per_pid_pcawg/@{pid}/mpileup/snvs_@{pid}_somatic_snvs_conf_8_to_10.vcf"),
# indel_vcf = qq("/icgc/dkfzlsdf/analysis/hipo/hipo_047/results_per_pid_pcawg/@{pid}/platypus_indel/indel_@{pid}_somatic_indels_conf_8_to_10.vcf")
# )
# dev.off()
# }
# for(f in dir("/home/guz/project/analysis/hipo47/snv_indel/germline_snv/")) {
# pid = gsub("snvs_(.*?)_germline.*$", "\\1", f)
# png(qq("@{pid}_germline_snv.png"), width = 1200, height = 1200)
# snv_indel_rainfall(pid = pid,
# germline_snv = qq("/home/guz/project/analysis/hipo47/snv_indel/germline_snv/@{f}"),
# title = qq("germline snv for @{pid}"))
# dev.off()
# }
# load("/icgc/dkfzlsdf/analysis/B080/guz/gencode/gencode_v19_transcript_merged.RData")
# gn = sapply(gene_annotation$gtf, function(x) x$name)
# gene_len = sapply(gene_annotation$gtf, function(x) {
# sum(sapply(x$transcript[[1]]$exon, function(y) y$end - y$start + 1))
# })
# load("/icgc/dkfzlsdf/analysis/hipo/hipo_047/hipo_047_rnaseq_gencode19_expression.RData")
# rpkm = log2(expression$rpkm + 1)
# l = !duplicated(gn)
# rpkm = rpkm[l, ]
# rownames(rpkm) = gn[rownames(rpkm)]
# colnames(rpkm) = gsub("_tumor$", "", colnames(rpkm))
read_snv_mutation = function(files, samples, type = "exonic") {
genes = list()
for(i in seq_along(files)) {
tb = read.table(files[i], stringsAsFactors = FALSE, sep = "\t", quote = "", header = TRUE, comment.char = "")
tb = tb[!tb[[18]] %in% c("synonymous SNV"), ,drop = FALSE]
tb = tb[tb[[16]] == type, ,drop = FALSE]
g = tb[[17]]
g = gsub("\\(.*\\)", "", g)
g = unique(unlist(strsplit(g, ",")))
genes[[i]] = unique(g)
}
all_genes = unique(unlist(genes))
qqcat("@{length(all_genes)} genes\n")
mat = matrix(0, nrow = length(all_genes), ncol = length(samples))
rownames(mat) = all_genes
colnames(mat) = samples
for(i in seq_along(genes)) {
mat[genes[[i]], i] = 1
}
return(mat)
}
snv_oncoprint = function(files, samples, filename = "Rplots.pdf") {
all_types = c("exonic", "exonic;splicing", "splicing",
"ncRNA_intronic", "intronic", "intergenic", "upstream", "downstream",
"UTR3", "UTR5")
for(type in all_types) {
qqcat("drawing snv oncoprint for @{type}\n")
mat = read_snv_mutation(files, samples, type = type)
if(nrow(mat) == 0) next
pdf(gsub("pdf$", paste0(type, ".pdf"), filename), width = 12, height = nrow(mat)/5+5)
ht_list = oncoPrint(list(snv = mat),
alter_fun_list = list(snv = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.9, gp = gpar(fill = "red", col = NA))),
col = c("snv" = "red"), show_column_names = TRUE, remove_empty_columns = FALSE,
heatmap_legend_param = list(title = "alterations"))
draw(ht_list, column_title = qq("@{nrow(mat)} genes having snvs (@{type})"))
dev.off()
if(nrow(mat) > 100) {
file = qq(filename)
pdf(qq("short_version_@{file}"), width = 12, height = 16)
draw(ht_list, column_title = qq("@{nrow(mat)} genes having snvs (@{type})"))
dev.off()
}
}
}
# files = scan(pipe("ls /icgc/dkfzlsdf/analysis/hipo/hipo_047/results_per_pid_pcawg/*/mpileup/*_somatic_snvs_conf_8_to_10.vcf"), what = "character")
# samples = gsub("^.*results_per_pid_pcawg/(.*?)/mpileup.*$", "\\1", files)
# snv_oncoprint(files, samples, filename = "nonsynonymous_somatic_snvs_oncoprint_@{type}.pdf", expr = rpkm)
# files = dir("/home/guz/project/analysis/hipo47/snv_indel/germline_snv/")
# files = paste0("/home/guz/project/analysis/hipo47/snv_indel/germline_snv/", files)
# samples = gsub("^.*snvs_(.*?)_germline.*$", "\\1", files)
# snv_oncoprint(files, samples, filename = "nonsynonymous_germline_snvs_oncoprint_@{type}.pdf", expr = rpkm)
read_indel_mutation = function(files, samples, type = "exonic") {
genes = list()
for(i in seq_along(files)) {
tb = read.table(files[i], stringsAsFactors = FALSE, sep = "\t", quote = "", header = TRUE, comment.char = "")
tb = tb[tb[[14]] == type, ,drop = FALSE]
g = tb[[15]]
g = gsub("\\(.*\\)", "", g)
g = unique(unlist(strsplit(g, ",")))
genes[[i]] = unique(g)
}
all_genes = unique(unlist(genes))
qqcat("@{length(all_genes)} genes\n")
mat = matrix(0, nrow = length(all_genes), ncol = length(samples))
rownames(mat) = all_genes
colnames(mat) = samples
for(i in seq_along(genes)) {
mat[genes[[i]], i] = 1
}
return(mat)
}
indel_oncoprint = function(files, samples, filename = "Rplots.pdf") {
# indels
all_types = c("exonic", "downstream", "intronic", "upstream", "UTR3", "UTR5")
for(type in all_types) {
qqcat("drawing indel oncoprint for @{type}\n")
mat = read_indel_mutation(files, samples, type = type)
if(nrow(mat) == 0) next
pdf(gsub("pdf$", paste0(type, ".pdf"), filename), width = 12, height = nrow(mat)/5+5)
ht_list = oncoPrint(list(indel = mat),
alter_fun_list = list(indel = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.9, gp = gpar(fill = "blue", col = NA))),
col = c("indel" = "blue"), show_column_names = TRUE, remove_empty_columns = FALSE,
heatmap_legend_param = list(title = "alterations"))
draw(ht_list, column_title = qq("@{nrow(mat)} genes having indel (@{type})"))
dev.off()
if(nrow(mat) > 100) {
file = qq(filename)
pdf(qq("short_version_@{file}"), width = 12, height = 16)
draw(ht_list, column_title = qq("@{nrow(mat)} genes having indel (@{type})"))
dev.off()
}
#print(head(mat))
}
}
snv_indel_oncoprint = function(files_snv, files_indel, samples, filename = "Rplots.pdf") {
if(!is.null(names(files_snv))) {
files_snv = files_snv[intersect(samples, names(files_snv))]
samples_snv = names(files_snv)
} else if(length(files_snv) != length(samples)) {
stop("length of `files_snv` and `samples` differ")
} else {
samples_snv = samples
}
if(!is.null(names(files_indel))) {
files_indel = files_indel[intersect(samples, names(files_indel))]
samples_indel = names(files_indel)
} else if(length(files_indel) != length(samples)) {
stop("length of `files_indel` and `samples` differ")
} else {
samples_indel = samples
}
mat_snv = read_snv_mutation(files_snv, samples_snv)
mat_indel = read_indel_mutation(files_indel, samples_indel)
common_rn = union(rownames(mat_snv), rownames(mat_indel))
common_cn = union(colnames(mat_snv), colnames(mat_indel))
mat_snv2 = matrix(0, nrow = length(common_rn), ncol = length(common_cn))
dimnames(mat_snv2) = list(common_rn, common_cn)
mat_snv2[rownames(mat_snv), colnames(mat_snv)] = mat_snv
mat_indel2 = matrix(0, nrow = length(common_rn), ncol = length(common_cn))
dimnames(mat_indel2) = list(common_rn, common_cn)
mat_indel2[rownames(mat_indel), colnames(mat_indel)] = mat_indel
pdf(filename, width = 12, height = nrow(mat_snv2)/5+5)
ht_list = oncoPrint(list(snv = mat_snv2, indel = mat_indel2),
alter_fun_list = list(
snv = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.9, gp = gpar(fill = "red", col = NA)),
indel = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.4, gp = gpar(fill = "blue", col = NA))
), col = c(snv = "red", indel = "blue"), show_column_names = TRUE, remove_empty_columns = FALSE,
heatmap_legend_param = list(title = "alterations"))
draw(ht_list, column_title = qq("@{nrow(mat_snv2)} genes having functional snv/indel"))
dev.off()
}
# files = scan(pipe("ls /icgc/dkfzlsdf/analysis/hipo/hipo_047/results_per_pid_pcawg/*/platypus_indel/*_somatic_indels_conf_8_to_10.vcf"), what = "character")
# samples = gsub("^.*results_per_pid_pcawg/(.*?)/platypus_indel.*$", "\\1", files)
# indel_oncoprint(files, samples, filename = "somatic_indels_@{type}_oncoprint.pdf", expr = rpkm)
coverage_hilbert_curve = function(pid, tumor_coverage_file, control_coverage_file, output_folder = ".") {
chr.len = read.chromInfo()$chr.len
tumor = read.table(tumor_coverage_file, stringsAsFactors = FALSE)
tumor = GRanges(seqnames = ifelse(grepl("^chr", tumor[[1]]), tumor[[1]], paste0("chr", tumor[[1]])),
ranges = IRanges(start = tumor[[2]],
end = tumor[[2]] + 999),
value = tumor[[3]])
x = tumor$value; x = x[x > 0]
col_fun = colorRamp2(quantile(log10(x), seq(0.1, 0.9, length = 11)), rev(brewer.pal(11, "Spectral")), space = "RGB")
cm = ColorMapping(col_fun = col_fun)
legend = color_mapping_legend(cm, plot = FALSE, title = "log10(cov)")
pdf(qq("@{output_folder}/@{pid}_tumor_hilbert_curve_coverage.pdf"), width = 8, height = 8)
for(chr in paste0("chr", c(1:22, "X", "Y"))) {
qqcat(" tumor: @{chr}\n")
hc = HilbertCurve(1, chr.len[chr], level = 10, mode = "pixel", title = qq("@{pid}, tumor: @{chr}"), legend = legend)
gr = tumor[seqnames(tumor) == chr]
gr = gr[gr$value > 0]
hc_layer(hc, ranges(gr), col = col_fun(log10(gr$value)))
hc_png(hc, file = qq("@{output_folder}/@{pid}_tumor_hilbert_curve_coverage_@{chr}.png"))
}
dev.off()
control = read.table(control_coverage_file, stringsAsFactors = FALSE)
control = GRanges(seqnames = ifelse(grepl("^chr", control[[1]]), control[[1]], paste0("chr", control[[1]])),
ranges = IRanges(start = control[[2]],
end = control[[2]] + 999),
value = control[[3]])
pdf(qq("@{output_folder}/@{pid}_control_hilbert_curve_coverage.pdf"), width = 8, height = 8)
for(chr in paste0("chr", c(1:22, "X", "Y"))) {
qqcat(" control: @{chr}\n")
hc = HilbertCurve(1, chr.len[chr], level = 10, mode = "pixel", title = qq("@{pid}, control: @{chr}"), legend = legend)
gr = control[seqnames(control) == chr]
gr = gr[gr$value > 0]
hc_layer(hc, ranges(gr), col = col_fun(log10(gr$value)))
hc_png(hc, file = qq("@{output_folder}/@{pid}_control_hilbert_curve_coverage_@{chr}.png"))
}
dev.off()
l = tumor$value > 0 | control$value > 0
tumor = tumor[l]
control = control[l]
ratio = tumor
q = quantile(c(tumor$value, control$value), 0.01)
ratio$value = (tumor$value+q)/(control$value+q) * (sum(control$value)/sum(tumor$value))
ratio$value = log2(ratio$value)
col_fun = colorRamp2(c(-1, 0, 1), c("green", "white", "red"), space = "RGB")
cm = ColorMapping(col_fun = col_fun)
legend = color_mapping_legend(cm, plot = FALSE, title = "log2(ratio)")
pdf(qq("@{output_folder}/@{pid}_coverage_ratio_hilbert_curve.pdf"), width = 8, height = 8)
for(chr in paste0("chr", c(1:22, "X", "Y"))) {
qqcat(" ratio: @{chr}\n")
hc = HilbertCurve(1, chr.len[chr], level = 10, mode = "pixel", title = qq("@{pid}, log2(ratio): @{chr}"), legend = legend)
gr = ratio[seqnames(ratio) == chr]
hc_layer(hc, ranges(gr), col = col_fun(gr$value))
hc_png(hc, file = qq("@{output_folder}/@{pid}_coverage_ratio_hilbert_curve_coverage_@{chr}.png"))
}
dev.off()
#### put all chromosome in one plot ####
cat("merging all chromosomes together.\n")
sum_chr = 0
s = numeric(length(ratio))
e = numeric(length(ratio))
for(chr in paste0("chr", c(1:22, "X", "Y"))) {
l = as.character(seqnames(ratio)) == chr
s[l] = start(ratio[l]) + sum_chr
e[l] = end(ratio[l]) + sum_chr
sum_chr = sum_chr + chr.len[chr]
}
ratio2 = GRanges(seqnames = rep("chr_merged", length(ratio)),
ranges = IRanges(start = round(s/1000),
end = round(e/1000)),
value = ratio$value)
chr_cumsum = cumsum(chr.len[paste0("chr", c(1:22, "X", "Y"))])
chr_ir = IRanges(start = round(c(1, chr_cumsum[-length(chr_cumsum)]+1)/1000)+1,
end = round(chr_cumsum/1000),
names = names(chr_cumsum))
col_fun = colorRamp2(c(-1, 0, 1), c("green", "white", "red"), space = "RGB")
cm = ColorMapping(col_fun = col_fun)
legend = color_mapping_legend(cm, plot = FALSE, title = "log2(ratio)")
pdf(qq("@{output_folder}/@{pid}_coverage_ratio_hilbert_curve_all_chromosomes.pdf"), width = 8, height = 8)
hc = HilbertCurve(0, round(max(chr_cumsum)/1000), level = 10, mode = "pixel",
title = qq("@{pid}, log2(ratio): all chromosomes"), legend = legend)
hc_layer(hc, ranges(ratio2), col = col_fun(ratio2$value), grid_line = 3)
#hc_layer(hc, chr_ir, col = rep(c("#00000020", "#20202020", "#40404020"), length(chr_ir))[seq_len(length(chr_ir))])
hc_png(hc, file = qq("@{output_folder}/@{pid}_coverage_ratio_hilbert_curve_coverage_all_chromosomes.png"))
hc = HilbertCurve(0, round(max(chr_cumsum)/1000), level = 8)
hc_rect(hc, chr_ir, gp = gpar(fill = rand_color(length(chr_ir), transparency = 0.5), col = NA))
hc_text(hc, chr_ir, names(chr_ir), gp = gpar(fontsize = 16))
dev.off()
}
visualize_defuse = function(files, prefix = "", ...) {
chr = NULL
lt = list()
for(f in files) {
df = read.table(f, stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
x = df[,c("gene_chromosome1", "gene_chromosome2", "gene_name1", "gene_name2")]
x2 = list()
x2$gene1 = ifelse(x$gene_name1 > x$gene_name2, x$gene_name2, x$gene_name1)
x2$gene2 = ifelse(x$gene_name1 > x$gene_name2, x$gene_name1, x$gene_name2)
chr_info = ifelse(x$gene_chromosome1 == x$gene_chromosome2, "intra_chr", "inter_chr")
chr[paste(x2$gene1, x2$gene2, sep = "|")] = chr_info
lt[[basename(f)]] = table(paste(x2$gene1, x2$gene2, sep = "|"))
}
all_gn = unique(unlist(lapply(lt, names)))
mat = matrix(0, nr = length(all_gn), nc = length(lt))
rownames(mat) = all_gn
colnames(mat) = gsub(".gene_fusion.annotated.txt", "", names(lt))
for(i in seq_along(lt)) {
mat[names(lt[[i]]), i] = lt[[i]]
}
pdf(qq("@{prefix}_gene_fusion_stat.pdf"), width = 10, height = 12)
ht_list = rowAnnotation(bar = row_anno_barplot(apply(mat, 1, function(x) sum(x != 0)),
axis_direction = "reverse", axis = TRUE, axis_side = "bottom", baseline = 0), width = unit(2, "cm")) +
Heatmap(mat, col = c("white", "red"), show_row_dend = FALSE, show_row_names = FALSE,
heatmap_legend_param = list(title = "#fusion"), ...) +
Heatmap(chr[rownames(mat)], col = c("red", "blue"), name = "chr_info", show_row_names = FALSE, width = unit(5, "mm"))
draw(ht_list, column_title = qq("@{nrow(mat)} gene pairs"))
dev.off()
write.csv(cbind(mat, chr = chr[rownames(mat)]), file = qq("@{prefix}_gene_fusion_stat.csv"))
foo = apply(mat, 1, function(x) sum(x>0))
foo_col = colorRamp2(seq(min(foo), max(foo), length = 11), rev(brewer.pal(11, "Spectral")))
nc = ceiling(sqrt(length(files)))
nr = ceiling(length(files)/nc)
pdf(qq("@{prefix}_defuse_circos.pdf"), width = 4*nc, height = 4*nr)
par(mfrow = c(nr, nc))
chr = NULL
lt = list()
for(f in files) {
qqcat("plotting @{f}\n")
df = read.table(f, stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "")
x = df[,c("gene_chromosome1", "gene_chromosome2", "gene_name1", "gene_name2")]
#x = x[x$gene_name1 %in% gn & x$gene_name2 %in% gn, ]
x2 = list()
x2$gene1 = ifelse(x$gene_name1 > x$gene_name2, x$gene_name2, x$gene_name1)
x2$gene2 = ifelse(x$gene_name1 > x$gene_name2, x$gene_name1, x$gene_name2)
gn= paste(x2$gene1, x2$gene2, sep = "|")
bed1 = data.frame(chr = paste0("chr", df$gene_chromosome1),
start = df$genomic_break_pos1,
end = df$genomic_break_pos1)
bed2 = data.frame(chr = paste0("chr", df$gene_chromosome2),
start = df$genomic_break_pos2,
end = df$genomic_break_pos2)
circos.initializeWithIdeogram()
circos.genomicLink(bed1, bed2, col = foo_col(foo[gn]))
text(1, 1, gsub(".gene_fusion.annotated.txt", "", basename(f)), adj = c(1, 1))
}
plot(1,1, type = "n", axes = FALSE, ann = FALSE)
legend = unique(round(seq(min(foo), max(foo), length = 5)))
legend("topleft", lty = 1, lwd = 2, col = foo_col(legend), legend = legend)
text(1, 1, "gene fusion", cex = 2)
dev.off()
}
visualize_crest_tx = function(files, prefix = "", somatic_only = TRUE) {
chr = NULL
lt = list()
for(f in files) {
df = read.table(f, stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "", comment.char = "")
if(somatic_only) df = df[df$SOMATIC_GERMLINE_CLASSIFICATION == "somatic", , drop = FALSE]
x = df[,c("X.CHROM_FROM", "CHROM_TO", "POS_FROM", "POS_TO")]
x$GENE_LEFT = paste(df$X.CHROM_FROM, round(df$POS_FROM, -3), sep = "_")
x$GENE_RIGHT = paste(df$CHROM_TO, round(df$POS_TO, -3), sep = "_")
x2 = list()
x2$gene1 = ifelse(x$GENE_LEFT > x$GENE_RIGHT, x$GENE_RIGHT, x$GENE_LEFT)
x2$gene2 = ifelse(x$GENE_LEFT > x$GENE_RIGHT, x$GENE_LEFT, x$GENE_RIGHT)
chr_info = ifelse(x[["X.CHROM_FROM"]] == x$CHROM_TO, "intra", "inter")
chr[paste(x2$gene1, x2$gene2, sep = "|")] = chr_info
lt[[basename(f)]] = table(paste(x2$gene1, x2$gene2, sep = "|"))
}
all_gn = unique(unlist(lapply(lt, names)))
mat = matrix(0, nr = length(all_gn), nc = length(lt))
rownames(mat) = all_gn
colnames(mat) = gsub(".TX.annotated", "", names(lt))
for(i in seq_along(lt)) {
mat[names(lt[[i]]), i] = lt[[i]]
}
pdf(qq("@{prefix}_crest_tx_stat.pdf"), width = 8, height = 12)
ht_list = rowAnnotation(bar = row_anno_barplot(apply(mat, 1, function(x) sum(x != 0)),
axis_direction = "reverse", axis = TRUE, axis_side = "bottom"), width = unit(2, "cm")) +
Heatmap(mat, col = c("white", "red"), show_row_dend = FALSE, show_row_names = FALSE,
heatmap_legend_param = list(title = "#TX")) +
Heatmap(chr[rownames(mat)], col = c("red", "blue"), name = "chr_info", show_row_names = FALSE, width = unit(5, "mm"))
draw(ht_list, column_title = qq("@{nrow(mat)} pos pairs"))
dev.off()
write.csv(cbind(mat, chr = chr[rownames(mat)]), file = qq("@{prefix}_crest_tx_stat.csv"))
foo = apply(mat, 1, function(x) sum(x>0))
foo_col = colorRamp2(seq(min(foo), max(foo), length = length(files)), brewer.pal(length(files), "Spectral"))
nc = ceiling(sqrt(length(files)))
nr = ceiling(length(files)/nc)
nc = ceiling(sqrt(length(files)))
nr = ceiling(length(files)/nc)
pdf(qq("@{prefix}_crest_tx_circos.pdf"), width = 4*nc, height = 4*nr)
par(mfrow = c(nr, nc))
chr = NULL
lt = list()
for(f in files) {
df = read.table(f, stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "", comment.char = "")
if(somatic_only) df = df[df$SOMATIC_GERMLINE_CLASSIFICATION == "somatic", , drop = FALSE]
x = df[,c("X.CHROM_FROM", "CHROM_TO", "POS_FROM", "POS_TO")]
x$GENE_LEFT = paste(df$X.CHROM_FROM, round(df$POS_FROM, -3), sep = "_")
x$GENE_RIGHT = paste(df$CHROM_TO, round(df$POS_TO, -3), sep = "_")
x2 = list()
x2$gene1 = ifelse(x$GENE_LEFT > x$GENE_RIGHT, x$GENE_RIGHT, x$GENE_LEFT)
x2$gene2 = ifelse(x$GENE_LEFT > x$GENE_RIGHT, x$GENE_LEFT, x$GENE_RIGHT)
gn= paste(x2$gene1, x2$gene2, sep = "|")
bed1 = data.frame(chr = paste0("chr", df$X.CHROM_FROM),
start = df$POS_FROM,
end = df$POS_FROM)
bed2 = data.frame(chr = paste0("chr", df$CHROM_TO),
start = df$POS_TO,
end = df$POS_TO)
l = bed1$chr %in% paste0("chr", c(1:22, c("X", "Y"))) & bed2$chr %in% paste0("chr", c(1:22, c("X", "Y")))
circos.initializeWithIdeogram()
circos.genomicLink(bed1[l, ], bed2[l, ], col = foo_col[foo[gn[l]]])
text(1, 1, gsub(".TX.annotated", "", basename(f)), adj = c(1, 1))
}
plot(1,1, type = "n", axes = FALSE, ann = FALSE)
legend = unique(round(seq(min(foo), max(foo), length = 5)))
legend("topleft", lty = 1, lwd = 2, col = foo_col(legend), legend = legend)
text(1, 1, "crest tx", cex = 2)
dev.off()
}
visualize_crest_deldupinv = function(files, prefix = "") {
pdf(qq("@{prefix}_crest_deldupinv.pdf"), width = 10, height = 8)
type_col = c("DEL" = "green", "DUP" = "red", "INV" = "blue")
gtrellis_layout(ncol = 4, byrow = FALSE, n_track = 1, track_ylim = c(0, 6), add_name_track = TRUE,
add_ideogram_track = TRUE, title = "DEL(green)_DUP(red)_INV(blue)")
for(i in seq_along(files)) {
df = read.table(files[i], stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "", comment.char = "")
df = df[df$SOMATIC_GERMLINE_CLASSIFICATION == "somatic", , drop = FALSE]
bed = data.frame(chr = paste0("chr", df[[1]]),
start = df$POS,
end = df$END,
type = df$SV_TYPE)
add_track(NULL, track = 2, panel.fun = function(gr) {
grid.lines(get_cell_meta_data("original_xlim"), unit(c(i, i), "native"), default.unit = "native", gp = gpar(lty = 2, col = "#CCCCCC"))
if(get_cell_meta_data("name") == "chrY") {
grid.text(gsub(".DELDUPINV.annotated", "", basename(files[i])), get_cell_meta_data("original_xlim")[2], i,
default.unit = "native", just = "left", gp = gpar(fontsize = 8))
}
})
add_track(bed, track = 2, panel.fun = function(gr) {
l = convertWidth(unit(gr$end - gr$start, "native"), "mm", valueOnly = TRUE) < 1
if(sum(l)) {
grid.points((gr$start[l] + gr$end[l])/2, rep(i, sum(l)), default.unit = "native", pch = 16,
gp = gpar(col = type_col[gr$type[l]]), size = unit(1, "mm"))
}
if(sum(!l)) {
grid.segments(gr$start[!l], rep(i, nrow(gr[!l])), gr$end[!l], rep(i, nrow(gr[!l])), default.unit = "native",
gp = gpar(lwd = 4, col = type_col[gr$type[!l]]))
}
})
}
dev.off()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.