#' Prepare data for module: Circos
#'
#' The workflow of vigilante is highly module-based. To ensure a successful and smooth run, vigilante needs to prepare input data before continuing.
#'
#' @param doBAF logic, whether to prepare B Allele Frequency (BAF) data, if FALSE, no BAF data will be available for downstream v_Circos function and no BAF track will be shown on Circos plots; if TRUE, please make sure BAF data files are properly named, see Details for more information about file naming.
#' @param doCNA logic, whether to prepare Copy Number Alteration (CNA) data, if FALSE, no CNA data will be available for downstream v_Circos function and no CNA track will be shown on Circos plots; if TRUE, please make sure CNA data files are properly named, see Details for more information about file naming.
#' @param doMB mixed vector, c(FALSE, "somatic", "germline"), whether to prepare Mutation Burden (MB) data, if FALSE, no MB data will be available for downstream v_Circos function and no MB track will be shown on Circos plots; if TRUE, please follow the instruction and run v_llpMultiLoader function first. Also, if TRUE, specify the mutation type, can be either "somatic" or "germline" or both, each type will be shown as a separate track on Circos plots.
#' @param doGE mixed vector, c(FALSE, "log"), whether to prepare Gene Expression (GE) data, if FALSE, no GE data will be available for downstream v_Circos function and no GE track will be shown on Circos plots; if TRUE, please make sure GE data files are properly named, see Details for more information about file naming. Also, if TRUE, specify the value format, can be either "log" or "TPM", choose "TPM" for showing raw TPM value, choose "log" for showing value after log10 conversion (0 will be converted into negative value based on the value range across all samples).
#' @param doFL logic, whether to prepare Fusion Link (FL) data, if FALSE, no FL data will be available for downstream v_Circos function and no FL track will be shown on Circos plots; if TRUE, please make sure FL data files are properly named, see Details for more information about file naming.
#' @param unifyTrackYlim logic, whether to unify each track's y-axis range based on the value range across all samples, default TRUE.
#'
#' @details
#' Oftentimes input data files generated by upstream tools came with diverse naming conventions. It might be easy for the user to recognize those files, but not for vigilante if there is no recognizable patterns.
#'
#' To make input data files clear to vigilante, it would be nice to have them named something like "studyID_sampleID_(other descriptions).file extension". Here "studyID" is the name of the study or project, and it will be used in multiple naming situations (such as on the plot, or in the output file names), so it is recommended to be concise and meaningful.
#'
#' For module Circos, currently supported input data files are listed below, please contact the author if you want to add more files to the supported list:
#' B Allele Frequency (BAF): *baf.tsv
#' Copy Number Alteration (CNA): *cna.tsv
#' Mutation Burden (MB): see the help file of v_llpMultiLoader function for more information
#' Gene Expression (GE): *cDNA_genes.sf for Salmon
#' Fusion Link (FL): *-fusion-genes.txt for FusionCatcher, *star-fusion*abridged* for STAR-Fusion
#'
#' @return list, because R CMD check discourages assignments to the global environment within functions, user needs to run the function with explicitly assigning the return value to a global variable named "prepareVdata_Circos_returnList", which will be a list containing the required variables for downstream analyses.
#'
#' @import stats magrittr
#'
#' @export
#'
# v_prepareVdata_Circos function
v_prepareVdata_Circos = function(doBAF = FALSE, doCNA = FALSE, doMB = c(FALSE, "somatic", "germline"), doGE = c(FALSE, "log"), doFL = FALSE, unifyTrackYlim = TRUE) {
# ask user to make sure v_prepareVdata_Circos function return value is assigned to the global variable named "prepareVdata_Circos_returnList"
status_returnValue = menu(choices = c("Yes", "No"), title = "\nIs v_prepareVdata_Circos function return value assigned to the global variable named 'prepareVdata_Circos_returnList'?")
if (status_returnValue == 1) {
print("Function return value is properly assigned, now continue to the next step")
} else if (status_returnValue == 2) {
print("Please follow the instruction to assign the function return value before continue to the next step")
stop()
} else {
print("Please choose a valid answer")
stop()
}
rm(status_returnValue)
# check if globalSettings_returnList exists
if (!exists("globalSettings_returnList")) {
print("globalSettings_returnList not detected, please follow the instruction and run v_globalSettings function before continue")
stop()
} else {
print("globalSettings_returnList detected, start extracting global settings")
}
# extract global settings from return list
for (i in 1:length(globalSettings_returnList)) {
assign(x = names(globalSettings_returnList)[i], value = globalSettings_returnList[[i]])
}
rm(i)
# preset variables for temp_prepareVdata_Circos_returnList
baf.list.c = NULL
cna.list.c = NULL
cna_track_ylim = NULL
mb_somatic_d.list.c = NULL
mb_germline_d.list.c = NULL
mb_track_ylim = NULL
ge.list.c = NULL
flf.list.c = NULL
flt.list.c = NULL
fl_intraORinter.list.c = NULL
lb.list.c = NULL
lb_intraORinter.list.c = NULL
# if condition for whether to reform BAF data
if (doBAF == TRUE) {
# check if BAF data exist
baf_file_name = dir(path = folder_name, pattern = glob2rx("*baf.tsv"))
if (length(baf_file_name) == 0) {
print("BAF data not detected, please double check if BAF data files are properly named, or set doBAF = FALSE to skip BAF data processing")
stop()
} else {
print("Start processing BAF data")
}
# load and extract BAF data
baf_path = paste0("./", folder_name, "/", baf_file_name)
baf.list = lapply(baf_path, read.table, header = TRUE, stringsAsFactors = FALSE)
baf.list = lapply(baf.list, function(x) {
x = cbind(x, x$Position)
x[, 4] = x[, 3]
x[, 3] = x[, 2]
if (length(x[, 1]) > 0) {
x[, 1] = gsub("chr", "", x[, 1])
x[, 1] = paste0("chr", x[, 1])
}
colnames(x) = c("Chr", "Start", "End", "BAF")
return(x)
})
names(baf.list) = assayID
# calculate average BAF per sample
baf.list = lapply(baf.list, function(x) {
x = dplyr::group_by(x, Chr, Start, End)
x = dplyr::summarise(x, BAF = mean(BAF))
x = as.data.frame(x)
return(x)
})
# grouping and calculate average BAF per group
baf.grp = list()
for (i in 1:length(grpName)) {
baf.grp[[i]] = baf.list[names(baf.list) %in% grp.list[[i]]]
}
rm(i)
baf.grp.avg = list()
baf.grp.avg = lapply(baf.grp, dplyr::bind_rows)
baf.grp.avg = lapply(baf.grp.avg, function(x) {
x = dplyr::group_by(x, Chr, Start, End)
x = dplyr::summarise(x, BAF = mean(BAF))
x = as.data.frame(x)
})
# append grouped list to original list
baf.list.c = c(baf.list, baf.grp.avg)
names(baf.list.c) = c(assayID, grpName)
# remove temp baf data
rm(baf.grp, baf.grp.avg, baf.list, baf_file_name, baf_path)
print("BAF data processing completed")
}
# if condition for whether to reform CNA data
if (doCNA == TRUE) {
# check if CNA data exist
cna_file_name = dir(path = folder_name, pattern = glob2rx("*cna.tsv"))
if (length(cna_file_name) == 0) {
print("CNA data not detected, please double check if CNA data files are properly named, or set doCNA = FALSE to skip CNA data processing")
stop()
} else {
print("Start processing CNA data")
}
# load and extract CNA data
cna_path = paste0("./", folder_name, "/", cna_file_name)
cna.list = lapply(cna_path, read.table, header = TRUE, stringsAsFactors = FALSE)
cna.list = lapply(cna.list, function(x) {
x = cbind(x, x$Position)
x[, 4] = x[, 3]
x[, 3] = x[, 2]
if (length(x[, 1]) > 0) {
x[, 1] = gsub("chr", "", x[, 1])
x[, 1] = paste0("chr", x[, 1])
}
colnames(x) = c("Chr", "Start", "End", "CNA")
return(x)
})
names(cna.list) = assayID
# calculate average CNA per sample
cna.list = lapply(cna.list, function(x) {
x = dplyr::group_by(x, Chr, Start, End)
x = dplyr::summarise(x, CNA = mean(CNA))
x = as.data.frame(x)
return(x)
})
# grouping and calculate average CNA per group
cna.grp = list()
for (i in 1:length(grpName)) {
cna.grp[[i]] = cna.list[names(cna.list) %in% grp.list[[i]]]
}
rm(i)
cna.grp.avg = list()
cna.grp.avg = lapply(cna.grp, dplyr::bind_rows)
cna.grp.avg = lapply(cna.grp.avg, function(x) {
x = dplyr::group_by(x, Chr, Start, End)
x = dplyr::summarise(x, CNA = mean(CNA))
x = as.data.frame(x)
})
# append grouped list to original list
cna.list.c = c(cna.list, cna.grp.avg)
names(cna.list.c) = c(assayID, grpName)
# set cna_track_ylim for downstream render plot
cna_track_ylim = lapply(cna.list.c, function(x) {
x = ceiling(max(abs(x[, 4])))
return(x)
})
if (unifyTrackYlim == TRUE) {
cna_track_ylim_max = max(unlist(cna_track_ylim))
cna_track_ylim = lapply(cna_track_ylim, function(x) {
x = cna_track_ylim_max
return(x)
})
rm(cna_track_ylim_max)
}
# remove temp cna data
rm(cna.grp, cna.grp.avg, cna.list, cna_file_name, cna_path)
print("CNA data processing completed")
}
# if condition for whether to reform MB data
if (TRUE %in% doMB) {
# check if required data input exists
if (!(exists("llpMultiLoader_returnList") | exists("maf_germline.p"))) {
print("Required llpMultiLoader_returnList or maf_germline.p does not exist, please follow the instruction and run v_llpMultiLoader function first, or set doMB = FALSE to skip MB data processing")
stop()
} else {
print("Start processing MB data")
}
# set chr_len_auto and chr_len_max
if (speciesID %in% c("hg38", "hg19")) {
chr_len_auto = chr_len_H
} else {
chr_len_auto = chr_len_M
}
chr_len_max = max(chr_len_auto) + 1e7
# preset mb_somatic_d.list.c and mb_germline_d.list.c
mb_empty_template = as.data.frame(matrix(rep(NA, 4), nrow = 1, byrow = TRUE), stringsAsFactors = FALSE)
mb_empty_template = na.omit(mb_empty_template)
colnames(mb_empty_template) = c("Chr", "Start", "End", "Count")
mb_somatic_d.list.c = c(rep(list(mb_empty_template), times = length(plot_name)))
names(mb_somatic_d.list.c) = c(assayID, grpName)
mb_germline_d.list.c = c(rep(list(mb_empty_template), times = length(plot_name)))
names(mb_germline_d.list.c) = c(assayID, grpName)
rm(mb_empty_template)
# load, extract and reform MB data, somatic, remember to add "chr" prefix!!!
if ("somatic" %in% doMB) {
# load and extract MB data, somatic
mb_somatic = llpMultiLoader_returnList[[length(llpMultiLoader_returnList)]]@data
mb_somatic = mb_somatic[, c("Chromosome", "Start_Position", "End_Position", "Tumor_Sample_Barcode")]
colnames(mb_somatic)[1:3] = c("Chr", "Start", "End")
mb_somatic$Chr = gsub("chr", "", mb_somatic$Chr)
mb_somatic$Chr = paste0("chr", mb_somatic$Chr)
mb_somatic.list.c = list()
for (i in 1:nrow(groupInfo)) {
mb_somatic.list.c[[i]] = subset(mb_somatic, subset = Tumor_Sample_Barcode == groupInfo$aliasID[i])
}
rm(i, mb_somatic)
names(mb_somatic.list.c) = assayID
# grouping MB data, somatic
mb.list.grp = list()
for (i in 1:length(grpName)) {
mb.list.grp[[i]] = dplyr::bind_rows(mb_somatic.list.c[names(mb_somatic.list.c) %in% grp.list[[i]]])
mb.list.grp[[i]] = unique(mb.list.grp[[i]])
}
rm(i)
mb_somatic.list.c = c(mb_somatic.list.c, mb.list.grp)
rm(mb.list.grp)
names(mb_somatic.list.c) = c(assayID, grpName)
# set empty density list to be filled, somatic
mb_somatic_d.list.c = lapply(mb_somatic.list.c, function(x) {
x = circlize::genomicDensity(x, window.size = 1e7, overlap = FALSE, chr.len = chr_len_auto + 1e7)
x[, 4] = 0
colnames(x) = c("Chr", "Start", "End", "Count")
x[, 1] = as.character(x[, 1])
x[, "unique_identifier"] = paste0(x$Chr, ":", x$Start, ":", x$End)
return(x)
})
# set cut list for downstream filling, somatic
mb_somatic_cut.list.c = lapply(mb_somatic.list.c, function(x) {
x[, 2] = cut(unlist(x[, 2]), breaks = seq(0, chr_len_max, 1e7), include.lowest = TRUE)
x[, 3] = cut(unlist(x[, 3]), breaks = seq(0, chr_len_max, 1e7), include.lowest = TRUE)
x = x %>% dplyr::group_by(Chr, Start, End) %>% dplyr::summarize(count = dplyr::n())
x[, 2] = gsub("\\(", "", unlist(x[, 2]))
x[, 2] = gsub("\\[", "", unlist(x[, 2]))
x[, 2] = gsub("\\]", "", unlist(x[, 2]))
x[, "End"] = NULL
colnames(x)[3] = "Count"
x = tidyr::separate(x, col = "Start", into = c("Start", "End"), sep = ",", remove = TRUE)
x[, 2] = as.integer(unlist(x[, 2])) + as.integer(1)
x[, 3] = as.integer(unlist(x[, 3]))
x[, "unique_identifier"] = paste0(x$Chr, ":", x$Start, ":", x$End)
return(x)
})
# fill empty density list with cut list, somatic
mb_somatic_d.list.c = mapply(x = mb_somatic_d.list.c, y = mb_somatic_cut.list.c, SIMPLIFY = FALSE, function(x, y) {
x[, "Count"] = plyr::mapvalues(x$unique_identifier, from = y$unique_identifier, to = y$Count, warn_missing = FALSE)
x[, "Count"] = gsub("^.*chr.*$", "0", x[, "Count"])
x[, "Count"] = as.numeric(x[, "Count"])
x[, "unique_identifier"] = NULL
return(x)
})
# remove temp MB data variables, somatic
rm(mb_somatic.list.c, mb_somatic_cut.list.c)
}
# load, extract and reform MB data, germline, remember to add "chr" prefix!!!
if ("germline" %in% doMB) {
# load and extract MB data, germline
mb_germline = maf_germline.p@data
mb_germline = mb_germline[, c("Chromosome", "Start_Position", "End_Position", "Tumor_Sample_Barcode")]
colnames(mb_germline)[1:3] = c("Chr", "Start", "End")
mb_germline$Chr = gsub("chr", "", mb_germline$Chr)
mb_germline$Chr = paste0("chr", mb_germline$Chr)
mb_germline.list.c = list()
for (i in 1:nrow(groupInfo)) {
mb_germline.list.c[[i]] = subset(mb_germline, subset = Tumor_Sample_Barcode == groupInfo$aliasID[i])
}
rm(i, mb_germline)
names(mb_germline.list.c) = assayID
# grouping MB data, germline
mb.list.grp = list()
for (i in 1:length(grpName)) {
mb.list.grp[[i]] = dplyr::bind_rows(mb_germline.list.c[names(mb_germline.list.c) %in% grp.list[[i]]])
mb.list.grp[[i]] = unique(mb.list.grp[[i]])
}
rm(i)
mb_germline.list.c = c(mb_germline.list.c, mb.list.grp)
rm(mb.list.grp)
names(mb_germline.list.c) = c(assayID, grpName)
# set empty density list to be filled, germline
mb_germline_d.list.c = lapply(mb_germline.list.c, function(x) {
x = circlize::genomicDensity(x, window.size = 1e7, overlap = FALSE, chr.len = chr_len_auto + 1e7)
x[, 4] = 0
colnames(x) = c("Chr", "Start", "End", "Count")
x[, 1] = as.character(x[, 1])
x[, "unique_identifier"] = paste0(x$Chr, ":", x$Start, ":", x$End)
return(x)
})
# set cut list for downstream filling, germline
mb_germline_cut.list.c = lapply(mb_germline.list.c, function(x) {
x[, 2] = cut(unlist(x[, 2]), breaks = seq(0, chr_len_max, 1e7), include.lowest = TRUE)
x[, 3] = cut(unlist(x[, 3]), breaks = seq(0, chr_len_max, 1e7), include.lowest = TRUE)
x = x %>% dplyr::group_by(Chr, Start, End) %>% dplyr::summarize(count = dplyr::n())
x[, 2] = gsub("\\(", "", unlist(x[, 2]))
x[, 2] = gsub("\\[", "", unlist(x[, 2]))
x[, 2] = gsub("\\]", "", unlist(x[, 2]))
x[, "End"] = NULL
colnames(x)[3] = "Count"
x = tidyr::separate(x, col = "Start", into = c("Start", "End"), sep = ",", remove = TRUE)
x[, 2] = as.integer(unlist(x[, 2])) + as.integer(1)
x[, 3] = as.integer(unlist(x[, 3]))
x[, "unique_identifier"] = paste0(x$Chr, ":", x$Start, ":", x$End)
return(x)
})
# fill empty density list with cut list, germline
mb_germline_d.list.c = mapply(x = mb_germline_d.list.c, y = mb_germline_cut.list.c, SIMPLIFY = FALSE, function(x, y) {
x[, "Count"] = plyr::mapvalues(x$unique_identifier, from = y$unique_identifier, to = y$Count, warn_missing = FALSE)
x[, "Count"] = gsub("^.*chr.*$", "0", x[, "Count"])
x[, "Count"] = as.numeric(x[, "Count"])
x[, "unique_identifier"] = NULL
return(x)
})
# remove temp MB data variables, germline
rm(mb_germline.list.c, mb_germline_cut.list.c)
}
# set mb_track_ylim for downstream render plot
mb_track_ylim = mapply(x = mb_somatic_d.list.c, y = mb_germline_d.list.c, SIMPLIFY = FALSE, function(x, y) {
if (nrow(x) >= 1) {
x = ceiling(max(abs(x[, 4])))
} else {
x = 0
}
if (nrow(y) >= 1) {
y = ceiling(max(abs(y[, 4])))
} else {
y = 0
}
mb_track_ylim_both = max(x, y)
if (mb_track_ylim_both %% 2 != 0) {
mb_track_ylim_both = mb_track_ylim_both + 1
}
return(mb_track_ylim_both)
})
if (unifyTrackYlim == TRUE) {
mb_track_ylim_max = max(unlist(mb_track_ylim))
mb_track_ylim = lapply(mb_track_ylim, function(x) {
x = mb_track_ylim_max
return(x)
})
rm(mb_track_ylim_max)
}
# remove temp MB data variables, other remainings
rm(chr_len_auto, chr_len_max)
print("MB data processing completed")
}
# if condition for whether to reform GE data
if (TRUE %in% doGE) {
# check if GE data exist
ge_file_name = dir(path = folder_name, pattern = glob2rx("*cDNA_genes.sf"))
if (length(ge_file_name) == 0) {
print("GE data not detected, please double check if GE data files are properly named, or set doGE = FALSE to skip GE data processing")
stop()
} else {
print("Start processing GE data")
}
# load and extract gene expression data
ge_path = paste0("./", folder_name, "/", ge_file_name)
ge.list = lapply(ge_path, read.table, header = TRUE, stringsAsFactors = FALSE)
ge.list = lapply(ge.list, function(x) x[(names(x) %in% c("Name", "TPM"))])
ge_header = c("ENSG", "TPM")
ge.list = lapply(ge.list, setNames, ge_header)
# filter out unexpected ENST/transcripts in ge.list
ge.list = lapply(ge.list, function(x) {
temp_ENST = grepl("^ENST.+$", x$ENSG)
x = subset(x, subset = !temp_ENST)
return(x)
})
# remove ENSG version suffix in ge.list
ge.list = lapply(ge.list, function(x) {
x$ENSG = gsub("\\.[[:digit:]]+$", "", x$ENSG)
return(x)
})
# map gene expression data to GRCh38/GRCm38
if (speciesID == "hg38") {
ge.list = lapply(ge.list, merge, x = GRCh38G, by = "ENSG")
} else if (speciesID == "hg19") {
ge.list = lapply(ge.list, merge, x = GRCh37G, by = "ENSG")
} else {
ge.list = lapply(ge.list, merge, x = GRCm38G, by = "ENSG")
}
ge.list = lapply(ge.list, function(x) x[, c(3, 4, 5, 6)])
names(ge.list) = assayID
# calculate average GE per sample
ge.list = lapply(ge.list, function(x) {
x = dplyr::group_by(x, Chr, Start, End)
x = dplyr::summarise(x, TPM = mean(TPM))
x = as.data.frame(x)
return(x)
})
# grouping and calculate average GE per group
ge.grp = list()
for (i in 1:length(grpName)) {
ge.grp[[i]] = ge.list[names(ge.list) %in% grp.list[[i]]]
}
rm(i)
ge.grp.avg = list()
ge.grp.avg = lapply(ge.grp, dplyr::bind_rows)
ge.grp.avg = lapply(ge.grp.avg, function(x) {
x = dplyr::group_by(x, Chr, Start, End)
x = dplyr::summarise(x, TPM = mean(TPM))
x = as.data.frame(x)
})
# append grouped list to original list
ge.list.c = c(ge.list, ge.grp.avg)
names(ge.list.c) = c(assayID, grpName)
# calculate log10ratio for ge.list.c
if (unifyTrackYlim == TRUE) {
min0offset_circos_unif = lapply(ge.list.c, function(x) {
x = max(x[, 4])
return(x)
})
min0offset_circos_unif = dplyr::bind_rows(min0offset_circos_unif)
min0offset_circos_unif = 10 ^ (-ceiling(log10(max(min0offset_circos_unif))))
}
ge.list.c = lapply(ge.list.c, function(x) {
x[is.na(x[, 4]), 4] = 0 # replace NA with 0
x[x[, 4] < 0, 4] = 0 # replace negative value with 0 (for subtraction-normolized group)
min0offset_circos = 10 ^ (-ceiling(log10(max(x[, 4]))))
x[, 4] = x[, 4] + ifelse(unifyTrackYlim == TRUE, min0offset_circos_unif, min0offset_circos) # add 1e-3 to fix log(0) issue based on unifyTrackYlim condition
x["log10ratio"] = log10(x[, 4]) # convert TPM to log10 ratio
return(x)
})
# remove temp ge data
suppressWarnings(rm(ge.grp, ge.grp.avg, ge.list, ge_file_name, ge_path, ge_header, min0offset_circos_unif))
print("GE data processing completed")
}
# if condition for whether to reform FL data
if (doFL == TRUE) {
# check if FL data exist
fl_file_name_fuca = dir(path = folder_name, pattern = glob2rx("*-fusion-genes.txt"))
fl_file_name_star = dir(path = folder_name, pattern = glob2rx("*star-fusion*abridged*"))
if (length(fl_file_name_fuca) == 0 | length(fl_file_name_star) == 0) {
print("FL data not detected, or missing for at least one fusion caller, please double check if FL data files from all fusion callers are properly named, or set doFL = FALSE to skip FL data processing")
stop()
} else {
print("Start processing FL data")
}
# load and extract fusion link data, for Fusion Catcher
fl_path_fuca = paste0("./", folder_name, "/", fl_file_name_fuca)
fl.list.fuca = lapply(fl_path_fuca, read.delim, header = TRUE, stringsAsFactors = FALSE, fill = TRUE)
fl.list.fuca = lapply(fl.list.fuca, function(x) x[, c(11, 12)])
fl_header = c("From", "To")
fl.list.fuca = lapply(fl.list.fuca, setNames, fl_header)
fl.list.fuca = lapply(fl.list.fuca, function(x) {
x = unique(x[fl_header])
})
# load and extract fusion link data, for Star Fusion
fl_path_star = paste0("./", folder_name, "/", fl_file_name_star)
fl.list.star = lapply(fl_path_star, read.delim, header = TRUE, stringsAsFactors = FALSE, fill = TRUE)
fl.list.star = lapply(fl.list.star, function(x) x[, c(5, 7)])
for (i in 1:length(fl.list.star)) {
fl.list.star[[i]][, 1] = gsub(".*ENSG", "ENSG", fl.list.star[[i]][, 1])
fl.list.star[[i]][, 1] = gsub("\\..*", "", fl.list.star[[i]][, 1])
fl.list.star[[i]][, 2] = gsub(".*ENSG", "ENSG", fl.list.star[[i]][, 2])
fl.list.star[[i]][, 2] = gsub("\\..*", "", fl.list.star[[i]][, 2])
}
rm(i)
fl.list.star = lapply(fl.list.star, setNames, fl_header)
fl.list.star = lapply(fl.list.star, function(x) {
x = unique(x[fl_header])
})
# convert logical to character for samples with empty fusion data (to avoid grouping error)
fl.list.fuca = lapply(fl.list.fuca, function(x) {
x$From = as.character(x$From)
x$To = as.character(x$To)
return(x)
})
fl.list.star = lapply(fl.list.star, function(x) {
x$From = as.character(x$From)
x$To = as.character(x$To)
return(x)
})
# inner-join Fusion Catcher and Star Fusion, find common fusion genes
fl.list.join = list()
for (i in 1:length(fl.list.fuca)) {
fl.list.join[[i]] = merge(fl.list.fuca[[i]], fl.list.star[[i]], by = c("From", "To"))
}
rm(i)
# select only matched gene (match in GRCh38G/GRCh37G)
if (speciesID == "hg38") {
matched = lapply(fl.list.join, function(x) {
x = x[, 1] %in% GRCh38G[, 1] & x[, 2] %in% GRCh38G[, 1]
})
} else if (speciesID == "hg19") {
matched = lapply(fl.list.join, function(x) {
x = x[, 1] %in% GRCh37G[, 1] & x[, 2] %in% GRCh37G[, 1]
})
}
fl.list.join = mapply(x = fl.list.join, y = matched, SIMPLIFY = FALSE, function(x, y) {
x = subset(x, subset = y)
return(x)
})
rm(matched)
names(fl.list.join) = assayID
# grouping fusion link
fl.list.grp = list()
for (i in 1:length(grpName)) {
fl.list.grp[[i]] = dplyr::bind_rows(fl.list.join[names(fl.list.join) %in% grp.list[[i]]])
fl.list.grp[[i]] = unique(fl.list.grp[[i]])
}
rm(i)
fl.list.c = c(fl.list.join, fl.list.grp)
names(fl.list.c) = c(assayID, grpName)
# split fusion link list into "From" and "To"
flf.list = list()
flt.list = list()
flf.list = lapply(fl.list.c, function(x) as.data.frame(x[, 1], stringsAsFactors = FALSE))
flt.list = lapply(fl.list.c, function(x) as.data.frame(x[, 2], stringsAsFactors = FALSE))
flf.list = lapply(flf.list, setNames, "ENSG")
flt.list = lapply(flt.list, setNames, "ENSG")
# map fusion link data to GRCh38/GRCm38, remember to keep the original order!!!
for (i in 1:length(flf.list)) {
if (speciesID == "hg38") {
flf.list[[i]] = plyr::join(flf.list[[i]], GRCh38G, by = "ENSG", type = "inner")
} else if (speciesID == "hg19") {
flf.list[[i]] = plyr::join(flf.list[[i]], GRCh37G, by = "ENSG", type = "inner")
} else {
flf.list[[i]] = plyr::join(flf.list[[i]], GRCm38G, by = "ENSG", type = "inner")
}
}
rm(i)
for (i in 1:length(flt.list)) {
if (speciesID == "hg38") {
flt.list[[i]] = plyr::join(flt.list[[i]], GRCh38G, by = "ENSG", type = "inner")
} else if (speciesID == "hg19") {
flt.list[[i]] = plyr::join(flt.list[[i]], GRCh37G, by = "ENSG", type = "inner")
} else {
flt.list[[i]] = plyr::join(flt.list[[i]], GRCm38G, by = "ENSG", type = "inner")
}
}
rm(i)
flf.list = lapply(flf.list, function(x) x[, c(3, 4, 5, 2)])
flt.list = lapply(flt.list, function(x) x[, c(3, 4, 5, 2)])
flf.list.c = flf.list
flt.list.c = flt.list
names(flf.list.c) = c(assayID, grpName)
names(flt.list.c) = c(assayID, grpName)
rm(flf.list, flt.list)
# classify fuison link into intra-/inter-chr types
fl_intraORinter.list.c = mapply(x = flf.list.c, y = flt.list.c, SIMPLIFY = FALSE, function(x, y) {
fl_intraORinter.list.c = x[, 1] == y[, 1]
})
fl_intraORinter.list.c = lapply(fl_intraORinter.list.c, function(x) {
x = gsub(TRUE, "gray33", x)
x = gsub(FALSE, "firebrick3", x)
})
# set gene labels for downstream intra-/inter-chr classification
lb.list.c = mapply(x = flf.list.c, y = flt.list.c, SIMPLIFY = FALSE, function(x, y) {
lb.list.c = dplyr::bind_rows(x, y)
})
names(lb.list.c) = c(assayID, grpName)
# classify gene label into intra-/inter-chr types
lb_intraORinter.list.c = mapply(x = lb.list.c, y = fl_intraORinter.list.c, SIMPLIFY = FALSE, function(x, y) {
lb_intraORinter.list.c = cbind(x, rep(y, 2))
})
lb_intraORinter.list.c = lapply(lb_intraORinter.list.c, function(x) {
if (speciesID %in% c("hg38", "hg19")) {
x[, 1] = factor(x[, 1], levels = paste0("chr", c(1:22, "X", "Y")))
} else if (speciesID %in% c("mm10", "mm9")) {
x[, 1] = factor(x[, 1], levels = paste0("chr", c(1:19, "X", "Y")))
}
x = plyr::arrange(x, Chr, Start, End)
x = unique(x)
})
# extract lb.list.c from lb_intraORinter.list.c and reset lb_intraORinter.list.c to make sure they match each other
lb.list.c = lapply(lb_intraORinter.list.c, function(x) {
x = subset(x, select = 1:4)
return(x)
})
lb_intraORinter.list.c = lapply(lb_intraORinter.list.c, function(x) {
x = as.character(unlist(x[, 5]))
return(x)
})
# remove temp fl data
rm(fl_file_name_fuca, fl_file_name_star, fl_header, fl_path_fuca, fl_path_star, fl.list.grp, fl.list.join, fl.list.fuca, fl.list.star, fl.list.c)
print("FL data processing completed")
}
# add variables to temp_prepareVdata_Circos_returnList
temp_prepareVdata_Circos_returnList = list(
"baf.list.c" = baf.list.c,
"cna.list.c" = cna.list.c,
"cna_track_ylim" = cna_track_ylim,
"mb_somatic_d.list.c" = mb_somatic_d.list.c,
"mb_germline_d.list.c" = mb_germline_d.list.c,
"mb_track_ylim" = mb_track_ylim,
"ge.list.c" = ge.list.c,
"flf.list.c" = flf.list.c,
"flt.list.c" = flt.list.c,
"fl_intraORinter.list.c" = fl_intraORinter.list.c,
"lb.list.c" = lb.list.c,
"lb_intraORinter.list.c" = lb_intraORinter.list.c
)
# remove variables already added to temp_prepareVdata_Circos_returnList
rm(list = names(temp_prepareVdata_Circos_returnList))
# filter out empty variables, using backwards for loop
for (i in length(temp_prepareVdata_Circos_returnList):1) {
if (is.null(temp_prepareVdata_Circos_returnList[[i]])) {
temp_prepareVdata_Circos_returnList[[i]] = NULL
}
}
rm(i)
# remove extracted global settings
rm(list = names(globalSettings_returnList))
# end of v_prepareVdata_Circos function
print("v_prepareVdata_Circos run completed, return value saved to the global environment in **prepareVdata_Circos_returnList**")
return(temp_prepareVdata_Circos_returnList)
}
#' Main function for module: Circos
#'
#' Extract, clean and reform NGS data, and then generate comprehensive Circos plot with each track (circle) representing different types of data. Currently supported tracks: BAF (B allele frequency), CNA (copy number alterations), mutation burden (somatic/germline mutations), gene expression (TPM) and gene fusions (cross-check validation is available when 2 or more fusion callers are in use; also, fusion status check and validation through statistics are available via module CSE).
#'
#' @param outputFolderPath string, relative or absolute path to the output folder, trailing slash "/" required, can be set to NULL (no output file will be written to the file system), default "./_VK/_Circos/" for module Circos.
#' @param startNum integer, by default, vigilante will go through every individual sample and sample group, generating a Circos plot per each item in the list. If an integer is specified, vigilante will instead start generating Circos plots at the specified index (e.g. there 10 samples and 2 groups in the list, set startNum to 11 will make vigilante start generating Circos plots at the first group). If set to NULL, vigilante will automatically detect Circos pltos already existing in the output folder and continue at the most recent generated one (e.g. there are 12 Circos plots in total to be generated and 6 of them already exist in the output folder, to make sure all plots are fully-rendered, vigilante will continue at the 6th plot and overwrite it). Since generating comprehensive Circos plots is very computational-intensive, it is possible that function runs get interrupted and half-rendered plots get generated because of running out of memory. Therefore, it is recommended to set startNum to NULL so that vigilante can ensure every item in the list to be plotted get fully-rendered.
#' @param endNum integer, by default, vigilante will go through every individual sample and sample group, generating a Circos plot per each item in the list. If an integer is specified, vigilante will instead stop generating Circos plots at the specified index (e.g. there 10 samples and 2 groups in the list, set endNum to 6 will make the 6th sample as the last one to be plotted, and there will be no Circos plots for items after the 6th sample).
#' @param customPlotNum integer vector, optional, can be set to NULL; if provided, will override "startNum" and "endNum", and only Circos plots at the specified index will be generated (e.g. there 10 samples and 2 groups in the list, set customPlotNum to c(3, 11) will make only the 3rd sample and the 1st group to be plotted).
#' @param doBAF logic, whether to show BAF track on Circos plots.
#' @param doCNA logic, whether to show CNA track on Circos plots.
#' @param doMB mixed vector, c(FALSE, "somatic", "germline"), whether to show MB track on Circos plots; if TRUE, specify the mutation type, can be either "somatic" or "germline" or both, each type will be shown as a separate track on Circos plots.
#' @param doGE mixed vector, c(FALSE, "log"), whether to show GE track on Circos plots; if TRUE, specify the value format, can be either "log" or "TPM", choose "TPM" for showing raw TPM value, choose "log" for showing value after log10 conversion (0 will be converted into negative value based on the value range across all samples), must match the value format chosen in v_prepareVdata_Circos function.
#' @param doFL logic, whether to show FL track will be shown on Circos plots.
#'
#' @details
#' Please be aware that generating comprehensive Circos plots may become very computational-intensive and time-consuming depending on input data, and it is possible that function runs get interrupted and half-rendered plots get generated because of running out of memory. But no need to worry, vigilante has already taken this possible issue into consideration and come with an internal mechanism to help minimize the interruption. Please see the description of argument "startNum" and properly set it to better utilize the interal helper mechanism.
#'
#'
#' @return NULL, when a valid outputFolderPath is provided, output Circos plots will be generated and saved in the provided location, otherwise function run will stop and nothing will be written into the file system.
#'
#' @import circlize grDevices graphics ComplexHeatmap grid
#'
#' @export
#'
# v_Circos function
v_Circos = function(outputFolderPath = "./_VK/_Circos/", startNum = NULL, endNum = NULL, customPlotNum = NULL, doBAF = FALSE, doCNA = FALSE, doMB = c(FALSE, "somatic", "germline"), doGE = c(FALSE, "log"), doFL = FALSE) {
# check if outputFolderPath is set
if (is.null(outputFolderPath)) {
print("v_Circos run stopped: based on user's choice, output plots will not be generated")
stop()
} else {
print(as.character(glue::glue("Based on user's choice, output plots will be generated and saved in {outputFolderPath}")))
}
# check if globalSettings_returnList exists
if (!exists("globalSettings_returnList")) {
print("globalSettings_returnList not detected, please follow the instruction and run v_globalSettings function before continue")
stop()
} else {
print("globalSettings_returnList detected, start extracting global settings")
}
# extract global settings from return list
for (i in 1:length(globalSettings_returnList)) {
assign(x = names(globalSettings_returnList)[i], value = globalSettings_returnList[[i]])
}
rm(i)
# check if prepareVdata_Circos_returnList exists
if (!exists("prepareVdata_Circos_returnList")) {
print("prepareVdata_Circos_returnList not detected, please follow the instruction and run v_prepareVdata_Circos function before continue")
stop()
} else {
print("prepareVdata_Circos_returnList detected, start extracting Circos variables")
}
# extract Circos variables from return list
for (i in 1:length(prepareVdata_Circos_returnList)) {
assign(x = names(prepareVdata_Circos_returnList)[i], value = prepareVdata_Circos_returnList[[i]])
}
rm(i)
# set starting number based on already existing CircosPlot file number
if (is.null(startNum)) {
circosPlotNum = dir(path = outputFolderPath, pattern = "CircosPlot4K_.+\\.png$")
if (length(circosPlotNum) <= 1) {
startNum = 1
print(as.character(glue::glue("No existing Circos plots detected in {outputFolderPath}, will start from plot 1")))
} else {
startNum = length(circosPlotNum) # in case loop is aborted, rerun the script to restart at (recreate) the last existing one regardless it is fully completed or partially created
print(as.character(glue::glue("{startNum} existing Circos plots detected in {outputFolderPath}, will continue from plot {startNum}")))
}
} else {
startNum = startNum
}
# set ending number
if (is.null(endNum)) {
endNum = length(plot_name)
} else {
endNum = endNum
if (endNum < startNum) {
endNum = startNum
}
}
# set finalPlotNum, override startNum and endNum based on customPlotNum
if (!is.null(customPlotNum)) {
finalPlotNum = customPlotNum
} else {
finalPlotNum = startNum:endNum
}
# beginning of the loop
for (i in finalPlotNum) {
# progress statement, start, with timestamp
if (!is.null(customPlotNum)) {
print(as.character(glue::glue("Start generating plot {i}, {length(finalPlotNum) - grep(i, finalPlotNum)[1]} more in queue, {Sys.time()}")))
} else {
print(as.character(glue::glue("Start generating plot {i}, {endNum - i} more in queue, {Sys.time()}")))
}
# adjust track.height based on track numbers
track_height_par = suppressWarnings(min(0.4 / sum(doBAF, doCNA, any(doMB, na.rm = TRUE), any(doGE, na.rm = TRUE), doFL), 0.2))
# adjust labels.cex based on track index
labels_cex = c(0.25, 0.25, 0.2, 0.15)
labels_cex_index = 0
# save current circos plot to PNG file under folder "Viz"
png(filename = paste0(outputFolderPath, "CircosPlot4K_", studyID, "_", plot_name[i], ".png"), width = 3339, height = 2160, units = "px", res = 300)
# run render circos plot here
circos.clear()
# set gap based on speciesID
if (speciesID == "hg38" | speciesID == "hg19") {
circos.par(track.height = track_height_par, start.degree = 90, points.overflow.warning = FALSE, clock.wise = TRUE, canvas.xlim = c(-1.1, 1.1), canvas.ylim = c(-1.1, 1.1), gap.degree = c(rep(2, 8), 4, rep(2, 14), 4), track.margin = c(0.01, 0.01))
} else if (speciesID == "mm10" | speciesID == "mm9") {
circos.par(track.height = track_height_par, start.degree = 90, points.overflow.warning = FALSE, clock.wise = TRUE, canvas.xlim = c(-1.1, 1.1), canvas.ylim = c(-1.1, 1.1), gap.degree = c(rep(2, 8), 4, rep(2, 11), 4), track.margin = c(0.01, 0.01))
}
# initialize with Ideogram
circos.initializeWithIdeogram(plotType = c("ideogram", "labels"), species = speciesID)
# render 1st circle: B Allele Frequency (BAF)
if (doBAF == TRUE && length(baf.list.c[[i]][, 1]) > 0) {
labels_cex_index = labels_cex_index + 1
circos.genomicTrack(data = baf.list.c[[i]], numeric.column = 4, ylim = c(0, 1), panel.fun = function(region, value, ...) {
circos.genomicPoints(region, value, col = "#0d80d8", pch = 16, cex = 0.25, ...)
circos.lines(CELL_META$cell.xlim, c(0.5, 0.5), lty = 1, col = "#00000040", lwd = 0.25) # center line, gray
circos.lines(CELL_META$cell.xlim, c(1, 1), lty = 1, col = "#0dd84980", lwd = 0.75) # top line, green
circos.lines(CELL_META$cell.xlim, c(0, 0), lty = 1, col = "#ff000080", lwd = 0.75) # bottom line, red
circos.lines(CELL_META$cell.xlim, c(0.75, 0.75), lty = 1, col = "#00000040", lwd = 0.1) # 2nd quantile line, gray
circos.lines(CELL_META$cell.xlim, c(0.25, 0.25), lty = 1, col = "#00000040", lwd = 0.1) # 4th quantile line, gray
if (CELL_META$sector.index %in% "chr1") {circos.yaxis(side = "left", at = c(0, 0.5, 1), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)} else if (CELL_META$sector.index %in% "chr9") {circos.yaxis(side = "right", at = c(0, 0.5, 1), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)}
})
}
# render 2nd circle: Copy Number Alternations (CNA, somatic?), remember to change ylim!!!
if (doCNA == TRUE && length(cna.list.c[[i]][, 1]) > 0) {
labels_cex_index = labels_cex_index + 1
circos.genomicTrack(data = cna.list.c[[i]], numeric.colum = 4, ylim = c(-cna_track_ylim[[i]], cna_track_ylim[[i]]), panel.fun = function(region, value, ...) {
circos.genomicLines(region, value, type = "h", baseline = 0, col = ifelse(value[[1]] > 0, "#0dd849", "red"), ...)
circos.lines(CELL_META$cell.xlim, c(0, 0), lty = 1, col = "#00000040", lwd = 0.25) # center line, gray
circos.lines(CELL_META$cell.xlim, c(cna_track_ylim[[i]], cna_track_ylim[[i]]), lty = 1, col = "#00000040", lwd = 0.75) # top line, gray
circos.lines(CELL_META$cell.xlim, c(-cna_track_ylim[[i]], -cna_track_ylim[[i]]), lty = 1, col = "#00000040", lwd = 0.75) # bottom line, gray
circos.lines(CELL_META$cell.xlim, c(cna_track_ylim[[i]] * 0.5, cna_track_ylim[[i]] * 0.5), lty = 1, col = "#00000040", lwd = 0.1) # 2nd quantile line, gray
circos.lines(CELL_META$cell.xlim, c(-cna_track_ylim[[i]] * 0.5, -cna_track_ylim[[i]] * 0.5), lty = 1, col = "#00000040", lwd = 0.1) # 4th quantile line, gray
if (CELL_META$sector.index %in% "chr1") {circos.yaxis(side = "left", at = c(-cna_track_ylim[[i]], 0, cna_track_ylim[[i]]), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)} else if (CELL_META$sector.index %in% "chr9") {circos.yaxis(side = "right", at = c(-cna_track_ylim[[i]], 0, cna_track_ylim[[i]]), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)}
})
}
# render 3rd & 4th circle: Mutation Burden
if (TRUE %in% doMB) {
if ("somatic" %in% doMB && length(mb_somatic_d.list.c[[i]][, 1]) > 0) {
labels_cex_index = labels_cex_index + 1
circos.genomicTrack(data = mb_somatic_d.list.c[[i]], numeric.colum = 4, ylim = c(0, mb_track_ylim[[i]]), panel.fun = function(region, value, ...) {
circos.genomicLines(region, value, area = TRUE, baseline = 0, col = "darkorchid", border = NA, ...)
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]] * 0.5, mb_track_ylim[[i]] * 0.5), lty = 1, col = "#00000040", lwd = 0.25) # center line, gray
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]], mb_track_ylim[[i]]), lty = 1, col = "#00000040", lwd = 0.75) # top line, gray
circos.lines(CELL_META$cell.xlim, c(0, 0), lty = 1, col = "#00000040", lwd = 0.75) # bottom line, gray
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]] * 0.25, mb_track_ylim[[i]] * 0.25), lty = 1, col = "#00000040", lwd = 0.1) # 2nd quantile line, gray
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]] * 0.75, mb_track_ylim[[i]] * 0.75), lty = 1, col = "#00000040", lwd = 0.1) # 4th quantile line, gray
if (CELL_META$sector.index %in% "chr1") {circos.yaxis(side = "left", at = c(0, mb_track_ylim[[i]] * 0.5, mb_track_ylim[[i]]), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)} else if (CELL_META$sector.index %in% "chr9") {circos.yaxis(side = "right", at = c(0, mb_track_ylim[[i]] * 0.5, mb_track_ylim[[i]]), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)}
})
}
if ("germline" %in% doMB && length(mb_germline_d.list.c[[i]][, 1]) > 0) {
labels_cex_index = labels_cex_index + 1
circos.genomicTrack(data = mb_germline_d.list.c[[i]], numeric.colum = 4, ylim = c(0, mb_track_ylim[[i]]), panel.fun = function(region, value, ...) {
circos.genomicLines(region, value, area = TRUE, baseline = 0, col = "plum", border = NA, ...)
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]] * 0.5, mb_track_ylim[[i]] * 0.5), lty = 1, col = "#00000040", lwd = 0.25) # center line, gray
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]], mb_track_ylim[[i]]), lty = 1, col = "#00000040", lwd = 0.75) # top line, gray
circos.lines(CELL_META$cell.xlim, c(0, 0), lty = 1, col = "#00000040", lwd = 0.75) # bottom line, gray
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]] * 0.25, mb_track_ylim[[i]] * 0.25), lty = 1, col = "#00000040", lwd = 0.1) # 2nd quantile line, gray
circos.lines(CELL_META$cell.xlim, c(mb_track_ylim[[i]] * 0.75, mb_track_ylim[[i]] * 0.75), lty = 1, col = "#00000040", lwd = 0.1) # 4th quantile line, gray
if (CELL_META$sector.index %in% "chr1") {circos.yaxis(side = "left", at = c(0, mb_track_ylim[[i]] * 0.5, mb_track_ylim[[i]]), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)} else if (CELL_META$sector.index %in% "chr9") {circos.yaxis(side = "right", at = c(0, mb_track_ylim[[i]] * 0.5, mb_track_ylim[[i]]), labels = TRUE, tick = TRUE, labels.cex = labels_cex[labels_cex_index], tick.length = convert_x(0.75, "mm"), lwd = 0.25)}
})
}
}
# render 5th circle: Gene Expression
if (TRUE %in% doGE && length(ge.list.c[[i]][, 1]) > 0) {
if ("TPM" %in% doGE & !"log" %in% doGE) {
CCCM.TPM = colorRamp2(c(0, 100), c("white", "#e51091"))
circos.genomicHeatmap(ge.list.c[[i]], col = CCCM.TPM, numeric.column = 4, side = "inside", border = NA, connection_height = 0.0001, heatmap_height = track_height_par - 0.0001, line_col = NA)
} else if ("log" %in% doGE & !"TPM" %in% doGE) {
CCCM.breakpoint = lapply(ge.list.c, function(x) {
x = max(abs(x[, 5]))
return(x)
})
CCCM.breakpoint = dplyr::bind_rows(CCCM.breakpoint)
CCCM.breakpoint = max(CCCM.breakpoint)
CCCM.log10 = colorRamp2(c(-CCCM.breakpoint, 0, CCCM.breakpoint), c("royalblue3", "white", "red3"))
circos.genomicHeatmap(ge.list.c[[i]], col = CCCM.log10, numeric.column = "log10ratio", side = "inside", border = NA, connection_height = 0.0001, heatmap_height = track_height_par - 0.0001, line_col = NA)
}
}
# render 6th area: Fusion Links
if (doFL == TRUE && length(flf.list.c[[i]][, 1]) > 0) {
circos.genomicLink(flf.list.c[[i]], flt.list.c[[i]], col = fl_intraORinter.list.c[[i]], border = NA, directional = 1, arr.width = 0.05, arr.length = 0.1, arr.type = "triangle")
}
# render 7th area: Gene Labels
if (doFL == TRUE && length(flf.list.c[[i]][, 1]) > 0) {
par(new = TRUE)
circos.clear()
if (speciesID == "hg38" | speciesID == "hg19") {
circos.par(track.height = track_height_par, start.degree = 90, points.overflow.warning = FALSE, clock.wise = TRUE, canvas.xlim = c(-0.775, 0.775), canvas.ylim = c(-0.775, 0.775), gap.degree = c(rep(2, 8), 4, rep(2, 14), 4))
} else if (speciesID == "mm10" | speciesID == "mm9") {
circos.par(track.height = track_height_par, start.degree = 90, points.overflow.warning = FALSE, clock.wise = TRUE, canvas.xlim = c(-0.775, 0.775), canvas.ylim = c(-0.775, 0.775), gap.degree = c(rep(2, 8), 4, rep(2, 11), 4))
}
# canvas.x/ylim: bigger value = closer to center
circos.initializeWithIdeogram(plotType = NULL, species = speciesID)
circos.genomicLabels(lb.list.c[[i]], labels.column = 4, side = "outside", col = lb_intraORinter.list.c[[i]], line_col = lb_intraORinter.list.c[[i]], cex = 0.7, connection_height = 0.05, labels_height = 0.275)
}
# render 8th area: Title
title(main = plot_name[i], line = -4, adj = 0.08)
# render 9th area: Legend
if (doBAF == TRUE && length(baf.list.c[[i]][, 1]) > 0) {
lgd.baf = Legend(at = "BAF", type = "points", legend_gp = gpar(col = "#0d80d8"), title_position = "topleft", title = "\nB-Allele\nFrequency") # legend for BAF
}
if (doCNA == TRUE && length(cna.list.c[[i]][, 1]) > 0) {
lgd.cna = Legend(at = c("Gain", "Loss"), legend_gp = gpar(fill = c("#0dd849", "red")), title_position = "topleft", title = "\nCopy Number\nAlterations") # legend for CNA
}
if (TRUE %in% doMB) {
if (all(c("somatic", "germline") %in% doMB) && (length(mb_somatic_d.list.c[[i]][, 1]) > 0 & length(mb_germline_d.list.c[[i]][, 1]) > 0)) {
lgd.mb = Legend(at = c("Somatic", "Germline"), legend_gp = gpar(fill = c("darkorchid", "plum")), title_position = "topleft", title = "\nMutation\nBurden") # legend for MB, both tracks
} else if ("somatic" %in% doMB && length(mb_somatic_d.list.c[[i]][, 1]) > 0) {
lgd.mb = Legend(at = c("Somatic"), legend_gp = gpar(fill = c("darkorchid")), title_position = "topleft", title = "\nMutation\nBurden") # legend for MB, somatic track only
} else if ("germline" %in% doMB && length(mb_germline_d.list.c[[i]][, 1]) > 0) {
lgd.mb = Legend(at = c("Germline"), legend_gp = gpar(fill = c("plum")), title_position = "topleft", title = "\nMutation\nBurden") # legend for MB, germline track only
}
}
if (TRUE %in% doGE && length(ge.list.c[[i]][, 1]) > 0) {
if ("TPM" %in% doGE & !"log" %in% doGE) {
lgd.ge = Legend(at = c(0, 100), labels = c("0", "100+"), col_fun = CCCM.TPM, title_position = "topleft", title = "\nTPM") # legend for GE TPM
} else if ("log" %in% doGE & !"TPM" %in% doGE) {
lgd.ge = Legend(at = c(-CCCM.breakpoint, 0, CCCM.breakpoint), labels = c(-CCCM.breakpoint, 0, CCCM.breakpoint), col_fun = CCCM.log10, title_position = "topleft", title = "\nExpression\nLog Ratio") # legend for GE log10ratio
}
}
if (doFL == TRUE && length(flf.list.c[[i]][, 1]) > 0) {
if (all(c("gray33", "firebrick3") %in% lb_intraORinter.list.c[[i]])) {
lgd.fl = Legend(at = c("Intra-Chr", "Inter-Chr"), type = "lines", legend_gp = gpar(col = c("gray33", "firebrick3"), lwd = 2), title_position = "topleft", title = "\nFusion\nLinks") # legend for FL, both types
} else if ("gray33" %in% lb_intraORinter.list.c[[i]]) {
lgd.fl = Legend(at = c("Intra-Chr"), type = "lines", legend_gp = gpar(col = c("gray33"), lwd = 2), title_position = "topleft", title = "\nFusion\nLinks") # legend for FL, intra-chr type only
} else if ("firebrick3" %in% lb_intraORinter.list.c[[i]]) {
lgd.fl = Legend(at = c("Inter-Chr"), type = "lines", legend_gp = gpar(col = c("firebrick3"), lwd = 2), title_position = "topleft", title = "\nFusion\nLinks") # legend for FL, inter-chr type only
}
}
circos_moduleStatus = c(lgd.baf = exists("lgd.baf"), lgd.cna= exists("lgd.cna"), lgd.mb = exists("lgd.mb"), lgd.ge = exists("lgd.ge"), lgd.fl = exists("lgd.fl"))
circos_legend = ""
for (j in 1:length(circos_moduleStatus)) {
if (circos_moduleStatus[j] == TRUE) {
if (nchar(circos_legend) == 0 ) {
circos_legend = paste0(circos_legend, names(circos_moduleStatus)[j])
} else {
circos_legend = paste0(circos_legend, ", ", names(circos_moduleStatus)[j])
}
}
}
rm(j)
lgd.list = glue::glue("packLegend({circos_legend})")
lgd.list = eval(parse(text = lgd.list))
suppressWarnings(rm(circos_legend, circos_moduleStatus, lgd.baf, lgd.cna, lgd.mb, lgd.ge, lgd.fl))
pushViewport(viewport(x = 0.005, y = 0.4, width = 0.25, height = 0.25, just = c("left", "bottom"))) # move away from center(0, 0)
grid.draw(lgd.list)
# Clear
circos.clear()
# after running render, save the plot to a png.file
dev.off()
# remove temp variables
suppressWarnings(rm(track_height_par, labels_cex, labels_cex_index, CCCM.TPM, min0offset_circos, CCCM.breakpoint, CCCM.log10, lgd.list))
# progress statement, complete, with timestamp
print(as.character(glue::glue("plot {i} saved in {outputFolderPath}, {Sys.time()}")))
# end of the loop, always remember to reset loop variable "i"
}
rm(i)
# remove extracted global settings
rm(list = names(globalSettings_returnList))
# remove extracted prepareVdata_Circos_returnList variables
rm(list = names(prepareVdata_Circos_returnList))
# end of v_Circos function
print(as.character(glue::glue("v_Circos run completed, output plots saved in {outputFolderPath}")))
return()
}
# preset globalVariables for R CMD check
utils::globalVariables(c("BAF", "CNA", "Chr", "End", "GRCh37G", "GRCh38G", "GRCm38G", "Start", "TPM", "Tumor_Sample_Barcode", "assayID", "chr_len_H", "chr_len_M", "folder_name", "globalSettings_returnList", "groupInfo", "group_by", "grp.list", "grpName", "llpMultiLoader_returnList", "maf_germline.p", "plot_name", "speciesID", "baf.list.c", "cna.list.c", "cna_track_ylim", "fl_intraORinter.list.c", "flf.list.c", "flt.list.c", "ge.list.c", "lb.list.c", "lb_intraORinter.list.c", "mb_germline_d.list.c", "mb_somatic_d.list.c", "mb_track_ylim", "min0offset_circos", "prepareVdata_Circos_returnList", "studyID"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.