#' mutTrunkBranch
#' @description Summarize and conduct paired Fisher test of mutations of trunk/branches in a phylogenetic tree.
#'
#' @param phyloTree phyloTree or phyloTreeList object generated by \code{\link{getPhyloTree}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included
#' @param CT Distinction between C>T at CpG and C>T at other sites. (Default: FALSE).
#' @param pvalue Confidence level of the interval for Fisher test. Default 0.05.
#' @param plot Logical. (Default: TRUE).
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#'
#' ## Load a reference genome.
#' library(BSgenome.Hsapiens.UCSC.hg19)
#'
#' phyloTree <- getPhyloTree(maf, patient.id = 'V402')
#' mutTrunkBranch(phyloTree, plot = TRUE)
#'
#' @return a list of box plots based on mutational categories
#' @importFrom data.table data.table
#' @importFrom stats fisher.test
#' @export mutTrunkBranch
mutTrunkBranch <- function(phyloTree,
patient.id = NULL,
CT = FALSE,
pvalue = 0.05,
plot = TRUE){
processMTB <- function(phyloTree){
patient <- getPhyloTreePatient(phyloTree)
## get trinucleotide matrix
tsb.label <- getPhyloTreeTsbLabel(phyloTree)
if(nrow(tsb.label) > 0){
tri_matrix <- tri_matrix_list[[patient]]$tri_matrix
}else{
tri_matrix <- tri_matrix_list[[patient]]
}
## set groups
if(CT){
group_level <- c("C>A","C>G","C>T at CpG","C>T other","T>A","T>C","T>G")
}else{
group_level <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
}
## separate trunk and branch data
tri_matrix_trunk <- tri_matrix["Trunk", ]
tri_matrix_branch <- tri_matrix["Branches", ]
if(is(tri_matrix_branch, "matrix")){
tri_matrix_branch <- colSums(tri_matrix_branch)
}
tri_matrixBT <- rbind(Trunk=tri_matrix_trunk, Branch=tri_matrix_branch)
## mutation spectrum
if(CT){
BT_spectrum <- data.frame(Type = colnames(tri_matrixBT), t(tri_matrixBT)) %>%
dplyr::rowwise() %>%
dplyr::mutate(Group = dplyr::case_when(
grepl("CpG",.data$Type) ~
"C>T at CpG",
grepl("other",.data$Type) ~
"C>T other",
!grepl("CpG|other",.data$Type) ~
paste(strsplit(as.character(.data$Type) ,"")[[1]][3:5], collapse = "")
)) %>%
as.data.frame() %>%
tidyr::pivot_longer(
cols = c("Trunk","Branch"),
names_to = "BT",
values_to = "mut_num")
}else{
BT_spectrum <- data.frame(Type = colnames(tri_matrixBT), t(tri_matrixBT)) %>%
dplyr::rowwise() %>%
dplyr::mutate(Group = paste(strsplit(as.character(.data$Type) ,"")[[1]][3:5], collapse = "")) %>%
as.data.frame()%>%
tidyr::pivot_longer(
cols = c("Trunk","Branch"),
names_to = "BT",
values_to = "mut_num")
}
# print(unique(BT_spectrum$Type))
## fisher test
df_pvalue_list <- lapply(group_level, function(group){
b1 <- sum(BT_spectrum[BT_spectrum$Group == group & BT_spectrum$BT == "Branch", ]$mut_num)
b2 <- sum(BT_spectrum[BT_spectrum$Group != group & BT_spectrum$BT == "Branch", ]$mut_num)
t1<- sum(BT_spectrum[BT_spectrum$Group == group & BT_spectrum$BT == "Trunk", ]$mut_num)
t2 <- sum(BT_spectrum[BT_spectrum$Group != group & BT_spectrum$BT == "Trunk", ]$mut_num)
if(all(!is.nan(b1)) & all(!is.nan(t1))){
m <- matrix(c(b1, t1, b2, t2),ncol = 2)
pValue <- stats::fisher.test(m,alternative = "two.sided")$p.value
}
else{
pValue <- NA
}
sub <- data.frame(group, pValue)
colnames(sub) <- c("Group", "P_Value")
return(sub)
})
df_pvalue <- dplyr::bind_rows(df_pvalue_list) %>% as.data.table()
## generate output data.frame with quantity of mutations in different categories
BT_summary <- BT_spectrum %>%
dplyr::group_by(.data$BT, .data$Group) %>%
dplyr::summarise(mut_num = sum(.data$mut_num)) %>%
tidyr::pivot_wider(names_from = "BT",
values_from = "mut_num") %>%
merge(df_pvalue,by = "Group") %>%
dplyr::mutate(Patient_ID = patient) %>%
dplyr::select("Patient_ID", "Group", "Trunk", "Branch", "P_Value")
if(plot){
trunk_num <- sum(BT_summary$Trunk)
branch_num <- sum(BT_summary$Branch)
if(sum(BT_summary$Trunk) == 0){
BT_summary$Trunk <- 0
}else{
BT_summary$Trunk <- BT_summary$Trunk/sum(BT_summary$Trunk)
}
if(sum(BT_summary$Branch) == 0){
BT_summary$Branch <- 0
}else{
BT_summary$Branch <- BT_summary$Branch/sum(BT_summary$Branch)
}
## set data table for bar plot
dat <- BT_summary %>%
tidyr::pivot_longer(cols = c("Trunk","Branch"),
names_to = "BT",
values_to = "fraction") %>%
as.data.frame()
dat$Group <- factor(dat$Group, levels = unique(group_level))
dat <- dplyr::arrange(dat, dplyr::desc(.data$Group))
## get position
dat <- as.data.table(dat)
dat$rect.xmin <- 0
dat$rect.xmax <- 0
dat$rect.ymin <- 0
dat$rect.ymax <- 0
i <- 0
i_list1 <- seq(0, 0.15*(length(unique(dat$BT))-1), 0.15)
i_list2 <- i_list1 + 0.1
d_list <- lapply(seq_len(length(unique(dat$BT))), function(i){
bt <- unique(dat$BT)[i]
d <- dat[dat$BT == bt,]
d$rect.xmin <- i_list1[i]
d$rect.xmax <- i_list2[i]
## get position for y axis
yy <- cumsum(d$fraction)
len.yy <- length(yy)
d$rect.ymin <- c(0,yy[-len.yy])
d$rect.ymax <- yy
return(d)
})
dat <- dplyr::bind_rows(d_list) %>% as.data.table()
## set table for number of mutation
num_table_list <- lapply(unique(dat$BT), function(bt){
xmin <- min(dat[dat$BT == bt]$rect.xmin)
xmax <- max(dat[dat$BT == bt]$rect.xmax)
x <- xmin + (xmax-xmin)/2
if(bt == "Trunk"){
sub <- data.table(x = x, y = 1.03, num = trunk_num)
}else{
sub <- data.table(x = x, y = 1.03, num = branch_num)
}
return(sub)
})
num_table <- dplyr::bind_rows(num_table_list) %>% as.data.table()
## set table for pvalue
pvalue_table_list <- lapply(unique(dat$Group), function(g){
v <- unique(dat[dat$Group == g]$P_Value)
if(is.na(v)){
return(data.table())
}
if(v < pvalue){
x <- max(dat[dat$Group == g]$rect.xmax)
ymin <- dat[dat$Group == g & dat$BT == "Branch"]$rect.ymin
ymax <- dat[dat$Group == g& dat$BT == "Branch"]$rect.ymax
y <- ymin + (ymax - ymin)/2
sub <- data.table(x,y,label = "*")
return(sub)
}else{
return(data.table())
}
})
pvalue_table <- dplyr::bind_rows(pvalue_table_list) %>% as.data.table()
## set segment from trunk to branches
segment_table_list <- lapply(unique(dat$Group), function(g){
fractions <- dat[dat$Group == g]$fraction
if(all(fractions == 0)){
return(data.table())
}
x1 <- dat[dat$Group == g& dat$BT == "Trunk"]$rect.xmax
x2 <- dat[dat$Group == g& dat$BT == "Branch"]$rect.xmin
y1 <- dat[dat$Group == g& dat$BT == "Trunk"]$rect.ymax
y2 <- dat[dat$Group == g& dat$BT == "Branch"]$rect.ymax
sub <- data.table(x1,x2,y1,y2)
return(sub)
})
segment_table <- dplyr::bind_rows(segment_table_list) %>% as.data.table()
## set color
if(CT){
all_color <- c("C>A" = "#E64B35FF",
"C>G" = "#4DBBD5FF",
"T>A" = "#3C5488FF",
"T>C" = "#F39B7FFF",
"T>G" = "#8491B4FF",
"C>T at CpG" = "#00A087FF",
"C>T other" = "#91D1C2FF")
}else{
all_color <- c("C>A" = "#E64B35FF",
"C>G" = "#4DBBD5FF",
"C>T" = "#00A087FF",
"T>A" = "#3C5488FF",
"T>C" = "#F39B7FFF",
"T>G" = "#8491B4FF")
}
group_color <- all_color[unique(dat$Group)]
## initialize variable in ggplot for biocheck error
rect.xmin <- NULL
rect.xmax <- NULL
rect.ymin <- NULL
rect.ymax <- NULL
Group <- NULL
num <- NULL
label <- NULL
x <- NULL
x1 <- NULL
x2 <- NULL
y <- NULL
y1 <- NULL
y2 <- NULL
## plot
pic <- ggplot() +
geom_rect(data = dat,
aes(xmin = rect.xmin, xmax = rect.xmax,
ymin = rect.ymin, ymax = rect.ymax,
fill = Group),color = "black")+
# ## lable patientid
# geom_text(data = patient.text.table,
# aes(x = p.x, y = 1.1, label = Patient_ID),size = 6)+
ggtitle(patient) +
# label number of mutation
geom_text(data = num_table,
aes(x = x, y = y, label = num),size = 4)+
geom_segment(aes(y = 0 ,yend = 1,x=-Inf,xend=-Inf), size = 1.5)+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
plot.title = element_text(hjust = 0.48,size = 13.5,vjust = -2),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 11,colour = "black"),
axis.text.x = element_text(angle = 60,size = 11,colour = "black",hjust = 1,margin = margin(t = -10)),
axis.text.y = element_text(size = 10,colour = "black"),
axis.ticks.length.y = unit(.25, "cm"),
axis.ticks.y = element_line(size = 1)) +
scale_fill_manual(values = group_color) +
xlab("") +
ylab("Proportion")+
# ggtitle(unique(dat$Patient_ID)) +
scale_y_continuous(breaks = c(0,0.25,0.50,0.75,1))+
scale_x_continuous(breaks = unique(dat$rect.xmin+(dat$rect.xmax - dat$rect.xmin)/2) ,
labels = rep(c("Trunk","Branches"),length(unique(dat$Patient_ID))))
## label significant pvalue
if(nrow(pvalue_table) > 0){
pic <- pic +
geom_text(data = pvalue_table,
aes(x = x+0.01, y = y, label = label),
size = 7,vjust = 0.8)
}
## segment between trunk and branches
if(nrow(segment_table) > 0){
pic <- pic +
geom_segment(data = segment_table,
aes(x = x1, xend = x2, y = y1, yend = y2))
}
}
if(plot){
return(list(mutTrunkBranch.res = BT_summary, mutTrunkBranch.plot = pic))
}else{
return(list(mutTrunkBranch.res = BT_summary))
}
}
## preprocess input data
phyloTree_input <- subPhyloTree(phyloTree, patient.id = patient.id)
## get trinucleotide matrix
tri_matrix_list <- subTriMatrix(phyloTree_list = phyloTree_input, CT = CT, level = "6")
result <- lapply(phyloTree_input, processMTB)
result <- result[!is.na(result)]
if(length(result) == 1){
return(result[[1]])
}else if(length(result) == 0){
return(NA)
}else{
return(result)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.