Nothing
#' @title draw_step_tree
#' @description Drawing of metabolic process matrix diagram and phylogenetic tree diagram
#' @param infomatrix Matrix generated using the words2steps function
#' @param Matrix The matrix about the steps or transformation or the databases and tools of the metabolic reconstruction.
#' @param stepTypes Grouping information for metabolic processes
#' @param contentTypes Grouping information for metabolic content
#' @return a plot
#' @import dplyr
#' @import ggplot2
#' @import ggtree
#' @import patchwork
#' @import SnowballC
#' @importFrom dplyr full_join
#' @importFrom magrittr `%>%`
#' @importFrom ape as.phylo
#' @importFrom stats hclust dist na.omit
#' @importFrom grDevices colorRampPalette
#' @importFrom RColorBrewer brewer.pal
#' @importFrom rlang .data
#' @importFrom plyr rbind.fill
#' @export
#' @examples
#' \donttest{p1 <- draw_step_tree(matrixProcess, stepsMatrix, stepTypes, contentTypes)}
draw_step_tree <- function(infomatrix, Matrix, stepTypes, contentTypes){
steps <- 1:93
infomatrix <- cbind(steps,infomatrix)
Matrix[is.na(Matrix)] <- 0
Matrix <- data.frame(Matrix)
# Draw the evolutionary tree of metabolic processes
rownames(Matrix) <- Matrix$Steps
data <- Matrix[,-c(1:4)]
#data <- data[apply(data, 1, function(x) any(x)!=0),apply(data, 2, function(x) any(x)!=0)]
data <- data.frame(data)
# Calculate the evolutionary tree distance
tree <- hclust(dist(data))
# Convert to the phylo object
tree <- ape::as.phylo(tree)
# Group information of metabolic processes
stepTypes <- data.frame(stepTypes)
info <- data.frame(label = rownames(data),
group = stepTypes$group[match(rownames(data), stepTypes$label)])
# Integrate the group information
tree <- full_join(tree, info , by='label')
# Color
col <- colorRampPalette(brewer.pal(12,'Paired'))(13)
p2 <- ggtree(tree)+
# Draw evolutionary tree lines with colors sorted by group information
geom_tree(aes(color = .data$group), size = 1)+
# Evolutionary tree with the opening facing down
layout_dendrogram()+
# Set the color information and remove the NA in the legend
scale_color_manual(values = col[1:8],
na.translate=FALSE)+
# Modify the title of the legend
labs(colour = 'Group of steps')+
# Adjust the thickness of the legend lines
guides(colour = guide_legend(override.aes = list(size=2)))
# Extract the information about the top and bottom positions of metabolic processes when the p2 evolutionary tree is drawn,
# which is used to adjust the relative positions of the columns of the matrix
p2$data <- data.frame(p2$data)
roworder <- rownames(p2$data[!is.na(p2$data$label),])
# Draw the evolutionary tree of metabolic content
# Remove the frequency information
data <- t(Matrix[,-c(1:4)])
#data <- data[apply(data, 1, function(x) any(x)!=0),apply(data, 2, function(x) any(x)!=0)]
# Calculate the evolutionary tree distance
tree1 <- hclust(dist(data))
# Convert to the phylo object
tree1 <- ape::as.phylo(tree1)
# The group information of metabolic contents
contentTypes <- data.frame(contentTypes)
info <- data.frame(label = rownames(data), group = contentTypes$group[match(rownames(data), contentTypes$label)])
# Integrate the group information
tree1 <- full_join(tree1, info , by='label')
p3 <- ggtree(tree1)+
geom_tree(aes(color = .data$group), size = 1)+
# The legend position is set to below and placed vertically
theme(legend.position = "bottom", legend.direction = "vertical")+
scale_color_manual(values = col[9:13],
na.translate=FALSE)+
labs(colour = 'Group of contents')+
guides(colour = guide_legend(override.aes = list(size=2)))
# Extract the information about the top and bottom positions of the metabolic content when the p3 evolutionary tree is drawn,
# which is used to adjust the relative positions of the rows of the matrix
p3$data <- data.frame(p3$data)
colorder <- p3$data$label[order(p3$data$y)] %>% na.omit() %>% as.character()
# Since the matrix is plotted with the y-axis flipped,
# the position information of the metabolic content here must also be flipped
colorder <- c('degree', rev(colorder))
infomatrix_2 <- infomatrix[which(infomatrix$steps %in% roworder),colorder]
# Matrix plotting
# Data Organization
stoi <- infomatrix_2[,!colnames(infomatrix_2) %in% 'degree'] %>% t()
stoi <- as.data.frame(stoi)
# Extract non-zero location and number information
coefficientPosi_1 <- which(stoi == -1,arr.ind = T)%>%data.frame()
coefficientPosi_1$num <- -1
coefficientPosi_2 <- which(stoi == 1,arr.ind = T)%>%data.frame()
coefficientPosi_2$num <- 1
coefficientPosi <- rbind(coefficientPosi_1, coefficientPosi_2)
# Find the line with output
f <- c()
for (i in 1:nrow(stoi)) {
if(length(which(stoi[i,] >0)))
f = c(f, i)
}
dim <- dim(stoi)
# Draw the connected line
# Row loop, drawing a line for each row
d <- data.frame()
for (i in f) {
a <- which(stoi[i,] != 0)# Extract the position of each row with non-zero stoichiometric coefficients
# After extracting the position information of each line, then add a line of NA,
# which means to interrupt the line between the previous line and the next line
a <- data.frame(row_x_1 = c(a, NA), col_y_1 = c(rep(i, length(a)), NA))
d <- rbind(d,a)
}
rownames(d) <- 1:nrow(d)
# Draw the connection line of the column
# Column loop, drawing a line for each column
e <- data.frame()
for (i in 1:dim[2]) {
a = which(stoi[,i] != 0)# Extract the position of each column with non-zero stoichiometric coefficients
# After extracting the position information of each row, then add a line of NA,
# the meaning is to interrupt the line between the previous column and the next column
a = data.frame(row_x_2 = c(rep(i, length(a)),NA), col_y_2 = c(a, NA), degree = c(rep(infomatrix[i,1], length(a)),NA))
e = rbind(e,a)
}
rownames(e) <- 1:nrow(e)
# Judgment of arrows in each line
# Create an empty data data frame to store the arrow coordinates in each row
arrow_row <- data.frame()
# Arrows
# Positive stoichiometric coefficients point to negative stoichiometric coefficients in the rows
for (i in 1:dim[1]) {
# Judge information on the location of the stoichiometric coefficient less than zero in the row
posi <- which(stoi[i,] < 0)
if(length(posi) > 0){# Determine if there are rows less than zero
for (ii in posi) {# All negative coefficients in the row participate in the loop,
# drawing an arrow pointing to it
if(max(stoi[i,1:(ii-1)]) > 0){# Determine if there is the coefficient greater than zero to the left of the negative coefficient
arrow_row <- rbind(arrow_row, c(c(ii-0.35, i, ii-0.1, i)))# Combine arrow position information in the rows
if(ii != ncol(stoi)){# When the position of the negative coefficient is not on the far right
if(max(stoi[i,(ii+1):ncol(stoi)]) > 0){# Determine if there is the coefficient greater than zero to the right of the negative coefficient
arrow_row <- rbind(arrow_row, c(ii+0.35, i, ii+0.1, i))# Combine arrow position information in the rows
}
}
} else {# Determine if there is the coefficient greater than zero to the right of the negative coefficient
if(ii != ncol(stoi)){# When the position of the negative coefficient is not on the far right
if(max(stoi[i,(ii+1):ncol(stoi)]) > 0){
arrow_row <- rbind(arrow_row, c(ii+0.35, i, ii+0.1, i))# Combine arrow position information in the rows
}
}
}
}
} else {
next
}
}
if(all(dim(arrow_row))){
# Change column names of data frame
colnames(arrow_row) <- c('x1', 'y1', 'xend1', 'yend1')
rownames(arrow_row) <- 1:nrow(arrow_row)
} else {
arrow_row <- data.frame(NA, NA, NA, NA)
colnames(arrow_row) <- c('x1', 'y1', 'xend1', 'yend1')
rownames(arrow_row) <- 1:nrow(arrow_row)
}
# Judgment of arrows in each column
# Create an empty data frame to store the arrow coordinates in each column
arrow_col <- data.frame()
# Negative stoichiometric coefficients point to positive stoichiometric coefficients in the column
for (i in 1:dim[2]) {
posi <- which(stoi[,i] > 0)
if(length(posi) > 0){
for (ii in posi) {
if(min(stoi[1:(ii-1), i]) < 0){
arrow_col <- rbind(arrow_col, c(c(i, ii-0.5, i, ii-0.25)))
if(ii != nrow(stoi)){
if(min(stoi[(ii+1):nrow(stoi), i]) < 0){
arrow_col <- rbind(arrow_col, c(c(i, ii+0.5, i, ii+0.25)))
}
}
} else {
if(ii != nrow(stoi)){
if(min(stoi[(ii+1):nrow(stoi), i]) < 0){
arrow_col <- rbind(arrow_col, c(c(i, ii+0.5, i, ii+0.25)))
}
}
}
}
} else {
next
}
}
if(all(dim(arrow_col))){
# Change column names of data frame
colnames(arrow_col) <- c('x2', 'y2', 'xend2', 'yend2')
rownames(arrow_col) <- 1:nrow(arrow_col)
} else {
arrow_col = data.frame(NA, NA, NA, NA)
colnames(arrow_col) <- c('x2', 'y2', 'xend2', 'yend2')
rownames(arrow_col) <- 1:nrow(arrow_col)
}
# Because the number of rows in each matrix is not the same and column merging with cbind will report an error, the rbind.fill function is used
pwdata <- data.frame(t(plyr::rbind.fill(as.data.frame(t(coefficientPosi)), as.data.frame(t(d)),
as.data.frame(t(e)), as.data.frame(t(arrow_row)),as.data.frame(t(arrow_col)))))
colnames(pwdata) <- c('row_x', 'col_y', 'text', 'row_x_1', 'col_y_1', 'row_x_2', 'col_y_2', 'degree',
'x1', 'y1', 'xend1', 'yend1', 'x2', 'y2', 'xend2', 'yend2')
# Append information metabolite name and chemical reaction name, and add NA according to pwdata row number
pwdata <- cbind(pwdata, colname = c(colnames(stoi), rep(NA, (nrow(pwdata)-dim[2]))),
rowname = c(str_replace_all(rownames(stoi), '\\.', ' '), rep(NA, (nrow(pwdata)-dim[1]))))
pwdata$row_x_2[(nrow(pwdata)-3):nrow(pwdata)] <- c(1,1,93,93)
pwdata$col_y_2[(nrow(pwdata)-3):nrow(pwdata)] <- c(1,66,1,66)
#The maximum degree is 30
pwdata$degree[nrow(pwdata)] <- 30
p1 <- ggplot(data = pwdata, aes(colour = .data$degree)) +
# Change y-axis scale label to metabolite name
scale_y_reverse(name = 'Contents', breaks = seq(1, dim[1], 1), expand = c(0.01,0.01),
labels = pwdata$rowname[!is.na(pwdata$rowname)], position = 'right') +
# The x-axis is set at the top, and the x-axis scale label is changed to the reaction name.
scale_x_continuous(name = 'Steps', breaks = seq(1, dim[2], 1), expand = c(0.008,0.008),
labels = pwdata$colname[!is.na(pwdata$colname)]) +
# Draw the connecting lines for each row
geom_path(aes(x = .data$row_x_1, y = .data$col_y_1), na.rm = T, linewidth = 1) +
# Draw the connecting lines for each column
geom_path(aes(x = .data$row_x_2, y = .data$col_y_2, colour = .data$degree), na.rm = T, linewidth = 1, show.legend = T) +
# Change axis label format, rotation, color, font, etc.
theme(axis.text.x = element_text(colour = "grey20", size = 10, angle = 90, hjust = 1, vjust = 0, face = "plain"),
axis.text.y = element_text(colour = "grey20", size = 10, angle = 0, hjust = 1, vjust = 0, face = "plain"))+
# Draw the arrows for each row
geom_segment(aes(x = .data$x1, y = .data$y1,xend = .data$xend1, yend = .data$yend1),
arrow = arrow(length = unit(0.2, "cm"), angle = 20), colour = "red", lwd = 0.5, na.rm = T) +
# Draw the arrows for each column
geom_segment(aes(x = .data$x2, y = .data$y2,xend = .data$xend2, yend = .data$yend2),
arrow = arrow(length = unit(0.2, "cm"), angle = 20), colour = "red", lwd = 0.5, na.rm = T) +
# To ensure that there is no overlap between the straight lines and the text of the measurement coefficients,
# an alternative approach is to add white labels at the corresponding positions
geom_point(data = filter(pwdata, .data$text == -1), aes(x = .data$col_y, y = .data$row_x), size = 3, color = "#8ecfc9", na.rm = T)+
geom_point(data = filter(pwdata, .data$text == 1), aes(x = .data$col_y, y = .data$row_x), size = 3, color = "#FA7F6F", na.rm = T)+
# It should be noted that the parameter values of scale_color_gradientn is according to the scale, not the actual value,
# for the actual range of the graph degree of archaea is 0-7
# To ensure that the gradient colors of archaea, bacteria and eukaryotes are comparable,
# we need to set a false degree: 30 for archaea, which is the value given by the code in line 211
scale_color_gradientn(values = seq(0,1,0.1),
colours = colorRampPalette(c("#8ecfc9", '#82B0D2', '#BEB8DC', '#FFBE7A', "#FA7F6F"))(10))+
# Remove the subgrid lines
theme(panel.grid.minor = element_blank())
# Layout format between the three figures
design <- "#A
BC"
p <- wrap_plots(A = p2, B = p3, C = p1,
design = design)+
# Legend integrated into one piece
plot_layout(guides = 'collect')+
# B and C, A and C graphics ratio set to 1:3
plot_layout(widths = c(1, 3))+
plot_layout(heights = c(1, 3))
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.