#' Visualize a tidy pedigree
#'
#' \code{visped} function draws a graph of a full or compact pedigree.
#'
#' This function takes a pedigree tidied by the \code{\link{tidyped}} function, outputs a hierarchical graph for all individuals in the pedigree. The graph can be shown on the defaulted graphic device and be saved in a pdf file. The graph in the pdf file is a vector drawing, is legible and isn't overlapped. It is especially useful when the number of individuals is big and the width of individual label is long in one generation. This function can draw the graph of a very large pedigree (> 10,000 individuals per generation) by compacting the full-sib individuals. It is very effective for drawing the pedigree of aquatic animal, which usually including many full-sib families per generation in the nucleus breeding population. The outline of a pedigree without individuals' label is still shown if the width of a pedigree graph is longer than the maximum width (200 inches) of the pdf file.
#'
#' In the graph, two shapes and three colors are used. Circle is for individual, square is for family. Dark sky blue means male, dark golden rod means female, dark olive green means unknown sex. For example, one circle with dark sky blue means a male individual; One square with dark golden rod means all female individuals in a full-sib family when \code{compact = TRUE}.
#'
#' @param ped A data.table including the pedigree tidied by the \code{\link{tidyped}} function with the parameter \code{addnum=TRUE}. It is recommended that the pedigree is tidied and pruned by candidates using the \code{\link{tidyped}} function with the not null parameter \code{cand}.
#' @param compact A logical value indicating whether IDs of full-sib individuals in one generation will be deleted and replaced with the number of full-sib individuals. For example, if there are 100 full-sib individuals in one generation, they will be deleted from the pedigree and be replaced with one individual label of "100" when \code{compact = TRUE}. The default value is FALSE
#' @param outline A logical value indicating whether shapes without label will be shown. A graph of the pedigree without individuals' label is shown when setting \code{outline = TRUE}. It is very useful for viewing the outline of the pedigree and finding the immigrant individuals in each generation when the width of a pedigree graph is longer than the maximum width (200 inches) of the pdf file. The defaulted value is FALSE.
#' @param cex NULL or a numeric value changing the size of individual label shown in the graph. \emph{cex} is an abbreviation of character expansion factor. \code{visped} function will try to guess (\code{cex=NULL}) the matched cex value and returned it in the messages. According to the returned cex of the last run, this parameter should be increased if the label's width is longer than that of the shape in the output pdf file; Contrariwise, this parameter should be decreased if the label's width is shorter than that of the shape in the output pdf file; then rerunning \code{visped} function. The default value is NULL.
#' @param showgraph A logical value indicating whether a plot will be shown in the defaulted graphic device, such as the Plots panel of Rstudio. It is useful for quick viewing of the pedigree graph without opening the pdf file. However, the graph on the defaulted graphic device may be not legible, such as overlapped labels, aliasing lines due to the restricted width and height. It's a good choice to set \code{showgraph = FALSE} when the pedigree is large. The default value is TRUE.
#' @param file NULL or a character value means whether the pedigree graph will be saved in a pdf file. The graph in the pdf file is a legible vector drawing, and labels isn't overlapped especially when the number of individuals is big and width of the individual label is long in one generation. It is recommended that saving a pedigree graph in the pdf file. The default value is NULL.
#' @return No returned values. The graph will be plotted directly on graphic devices.
#'
#' @examples
#' library(data.table)
#' # Drawing a simple pedigree
#' simple_ped
#' simple_ped_tidy <- tidyped(simple_ped)
#' visped(simple_ped_tidy)
#' # Drawing a simple pedigree of a individual with id of J5X804
#' simple_ped_J5X804_tidy <- tidyped(simple_ped,cand=c("J5X804"))
#' visped(simple_ped_J5X804_tidy)
#' # Drawing the graph in the pdf file
#' visped(simple_ped_J5X804_tidy,file="output.pdf")
#' # Drawing a compact pedigree
#' # The candidates' labels in 2007
#' cand_labels <- big_family_size_ped[(Year == 2007) & (substr(Ind,1,2) == "G8"),Ind]
#' big_ped_tidy <- tidyped(big_family_size_ped,cand=cand_labels)
#' visped(big_ped_tidy,compact=TRUE)
#' visped(big_ped_tidy,compact=TRUE, file="output.pdf")
#' # Individual labels are not shown
#' visped(big_ped_tidy,compact=TRUE, outline=TRUE, file="output.pdf")
#'
#' @import data.table
#' @import igraph
#' @importFrom grDevices pdf dev.off
#' @importFrom graphics strwidth
#' @export
visped <- function(ped,
compact = FALSE, outline = FALSE, cex = NULL, showgraph = TRUE, file = NULL) {
if (is.null(attributes(ped)$tidyped)) {
stop("The pedigree need to be firstly trimmed by the tidyped() function!")
} else {
if (!attr(ped,"tidyped")) {
stop("The pedigree need to be firstly trimmed by the tidyped() function!")
}
}
ped_new <- copy(ped)
# Convertting pedigree to nodes and edges data.table
# Reserved digits
fixed_digits <- 7
# Digits when calculating
options(digits=20)
# IndNum, SireNum, and DamNum columns are used as IDs to node and edges
#ped_col_names <- names(ped)
# if (!all(c("IndNum", "SireNum", "DamNum") %in% ped_col_names)) {
# stop("The pedigree need to be firstly trimmed by the tidyped() function!")
# }
ped_igraph <- ped2igraph(ped_new, compact)
real_node <- ped_igraph$node[nodetype %in% c("real", "compact")]
gen_node_num <- real_node[, .N, by = gen]
gen_max_size <- max(gen_node_num$N, na.rm = TRUE)
#=== Obtaining the maximum width of nodes' label =====================================================
# The maximum width of a pdf file is 200 inch
pdf_max_width = 200
cexs <- seq(from = 0.1, to = 1, by = 0.05)
best_cex <- 0
max_strwidth_label <- real_node[which.max(strwidth(real_node$label, cex = 1, units = "inches")), label]
for (i in length(cexs):1) {
# Obtaining the maximum width of a node's label: inch
label_max_width <- max(strwidth(max_strwidth_label, cex = cexs[i], units = "inches"),
na.rm = TRUE)
# Fixing the width of the node when the number of nodes (individuals) is small in one generation
# The unit of 0.8 is inch, about 2cm for the width of one node
if (gen_max_size <= 16 & label_max_width < 0.8) {
label_max_width = 0.8
}
if ((label_max_width * gen_max_size) < pdf_max_width) {
best_cex <- cexs[i] * 0.7
break
}
}
if (!outline & best_cex == 0) {
stop(
"Too many individuals (>=",
gen_max_size,
") in one generation!!! Two choices:\n", "1. Removing full-sib individuals using the parameter compact = TRUE; or, \n",
"2. Visualizing all nodes without labels using the parameter outline = TRUE.\n",
"Rerun visped() function!")
}
#=== Generating the hierarchy layout of all nodes using the sugiyama algorithm =======
hgap <- round(1 / gen_max_size, 8)
gen_num <- max(real_node$gen, na.rm = TRUE)
max_layer <- max(ped_igraph$node$layer, na.rm = TRUE)
# Restoring the IDs for all nodes in each layer
layers <- vector(mode = "list", length = max_layer)
k <- 1
for (i in max_layer:1) {
layers[[k]] <- ped_igraph$node[layer == i, id]
k <- k + 1
}
g <- graph_from_data_frame(ped_igraph$edge, directed = TRUE, ped_igraph$node)
l <- layout_with_sugiyama(g,
layers = apply(sapply(layers, function(x)
V(g)$name %in% x)
, 1
, which),
hgap = hgap,
maxiter = 120,
attributes = "all")$layout
l <- norm_coords(l,
xmin = 0,
xmax = 1,
ymin = 0,
ymax = 1)
ped_igraph$node <- cbind(ped_igraph$node, x = l[, 1], y = l[, 2])
real_node <- ped_igraph$node[nodetype %in% c("real", "compact")]
#===Repelling duplicated x positions=================================================
for (i in 1:gen_num) {
v_rank <- rank(real_node[gen == i, x], na.last = TRUE, ties.method = "first")
x_sorted <- sort(real_node[gen == i, x])
x_sorted <- round(x_sorted,fixed_digits)
x_new <- repeloverlap(x_sorted)
real_node[gen == i, x := x_new[v_rank]]
}
ped_igraph$node[nodetype %in% c("real", "compact")] <- real_node
#=== Adjusting space between two nodes (individuals) in x axis for each generation ==
if ((!outline) & gen_max_size >= 2) {
x_stats_gen <-
real_node[, .(.N, range = max(x, na.rm = TRUE) - min(x, na.rm = TRUE)),
by =gen]
meanspace = NULL # due to NSE notes in R CMD check
x_stats_gen[range > 0 & N > 1, ":="(meanspace = range / (N - 1))]
l_x_range <- range(l[, 1], na.rm = TRUE)
l_x_distance <- diff(l_x_range)
max_gen_mean_space <- max(x_stats_gen$meanspace, na.rm = TRUE)
for (i in 1:gen_num) {
v_rank <- rank(real_node[gen == i, x], na.last = TRUE, ties.method = "first")
node_num <- length(v_rank)
if (node_num >= 2) {
x_distance_1 <- diff(range(real_node[gen == i, x], na.rm = TRUE))
x_distance_2 <- (node_num - 1) * max_gen_mean_space
if (x_distance_2 > l_x_distance) {
x_distance_2 <- l_x_distance
}
mean_space <- round(x_distance_2 / (node_num - 1), 8)
a <- x_distance_2 - x_distance_1
x_min <- min(real_node[gen == i, x], na.rm = TRUE)
b <- x_min - a
if (b > 0) {
x_min <- b
} else {
x_min <- 0
}
x_new <- x_min + seq(from = 0, to = node_num - 1) * mean_space
real_node[gen == i, x := x_new[v_rank]]
}
}
ped_igraph$node[nodetype %in% c("real", "compact")] <- real_node
}
#=== Matching a virtual node's x pos to the samllest position of the full-sib =======
# A virutal node is a tie between two parents and their progenies
virtual_node <- ped_igraph$node[nodetype %in% c("virtual")]
for (i in 2:gen_num) {
real_family_min_x <- real_node[gen == i, .(minx = min(x,na.rm=TRUE)),by=c("familylabel")]
virtual_family_label <- virtual_node[gen == i, familylabel]
min_x <- real_family_min_x[match(virtual_family_label, familylabel),minx]
virtual_node[gen == i, x := min_x]
}
ped_igraph$node[nodetype %in% c("virtual")] <- virtual_node
# l[,1] <- ped_igraph$node[match(V(g)$name,as.character(id)),x]
l[, 1] <- ped_igraph$node[, x]
#=== Rescalling canvas' size, node's size and edge's size ==============================
# calculate the width of each node: inch
node_width_s <- label_max_width
if (!outline) {
if (gen_max_size >= 2) {
x_stats_gen <-
real_node[, .(.N, range = max(x, na.rm = TRUE) - min(x, na.rm = TRUE)),
by = gen]
x_stats_gen[range > 0 & N > 1, ":="(meanspace = range / (N - 1))]
min_node_space <-
min(x_stats_gen[meanspace > 0, meanspace], na.rm = TRUE)
f <- 1
if (max(x_stats_gen$N) <= 16) {
# increase large space between two nodes when the node number is small
f <- 3 * round(node_width_s / min_node_space, 8)
}
else {
# keep samll space between two nodes when the node number are big
f <- round(node_width_s / min_node_space, 8)
}
} else {
f <- 1
}
x_f <- f * (l[, 1])
# Setting the width of the canvas
# Adding extra 6 node width to the canvas's width to decerase node size
# when only have one node in one layer
# because node size is equal to the percentage of node width to the canvas width
canvas_width_s <- max(x_f, na.rm = TRUE) - min(x_f, na.rm = TRUE) + 6 * node_width_s
}
# Finding better node_width when the number of nodes is not too big.
if (outline) {
# Maybe < 20,000,000 nodes could be shown in one generation
node_width_s <- 0.0001
node_width_v <- seq(from=label_max_width, to=node_width_s,by=-0.0001)
canvas_width_v <- node_width_v * gen_max_size
if (min(canvas_width_v,na.rm = TRUE) > pdf_max_width) {
stop("The outline of the pedigree is not shwon due to too many nodes in one genertion")
}
canvas_width_v <- sort(canvas_width_v)
canvas_width_s <- max(canvas_width_v[canvas_width_v < pdf_max_width])
}
# The maximum width or height for a pdf file is 200 inch
pdf_maximum_width <- pdf_maximum_height <- pdf_max_width
if (canvas_width_s > pdf_maximum_width) {
canvas_width_s <- pdf_maximum_width
}
if (canvas_width_s <= 10) {
canvas_width_s <- 10
}
canvas_height <- canvas_width_s * 0.618
# inch
gen_height <- 0.618
if (canvas_height < gen_num * (node_width_s) + 3 * node_width_s) {
canvas_height <- gen_num * (node_width_s) + 3 * node_width_s
}
if (canvas_height > pdf_maximum_height) {
canvas_height <- pdf_maximum_height
}
# node_size is a percentage of the width of node to graph
node_size <- round(node_width_s * 100 / canvas_width_s, 8)
edge_size <- node_size * 0.001
edge_arrow_size <- node_size * 0.002
edge_arrow_width <- node_size * 0.006
V(g)$size[V(g)$nodetype %in% c("real", "compact")] = node_size
if (outline) {
V(g)$label <- ""
}
if (!outline) {
if (is.null(cex) & (best_cex > 0)) {
V(g)$label.cex[V(g)$nodetype %in% c("real", "compact")] = best_cex
} else {
V(g)$label.cex[V(g)$nodetype %in% c("real", "compact")] = cex
}
}
E(g)$size = edge_size
E(g)$arrow.size = edge_arrow_size
E(g)$arrow.width = edge_arrow_width
#===Draw the peidgree================================================================
if (showgraph) {
plot.igraph(
g,
rescale = FALSE,
xlim = c(0 - node_size / 100, 1 + node_size / 100),
ylim = c(1 + node_size / 100, 0 - node_size / 100),
layout = l,
asp = 0
)
}
if (!is.null(file)) {
pdf(file = file,
width = canvas_width_s,
height = canvas_height)
plot.igraph(
g,
rescale = FALSE,
xlim = c(0 - node_size / 100, 1 + node_size / 100),
ylim = c(1 + node_size / 100, 0 - node_size / 100),
layout = l,
asp = 0
)
dev.off()
message(paste("The vector drawing of the pedigree is saved in the ",
getwd(),
"/",file," file",sep=""))
}
if (!outline) {
if (is.null(cex)) {
message(paste("The cex for individual label is ", best_cex, ".", sep = ""))
} else {
message(paste("The cex for individual label is ", cex, ".", sep = ""))
}
message("Please decrease or increase the value of the parameter cex if the label's width is longer or shorter than that of the circle or square in the graph.")
}
if (is.null(file)) {
message("It is recommended that the pedigree graph is saved in the pdf file using the parameter file")
message("The graph in the pdf file is a vector drawing: shapes, labels and lines are legible; shapes and labels isn't overlapped.")
}
options(digits=7)
}
ped2igraph <- function(ped,compact=TRUE) {
ped_new <- copy(ped)
ped_col_names <- colnames(ped_new)
# There is the Cand column in the pedigree if it is traced by the tidyped function
if (c("Cand") %in% ped_col_names) {
ped_node <-
ped_new[, .(
id = IndNum,
label = Ind,
sirenum = SireNum,
damnum = DamNum,
sirelabel = Sire,
damlabel = Dam,
cand = Cand,
sex = Sex,
gen = Gen
)]
} else {
ped_node <-
ped_new[, .(
id = IndNum,
label = Ind,
sirenum = SireNum,
damnum = DamNum,
sirelabel = Sire,
damlabel = Dam,
sex = Sex,
gen = Gen
)]
}
max_id <- max(ped_node$id,na.rm = TRUE)
# Adding two new columns family label (column name: familylabel) and it's numeric id
# (column name: familynum) in the ped_node
familylabel = NULL # due to NSE notes in R CMD check
ped_node[!(is.na(sirelabel) &
is.na(damlabel)),
familylabel := paste(sirelabel, damlabel, sep = "")]
family_label <- unique(ped_node$familylabel)
family_label <- family_label[!is.na(family_label)]
family_num <-
setDT(list(
familynum = seq(
from = max(ped_node$id,na.rm=TRUE) + 1,
to = max(ped_node$id,na.rm=TRUE) + length(family_label)
),
familylabel = family_label
))
ped_node <-
merge(ped_node,
family_num,
by = c("familylabel"),
all.x = TRUE)
ped_node[is.na(familynum), familynum := 0]
# There will be three node types in the ped_note, including real, compact, and virtual.
# Real nodes are all individuals in the pedigree.
# Compact nodes are full-sib individuals with parents, but without progeny,
# they exist only when the "compact" paramete is TRUE
nodetype = NULL # due to NSE notes in R CMD check
ped_node[,nodetype:="real"]
#=== Compact the pedigree============================================================
# Full-sib individuals with parents but without progeny will be deleted from ped_note.
# count individuals by family and sex as a number of node replace full-sib individuals
if (compact) {
# Finding the individuals with parents, but without progeny
sire_dam_label <- unique(c(ped_node$sirelabel,ped_node$damlabel))
sire_dam_label <- sire_dam_label[!is.na(sire_dam_label)]
ped_node_1 <- ped_node[!(label %in% sire_dam_label)]
# Moreover, finding full-sib individuals
familysize <- NULL
ped_node_1[,familysize:=.N,by=.(familylabel,sex)]
if (max(ped_node_1$familysize,na.rm=TRUE)>=2) {
# The full-sib individuals in a family will be compacted if the family size >= 2
fullsib_id_DT <- ped_node_1[familysize >=2]
fullsib_ids <- fullsib_id_DT$id
familylabelsex = NULL # due to NSE notes in R CMD check
fullsib_id_DT[,familylabelsex:=paste(familylabel,sex,sep="")]
# Generating a compact family dataset, only including maximum three individuals for
# each family: male, female and unknown sex individuals
fullsib_family_label_sex <- unique(fullsib_id_DT$familylabelsex)
compact_family <- fullsib_id_DT[match(fullsib_family_label_sex,familylabelsex)]
# The compact families' id are the number of individuals by family and sex.
compact_family[,":="(label=familysize,nodetype="compact")]
# Deleting full-sib individuals from families with 2 and more full-sib individuals
ped_node <- ped_node[!(id %in% fullsib_ids)]
ped_node <- rbind(ped_node,compact_family,fill=TRUE)
}
}
#=== Add virtual nodes between parents and progrenies================================
# Add id to familynum and familynum to sirenum and damnum as new virtual edges
ped_edge <-
rbind(ped_node[, .(from = id, to = familynum)],
ped_node[, .(from = familynum, to = sirenum)],
ped_node[, .(from = familynum, to = damnum)])
ped_edge <- ped_edge[!(to == 0)]
# Delete duplicated edges from familynum to parents
ped_edge <- unique(ped_edge)
ped_edge <- ped_edge[order(from, to)]
size = arrow.size = arrow.width = color = curved = NULL # due to NSE notes in R CMD check
# grey
#ped_edge[,":="(size=1,arrow.size=1,arrow.width=1,color="#9d96ad",curved=0.15)]
#ped_edge[,":="(size=1,arrow.size=1,arrow.width=1,color="#a69f89",curved=0.15)]
#ped_edge[,":="(size=1,arrow.size=1,arrow.width=1,color="#afa8be",curved=0.10)]
ped_edge[,":="(size=1,arrow.size=1,arrow.width=1,color="#333333",curved=0.10)]
# Add familynum as new virtual nodes
ped_node <-
rbind(ped_node, unique(ped_node[familynum > 0, .(
id = familynum,
familylabel,
label = familylabel,
sirenum,
damnum,
sirelabel,
damlabel,
gen,
familynum
)]), fill = TRUE)
ped_node[is.na(nodetype),nodetype:="virtual"]
layer = NULL # due to NSE notes in R CMD check
ped_node[nodetype %in% c("real","compact"),layer:=2*gen-1]
ped_node[nodetype %in% c("virtual"),layer:=2*(gen-1)]
#=== Set default shape, size and color for male and female===========================
# Setting the default attributes of nodes
# Notes: size = 15 means the width of a circle node account for 15% of the whole width
# of the graph
#ped_node[, ":="(shape = "circle", frame.color="#8495e8", color="#9daaea",size = 15)]
#ped_node[, ":="(shape = "circle", frame.color="black", color="#aaa16c",size = 15)]
shape = frame.color = color = size = label.color = NULL
ped_node[, ":="(shape = "circle", frame.color="#7fae59", color="#9cb383",size = 15, label.color="#0d0312")]
#ped_node[, ":="(shape = "circle", frame.color=NA, color="#9cb383",size = 15)]
ped_node[nodetype %in% c("compact"), ":="(shape="square")]
# Setting virtual size of nodes to 0.0001
ped_node[id > max_id,":="(shape="none",label="",size=0)]
# Setting male and female's color
ped_node[sex %in% c("male"), ":="(frame.color="#0e8dbb", color = "#119ecc")]
ped_node[sex %in% c("female"), ":="(frame.color="#e6a11f", color = "#f4b131")]
# The edge color is same with the color of the it's "to" node.
min_familynum <- min(family_num$familynum)
ped_edge <- merge(ped_edge,
ped_node[,.(id,tonodecolor=color)],
by.x="to", by.y="id",all.x=TRUE)
ped_edge[from >= min_familynum,":="(color=tonodecolor)]
ped_edge[from < min_familynum,":="(curved=0)]
# Sorting the "from" and "to" columns as the first two columns in the ped_edge
old_names <- colnames(ped_edge)
new_names <- c(c("from","to"),old_names[!(old_names %in% c("from","to"))])
ped_edge <- ped_edge[, ..new_names]
ped_edge <- ped_edge[order(from,to)]
# Sorting the "id" column as the first column in the ped_node
old_names <- colnames(ped_node)
new_names <- c("id",old_names[!(old_names %in% c("id"))])
ped_node <- ped_node[, ..new_names]
ped_node <- ped_node[order(layer,id)]
return(list(node = ped_node, edge = ped_edge))
}
`:=` = function(...) NULL
#' Repel the overlapping nodes in x axis.
#'
#' \code{repeloverlap} function converts repeated axis x positions to continuous positions.
#'
#' This function takes a vector including axis x positions with repeated values, converts the repeated values of one x position to continuous positions. For example, x = c(1.2,2.1,2.1,2.1,3.2,4.6,5.7), where 2.1 is a axis x position with two repeated values; bl <- (3.2-2.1)/3; transformed x = c(1.2,2.1,2.1+bl,2.1+2*bl,3.2,4.6,5.7)
#' @param x A vector including x positions with repeated values
#' @return A vector including x positions with unique values
#' @keywords internal
repeloverlap <- function(x) {
if (anyDuplicated(x)>0) {
x_dt <- as.data.table(x)
# The number of the x postions with duplicated values
x_dt_times <- x_dt[,.N,by=x]
# The number of the x postions with unique value
unique_elements <- sort(x_dt_times[,x])
unique_elements_num <- length(unique_elements)
x_dt_dup <- x_dt_times[N>1]
dup_num <- nrow(x_dt_dup)
y <- vector(mode = "list",length = dup_num)
for (i in 1:dup_num) {
rank_left <- which(unique_elements==x_dt_dup[i,x])
# For the last two x positions with repeated values
# For example, c(1,2,3,4,5,5,6,6)
if (i == (dup_num-1)) {
if ((rank_left == unique_elements_num-1) & (which(unique_elements==x_dt_dup[i+1,x]) == unique_elements_num)) {
rank_right <- rank_left+1
range <- unique_elements[rank_right]-unique_elements[rank_left]
break_num <- x_dt_dup[i,N]+x_dt_dup[i+1,N]-1
break_length <- range/break_num
y[[i]] <- c(x_dt_dup[i,x],x_dt_dup[i+1,x],unique_elements[rank_left]+break_length*seq(break_num-1))
break
}
}
break_num <- x_dt_dup[i,N]
# For the repeated values which are not in the end.
# For example, c(1,1,2,3,4,5,6)
if (rank_left < unique_elements_num) {
rank_right <- rank_left+1
}
# For the last one repeated value
# for example, c(1,1,2,3,4,5,6,6)
if (rank_left == unique_elements_num) {
rank_right <- rank_left
rank_left <- rank_left-1
}
range <- unique_elements[rank_right]-unique_elements[rank_left]
break_length <- range/break_num
y[[i]] <- c(x_dt_dup[i,x],unique_elements[rank_left]+break_length*seq(break_num-1))
}
return(sort(union(unique_elements, do.call("c",y))))
}
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.