#' citation_network_plot
#'
#' \code{citation_network_plot} generates plots of citation networks, depicting works as dots ("nodes", in network parlance) and citations as arrows ("edges") from cited works to citing works.
#'
#' @param citations A dataframe representing classified citations such as that generated by \code{\link[classify_citations()]{classify_citations}}.
#' @param cited_only A dataframe representing the works that are only cited by---not citing---the other works in \code{citations}. This need only be specified if multiple classifications are employed (e.g., "agree" and "disagree") and not all of the only-cited works fall into the first classification. Otherwise, this information will be automatically generated from \code{citations}.
#' @param time_axis Set to "y" to specify that time should start from the bottom of the plot or "x", the default, to specify that time flows from left to right.
#' @param color_plot Should multiple classifications be denoted by different colors? If not, they will indicated by different shapes.
#' @param custom_plot Should the function return a bare plot suitable for customization using \code{\link[ggnetwork]{ggnetwork}} functions?
#' @param arrow_gap A parameter that will shorten the arrows that link works in order to avoid overplotting the dots that represent the works; larger values may be desirable if \code{custom_plot} is set to TRUE and the size of the dots will be increased using \code{\link[ggnetwork]{ggnetwork}}'s \code{\link[geom_nodes]{geom_nodes}} function.
#'
#' @details
#' The `citation_network_plot` function
#'
#' @return A ggplot object
#'
#' @examples
#'
#' @importFrom dplyr "%>%" select mutate filter bind_rows
#' @importFrom stringr str_extract
#' @importFrom network network "%v%<-" "%v%" get.vertex.attribute
#' @importFrom sna as.sociomatrix.sna symmetrize
#' @importFrom purrr map_dbl
#' @importFrom tibble tibble
#' @import ggnetwork
#' @import ggplot2
#'
#' @export
citation_network_plot <- function(citations,
cited_only,
stages,
root_source,
label_only_cited = FALSE,
color_plot = TRUE,
custom_plot = FALSE,
arrow_gap = .015,
time_axis = "x",
seed = 324) {
if (missing(cited_only)) {
cited_only <- tibble(citing = citations$cited[!citations$cited %in% citations$citing]) %>%
mutate(year = str_extract(citing, "\\d{4}") %>%
as.numeric(),
classification = 1) %>%
distinct()
}
if (!("citing" %in% names(citations))) {
citations <- citations %>%
mutate(citing_author = if_else(str_detect(author, ","),
str_replace(author, "(^[^,]*).*", "\\1"),
author),
citing = paste(citing_author, date)) %>%
select(cited, citing, date, classification)
}
if (!("classification" %in% names(citations))) {
citations$classification <- 1
}
cited_works <- citations %>%
group_by(citing) %>%
summarize(classification = max(classification),
year = as.numeric(min(date))) %>%
ungroup() %>%
bind_rows(cited_only) %>%
mutate(only_citing = !(citing %in% citations$cited))
cite_net <- network::network(citations %>%
select(-date), matrix.type = "edgelist")
cite_net %v% "year" <- cited_works[order(match(cited_works[["citing"]],
cite_net %v% "vertex.names")), ] %>%
pull(year)
cite_net %v% "classification" <- cited_works[order(match(cited_works[["citing"]],
cite_net %v% "vertex.names")), ] %>%
pull(classification)
cite_net %v% "only_citing" <- cited_works[order(match(cited_works[["citing"]],
cite_net %v% "vertex.names")), ] %>%
pull(only_citing)
if (!missing(stages)) {
stages <- tibble(year = seq(min(cited_works$year), max(cited_works$year))) %>%
mutate(stage_breaks = year %in% stages,
stage = cumsum(stage_breaks))
cited_works <- cited_works %>%
left_join(stages, by = "year")
} else {
cited_works$stage <- 1
}
cite_net %v% "stage" <- cited_works[order(match(cited_works[["citing"]],
cite_net %v% "vertex.names")), ] %>%
pull(stage)
if (missing(root_source)) {
layout <- layout_cite(cite_net, seed = seed)
} else {
layout <- layout_cite(cite_net, seed = seed, root_source = root_source)
}
cite_network <- format_network(cite_net, layout = layout, arrow.gap = arrow_gap, by = "stage")
if (label_only_cited == TRUE) {
cite_network <- cite_network %>%
mutate(vertex.names = if_else(!only_citing, vertex.names, NA_character_))
}
if (!custom_plot) {
if (color_plot) {
network_plot <- ggplot(cite_network,
aes(x, y, xend = xend, yend = yend)) +
geom_edges(color = "grey85",
arrow = arrow(length = unit(5, "pt")),
curvature = 0.05) +
geom_nodes(size = 6, aes(color = as.factor(classification))) +
geom_nodetext(aes(label = vertex.names)) +
# theme_blank() +
theme(legend.background = element_blank(),
legend.key = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
strip.background = element_blank(),
plot.background = element_blank(),
axis.line = element_blank(),
panel.grid = element_blank()) +
scale_color_manual(values = c("1" = "grey75", "2" = "red")) +
theme(legend.position="none")
} else {
network_plot <- ggplot(cite_network,
aes(x, y, xend = xend, yend = yend)) +
geom_edges(color = "grey85",
arrow = arrow(length = unit(5, "pt")),
curvature = 0.05) +
geom_nodes(size = 6, aes(shape = as.factor(classification))) +
geom_nodetext(aes(label = vertex.names)) +
theme_blank() +
scale_shape_manual(values = c(16, 21)) +
theme(legend.position="none")
}
} else {
network_plot <- ggplot(cite_network,
aes(x, y, xend = xend, yend = yend))
}
if (time_axis == "x") {
network_plot <- network_plot +
coord_flip() +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.x = element_blank()) +
scale_y_continuous(breaks = pretty(layout$y, n = 5)) +
expand_limits(y = c(min(pretty(layout$y, n = 5)), max(pretty(layout$y, n = 5))))
} else {
network_plot <- network_plot #+
# theme(axis.title.x = element_blank(),
# axis.text.x = element_blank(),
# axis.ticks.x = element_blank(),
# axis.line.y = element_line(size = .5),
# axis.title.y = element_blank())
}
return(network_plot)
}
layout_cite <- function(d, seed, trials = 100, root_source) {
set.seed(seed)
if (!missing(root_source)) {
root <- which(unlist(lapply(d$val, `[`, c('vertex.names')))==root_source)
}
d_mat <- sna::as.sociomatrix.sna(d)
if (is.list(d_mat)) {
d_mat <- d_mat[[1]]
}
n <- nrow(d_mat)
y_real <- get.vertex.attribute(d, "year")
y <- get.vertex.attribute(d, "year") %>% jitter(amount = 1)
y_range <- max(y_real) - min(y_real)
n_y <- tibble(y = y_real) %>%
group_by(y) %>%
mutate(n_y = n()) %>%
ungroup() %>%
last()
old_crit <- 0
for (i in 1:trials) {
a_dx <- rep(0, n)
r_dx <- rep(0, n)
x <- purrr::map_dbl(n_y, function(x) {
seq(from = -y_range, to = y_range, length.out = x + 5) %>%
sample(size = 1) %>%
jitter(amount = y_range/50)
})
if (!missing(root_source)) {
x[root] <- 0
}
x_old <- x
ds <- sna::symmetrize(d_mat, "weak")
dis <- as.matrix(dist(cbind(x, y)))
for (j in 1:25) {
x_old <- x
theta <- acos(t(outer(x, x, "-"))/dis) * sign(t(outer(y, y, "-")))
# attraction, on non-time dimension, between nodes with ties
a_dx <- apply((ds * cos(theta) * dis)/50,
1, sum, na.rm = TRUE)
x <- (x + a_dx) %>%
pmin(., y_range) %>%
pmax(., -y_range)
dis <- as.matrix(dist(cbind(x, y)))
theta <- acos(t(outer(x, x, "-"))/dis) * sign(t(outer(y, y, "-")))
# attraction, on time dimension, between nodes and actual years
y <- (y_real + y)/2
# repulsion between close nodes
r_dx <- apply(cos(theta) * y_range/dis^2,
1, sum, na.rm = TRUE) %>%
pmin(., y_range*2) %>%
pmax(., -y_range*2)
r_dy <- apply(sin(theta) * y_range/dis^2,
1, sum, na.rm = TRUE) %>%
pmin(., y_range/100) %>%
pmax(., -y_range/100)
x <- x - r_dx
y <- y - r_dy
if (!missing(root_source)) {
x[root] <- mean(x)
}
dis <- as.matrix(dist(cbind(x, y)))
}
crit <- min(dis[dis!=0])
if (crit > old_crit) {
old_crit <- crit
pos <- cbind(x, y)
}
}
pos <- as_tibble(pos)
return(pos)
}
format_network <- function(model,
layout,
weights = NULL,
by = by,
...) {
x = model
nodes = layout
#
# # rescale arrow_gap
# # arrow.gap <- arrow.gap * diff(range(nodes$y))
# nodes$x = scale(nodes$x, center = min(nodes$x), scale = diff(range(nodes$x))) %>% as.vector()
# nodes$y = scale(nodes$y, center = min(nodes$y), scale = diff(range(nodes$y))) %>% as.vector()
#
# import vertex attributes
for (y in network::list.vertex.attributes(x)) {
nodes = cbind(nodes, network::get.vertex.attribute(x, y))
names(nodes)[ncol(nodes)] = y
}
# edge list
edges = network::as.matrix.network.edgelist(x, attrname = weights)
# edge list (if there are duplicated rows)
if (nrow(edges[, 1:2]) > nrow(unique(edges[, 1:2]))) {
warning("duplicated edges detected")
}
edges = data.frame(nodes[edges[, 1], 1:2], nodes[edges[, 2], 1:2])
names(edges) = c("x", "y", "xend", "yend")
# # arrow gap (thanks to @heike and @ethen8181 for their work on this issue)
# if (arrow.gap > 0) {
# x.length = with(edges, xend - x)
# y.length = with(edges, yend - y)
# arrow.gap = with(edges, arrow.gap / sqrt(x.length ^ 2 + y.length ^ 2))
# edges = transform(
# edges,
# xend = x + (1 - arrow.gap) * x.length,
# yend = y + (1 - arrow.gap) * y.length
# )
# }
# import edge attributes
# for (y in network::list.edge.attributes(x)) {
# edges = cbind(edges, network::get.edge.attribute(x, y))
# names(edges)[ncol(edges)] = y
# }
# merge edges and nodes data
edges = left_join(edges,
nodes,
by = c("x", "y"))
# add missing columns to nodes data
nodes$xend = nodes$x
nodes$yend = nodes$y
# # make nodes data of identical dimensions to edges data
# for (y in names(edges)[(1 + ncol(nodes)):ncol(edges)]) {
# nodes = cbind(nodes, NA)
# names(nodes)[ncol(nodes)] = y
# }
# panelize nodes (for temporal networks)
if (!is.null(by)) {
nodes = lapply(sort(unique(edges[, by ])), function(x) {
y = nodes
y[, by ] = x
y
})
nodes = do.call(rbind, nodes)
}
network <- bind_rows(nodes, edges %>%
filter(!is.na(xend))) %>%
distinct()
return(network)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.