R/citation_network_plot.R

Defines functions format_network layout_cite citation_network_plot

Documented in citation_network_plot

#' citation_network_plot
#'
#' \code{citation_network_plot} generates plots of citation networks, depicting works as dots ("nodes", in network parlance) and citations as arrows ("edges") from cited works to citing works.
#' 
#' @param citations A dataframe representing classified citations such as that generated by \code{\link[classify_citations()]{classify_citations}}.
#' @param cited_only A dataframe representing the works that are only cited by---not citing---the other works in \code{citations}.  This need only be specified if multiple classifications are employed (e.g., "agree" and "disagree") and not all of the only-cited works fall into the first classification.  Otherwise, this information will be automatically generated from \code{citations}.
#' @param time_axis Set to "y" to specify that time should start from the bottom of the plot or "x", the default, to specify that time flows from left to right.
#' @param color_plot Should multiple classifications be denoted by different colors?  If not, they will indicated by different shapes.
#' @param custom_plot Should the function return a bare plot suitable for customization using \code{\link[ggnetwork]{ggnetwork}} functions?
#' @param arrow_gap A parameter that will shorten the arrows that link works in order to avoid overplotting the dots that represent the works; larger values may be desirable if \code{custom_plot} is set to TRUE and the size of the dots will be increased using \code{\link[ggnetwork]{ggnetwork}}'s \code{\link[geom_nodes]{geom_nodes}} function.
#' 
#' @details
#' The `citation_network_plot` function 
#' 
#' @return A ggplot object
#'
#' @examples
#' 
#' @importFrom dplyr "%>%" select mutate filter bind_rows
#' @importFrom stringr str_extract
#' @importFrom network network "%v%<-" "%v%" get.vertex.attribute
#' @importFrom sna as.sociomatrix.sna symmetrize
#' @importFrom purrr map_dbl
#' @importFrom tibble tibble
#' @import ggnetwork
#' @import ggplot2
#'
#' @export

citation_network_plot <- function(citations, 
                                  cited_only, 
                                  stages,
                                  root_source,
                                  label_only_cited = FALSE,
                                  color_plot = TRUE, 
                                  custom_plot = FALSE, 
                                  arrow_gap = .015, 
                                  time_axis = "x",
                                  seed = 324) {
    
    if (missing(cited_only)) {
        cited_only <- tibble(citing = citations$cited[!citations$cited %in% citations$citing]) %>% 
            mutate(year = str_extract(citing, "\\d{4}") %>% 
                       as.numeric(),
                   classification = 1) %>% 
            distinct()
    }
    
    if (!("citing" %in% names(citations))) {
        citations <- citations %>% 
        mutate(citing_author = if_else(str_detect(author, ","), 
                                       str_replace(author, "(^[^,]*).*", "\\1"),
                                       author),
               citing = paste(citing_author, date)) %>% 
            select(cited, citing, date, classification)
    } 
    
    if (!("classification" %in% names(citations))) {
        citations$classification <- 1
    }

    cited_works <- citations %>% 
        group_by(citing) %>% 
        summarize(classification = max(classification),
                  year = as.numeric(min(date))) %>% 
        ungroup() %>% 
        bind_rows(cited_only) %>% 
        mutate(only_citing = !(citing %in% citations$cited))
    
    cite_net <- network::network(citations %>% 
                                     select(-date), matrix.type = "edgelist")
    cite_net %v% "year" <- cited_works[order(match(cited_works[["citing"]],
                                                   cite_net %v% "vertex.names")), ] %>% 
        pull(year)
    cite_net %v% "classification" <- cited_works[order(match(cited_works[["citing"]],
                                                             cite_net %v% "vertex.names")), ] %>% 
        pull(classification)
    cite_net %v% "only_citing" <- cited_works[order(match(cited_works[["citing"]],
                                                          cite_net %v% "vertex.names")), ] %>% 
        pull(only_citing)
    
    if (!missing(stages)) {
        stages <- tibble(year = seq(min(cited_works$year), max(cited_works$year))) %>% 
                             mutate(stage_breaks = year %in% stages,
                                    stage = cumsum(stage_breaks))
        cited_works <- cited_works %>%
            left_join(stages, by = "year")
    } else {
        cited_works$stage <- 1
    }
    cite_net %v% "stage" <- cited_works[order(match(cited_works[["citing"]],
                                                    cite_net %v% "vertex.names")), ] %>% 
        pull(stage)
    
    if (missing(root_source)) {
        layout <- layout_cite(cite_net, seed = seed)
    } else {
        layout <- layout_cite(cite_net, seed = seed, root_source = root_source)
    }
    
    cite_network <- format_network(cite_net, layout = layout, arrow.gap = arrow_gap, by = "stage")
    
    if (label_only_cited == TRUE) {
        cite_network <- cite_network %>% 
            mutate(vertex.names = if_else(!only_citing, vertex.names, NA_character_))
    }
    
    if (!custom_plot) {
        if (color_plot) {
            network_plot <- ggplot(cite_network,
                                   aes(x, y, xend = xend, yend = yend)) +
                geom_edges(color = "grey85",
                           arrow = arrow(length = unit(5, "pt")),
                           curvature = 0.05) +
                geom_nodes(size = 6, aes(color = as.factor(classification))) +
                geom_nodetext(aes(label = vertex.names)) +
                # theme_blank() + 
                theme(legend.background = element_blank(),
                      legend.key = element_blank(),
                      panel.background = element_blank(),
                      panel.border = element_blank(),
                      strip.background = element_blank(),
                      plot.background = element_blank(),
                      axis.line = element_blank(),
                      panel.grid = element_blank()) +
                scale_color_manual(values = c("1" = "grey75", "2" = "red")) +
                theme(legend.position="none") 
        } else {
            network_plot <- ggplot(cite_network,
                                   aes(x, y, xend = xend, yend = yend)) +
                geom_edges(color = "grey85",
                           arrow = arrow(length = unit(5, "pt")),
                           curvature = 0.05) +
                geom_nodes(size = 6, aes(shape = as.factor(classification))) +
                geom_nodetext(aes(label = vertex.names)) +
                theme_blank() + 
                scale_shape_manual(values = c(16, 21)) +
                theme(legend.position="none")
        }
    } else {
        network_plot <- ggplot(cite_network,
                               aes(x, y, xend = xend, yend = yend))
    }
    
    if (time_axis == "x") {
        network_plot <- network_plot + 
            coord_flip() +
            theme(
                axis.title.y = element_blank(),
                axis.text.y = element_blank(),
                axis.ticks.y = element_blank(),
                axis.title.x = element_blank()) +
            scale_y_continuous(breaks = pretty(layout$y, n = 5)) +
            expand_limits(y = c(min(pretty(layout$y, n = 5)), max(pretty(layout$y, n = 5))))
    } else {
        network_plot <- network_plot #+
            # theme(axis.title.x = element_blank(),
            #       axis.text.x = element_blank(),
            #       axis.ticks.x = element_blank(),
                 # axis.line.y = element_line(size = .5),
                  # axis.title.y = element_blank())
    }
    
    return(network_plot)  
}


layout_cite <- function(d, seed, trials = 100, root_source) {
    set.seed(seed)
    
    if (!missing(root_source)) {
        root <- which(unlist(lapply(d$val, `[`, c('vertex.names')))==root_source)
    }
    
    d_mat <- sna::as.sociomatrix.sna(d)
    if (is.list(d_mat)) {
        d_mat <- d_mat[[1]]
    }
    
    n <- nrow(d_mat)
    y_real <- get.vertex.attribute(d, "year")
    y <- get.vertex.attribute(d, "year") %>% jitter(amount = 1)
    y_range <- max(y_real) - min(y_real)
    n_y <- tibble(y = y_real) %>%
        group_by(y) %>% 
        mutate(n_y = n()) %>% 
        ungroup() %>% 
        last()
    old_crit <- 0
    
    for (i in 1:trials) {
        a_dx <- rep(0, n)
        r_dx <- rep(0, n)
        x <- purrr::map_dbl(n_y, function(x) {
            seq(from = -y_range, to = y_range, length.out = x + 5) %>% 
                sample(size = 1) %>% 
                jitter(amount = y_range/50)
        })
        if (!missing(root_source)) {
            x[root] <- 0
        }
        x_old <- x
        
        ds <- sna::symmetrize(d_mat, "weak")
        dis <- as.matrix(dist(cbind(x, y)))
        
        for (j in 1:25) {
            x_old <- x
            theta <- acos(t(outer(x, x, "-"))/dis) * sign(t(outer(y, y, "-")))
            
            # attraction, on non-time dimension, between nodes with ties
            a_dx <- apply((ds * cos(theta) * dis)/50, 
                          1, sum, na.rm = TRUE)
            
            x <- (x + a_dx) %>% 
                pmin(., y_range) %>% 
                pmax(., -y_range)
            dis <- as.matrix(dist(cbind(x, y)))
            theta <- acos(t(outer(x, x, "-"))/dis) * sign(t(outer(y, y, "-")))
            
            # attraction, on time dimension, between nodes and actual years
            y <- (y_real + y)/2
            
            # repulsion between close nodes
            r_dx <- apply(cos(theta) * y_range/dis^2, 
                          1, sum, na.rm = TRUE) %>% 
                pmin(., y_range*2) %>% 
                pmax(., -y_range*2)
            r_dy <- apply(sin(theta) * y_range/dis^2, 
                          1, sum, na.rm = TRUE) %>% 
                pmin(., y_range/100) %>% 
                pmax(., -y_range/100)
            
            x <- x - r_dx 
            y <- y - r_dy
            
            if (!missing(root_source)) {
                x[root] <- mean(x)
            }
            dis <- as.matrix(dist(cbind(x, y)))
        }
        
        crit <- min(dis[dis!=0])
        if (crit > old_crit) {
            old_crit <- crit
            pos <- cbind(x, y)
        }
    }
    pos <- as_tibble(pos)
    
    return(pos)
}

format_network <- function(model, 
                           layout,
                           weights = NULL,
                           by = by,
                           ...) {
    x = model
    
    nodes = layout
    # 
    # # rescale arrow_gap
    # # arrow.gap <- arrow.gap * diff(range(nodes$y))
    # nodes$x = scale(nodes$x, center = min(nodes$x), scale = diff(range(nodes$x))) %>% as.vector()
    # nodes$y = scale(nodes$y, center = min(nodes$y), scale = diff(range(nodes$y))) %>% as.vector()
    # 
    # import vertex attributes
    for (y in network::list.vertex.attributes(x)) {
        nodes = cbind(nodes, network::get.vertex.attribute(x, y))
        names(nodes)[ncol(nodes)] = y
    }
    
    # edge list
    edges = network::as.matrix.network.edgelist(x, attrname = weights)
    
    # edge list (if there are duplicated rows)
    if (nrow(edges[, 1:2]) > nrow(unique(edges[, 1:2]))) {
        warning("duplicated edges detected")
    }
    
    edges = data.frame(nodes[edges[, 1], 1:2], nodes[edges[, 2], 1:2])
    names(edges) = c("x", "y", "xend", "yend")
    
    # # arrow gap (thanks to @heike and @ethen8181 for their work on this issue)
    # if (arrow.gap > 0) {
    #     x.length = with(edges, xend - x)
    #     y.length = with(edges, yend - y)
    #     arrow.gap = with(edges, arrow.gap / sqrt(x.length ^ 2 + y.length ^ 2))
    #     edges = transform(
    #         edges,
    #         xend = x + (1 - arrow.gap) * x.length,
    #         yend = y + (1 - arrow.gap) * y.length
    #     )
    # }
    
    # import edge attributes
    # for (y in network::list.edge.attributes(x)) {
    #     edges = cbind(edges, network::get.edge.attribute(x, y))
    #     names(edges)[ncol(edges)] = y
    # }
    
    # merge edges and nodes data
    edges = left_join(edges, 
                      nodes,
                      by = c("x", "y"))
    
    # add missing columns to nodes data
    nodes$xend = nodes$x
    nodes$yend = nodes$y

    # # make nodes data of identical dimensions to edges data
    # for (y in names(edges)[(1 + ncol(nodes)):ncol(edges)]) {
    #     nodes = cbind(nodes, NA)
    #     names(nodes)[ncol(nodes)] = y
    # }
    
    # panelize nodes (for temporal networks)
    if (!is.null(by)) {
        nodes = lapply(sort(unique(edges[, by ])), function(x) {
            y = nodes
            y[, by ] = x
            y
        })
        nodes = do.call(rbind, nodes)
    }
    
    network <- bind_rows(nodes, edges %>%
                             filter(!is.na(xend))) %>% 
        distinct()
    
    return(network)
}
mariolaespinosa/historicalnetworks documentation built on Feb. 9, 2022, 12:31 p.m.