In judgelord/netlit: Augment a Literature Review with Network Analysis Statistics

# Chunk defaults for this document: hide code and warnings, cache chunk results.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, cache = TRUE)
library(igraph)
library(sjmisc)
library(ggraph)
library(knitr)
library(kableExtra)
library(netlit)

# Load data for the original coder networks.
# NOTE(review): the code below expects objects coder1_df ... coder5_df to exist
# after these loads -- presumably they come from this file; confirm.
load(here::here("replication_data/IR_original_networks.RData"))

# Load data for coder evaluations of other coders' networks.
# NOTE(review): the similarity tables below look up objects named
# <evaluator>_<evaluated>_df (e.g. coder1_coder2_df) -- presumably they come
# from this file; confirm.
load(here::here("replication_data/IR_evaluations.RData"))

# Fix the random seed before any plotting -- presumably so the "fr" network
# layouts drawn below are reproducible across runs.
set.seed(5)

Network Creation

As a check on the validity of utilizing the netlit approach to creating literature reviews, we conducted an experiment in which five (junior) researchers independently coded the same set of articles and then compared the resulting networks. The researchers read fifteen articles, pulled from a section of James Vreeland's International Organizations syllabus. No researcher had prior knowledge or specialization in international organizations. We expect this exercise to be representative of researchers in a higher variance group given low correlation of the limited prior knowledge on the topic (and therefore a harder test). Each researcher followed the standard netlit data entry approach and coded the articles' relationships in a simple Excel spreadsheet containing three columns: from, to, and citations. Using the resulting spreadsheet, we then used the review() function from our netlit package to create the network diagrams for each researcher, which are shown below.

# Build Researcher 1's literature network from their coded edge list and
# pull out the graph object that review() stores in `$graph`.
coder1_lit <- review(coder1_df)

coder1_graph <- coder1_lit$graph

# Plot the network using the "fr" layout.
researcher_1_network <- ggraph(coder1_graph, layout = "fr") +
  geom_node_point(size = 6, alpha = 0.75, colour = "lightblue") +
  geom_edge_arc2(
    # Caps stop each edge 3 mm short of the node centre at both ends.
    start_cap = circle(3, "mm"),
    end_cap = circle(3, "mm"),
    colour = "tomato",
    alpha = 0.5,
    # `strength` supersedes the deprecated `curvature` argument;
    # 0 draws straight edges.
    strength = 0,
    arrow = arrow(length = unit(2, "mm"), type = "open")
  ) +
  # Wrap node labels at 15 characters.
  geom_node_text(aes(label = sjmisc::word_wrap(name, 15)), size = 2.5) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "Researcher 1 Network")

researcher_1_network

# Build Researcher 2's literature network from their coded edge list and
# pull out the graph object that review() stores in `$graph`.
coder2_lit <- review(coder2_df)

coder2_graph <- coder2_lit$graph

# Plot the network using the "fr" layout.
researcher_2_network <- ggraph(coder2_graph, layout = "fr") +
  geom_node_point(size = 6, alpha = 0.75, colour = "lightblue") +
  geom_edge_arc2(
    # Caps stop each edge 3 mm short of the node centre at both ends.
    start_cap = circle(3, "mm"),
    end_cap = circle(3, "mm"),
    colour = "tomato",
    alpha = 0.5,
    # `strength` supersedes the deprecated `curvature` argument;
    # 0 draws straight edges.
    strength = 0,
    arrow = arrow(length = unit(2, "mm"), type = "open")
  ) +
  # Wrap node labels at 15 characters.
  geom_node_text(aes(label = sjmisc::word_wrap(name, 15)), size = 2.5) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "Researcher 2 Network")

researcher_2_network

\ \ \

# Build Researcher 3's literature network from their coded edge list and
# pull out the graph object that review() stores in `$graph`.
coder3_lit <- review(coder3_df)

coder3_graph <- coder3_lit$graph

# Plot the network using the "fr" layout.
researcher_3_network <- ggraph(coder3_graph, layout = "fr") +
  geom_node_point(size = 6, alpha = 0.75, colour = "lightblue") +
  geom_edge_arc2(
    # Caps stop each edge 3 mm short of the node centre at both ends.
    start_cap = circle(3, "mm"),
    end_cap = circle(3, "mm"),
    colour = "tomato",
    alpha = 0.5,
    # `strength` supersedes the deprecated `curvature` argument;
    # 0 draws straight edges.
    strength = 0,
    arrow = arrow(length = unit(2, "mm"), type = "open")
  ) +
  # Wrap node labels at 15 characters.
  geom_node_text(aes(label = sjmisc::word_wrap(name, 15)), size = 2.5) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "Researcher 3 Network")

researcher_3_network

\ \ \

# Build Researcher 4's literature network from their coded edge list and
# pull out the graph object that review() stores in `$graph`.
coder4_lit <- review(coder4_df)

coder4_graph <- coder4_lit$graph

# Plot the network using the "fr" layout.
researcher_4_network <- ggraph(coder4_graph, layout = "fr") +
  geom_node_point(size = 6, alpha = 0.75, colour = "lightblue") +
  geom_edge_arc2(
    # Caps stop each edge 3 mm short of the node centre at both ends.
    start_cap = circle(3, "mm"),
    end_cap = circle(3, "mm"),
    colour = "tomato",
    alpha = 0.5,
    # `strength` supersedes the deprecated `curvature` argument;
    # 0 draws straight edges.
    strength = 0,
    arrow = arrow(length = unit(2, "mm"), type = "open")
  ) +
  # Wrap node labels at 15 characters.
  geom_node_text(aes(label = sjmisc::word_wrap(name, 15)), size = 2.5) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "Researcher 4 Network")

researcher_4_network

\ \ \

# Build Researcher 5's literature network from their coded edge list and
# pull out the graph object that review() stores in `$graph`.
coder5_lit <- review(coder5_df)

coder5_graph <- coder5_lit$graph

# Plot the network using the "fr" layout.
researcher_5_network <- ggraph(coder5_graph, layout = "fr") +
  geom_node_point(size = 6, alpha = 0.75, colour = "lightblue") +
  geom_edge_arc2(
    # Caps stop each edge 3 mm short of the node centre at both ends.
    start_cap = circle(3, "mm"),
    end_cap = circle(3, "mm"),
    colour = "tomato",
    alpha = 0.5,
    # `strength` supersedes the deprecated `curvature` argument;
    # 0 draws straight edges.
    strength = 0,
    arrow = arrow(length = unit(2, "mm"), type = "open")
  ) +
  # Wrap node labels at 15 characters.
  geom_node_text(aes(label = sjmisc::word_wrap(name, 15)), size = 2.5) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "Researcher 5 Network")

researcher_5_network

Network Similarity

After coding the relationships and creating the networks, we checked for network similarity using two approaches. We gave each coder a copy of every other coder's spreadsheet and asked them to assign a similarity score for each edge in the other coder's network. In essence, the coders asked themselves "does this conceptual relationship also exist in my network?" The similarity score could take three distinct values: 0 if the relationship does not exist in their own network, 1 if the relationship exists in their own network but with different labels, and 2 if the exact relationship exists in their own network. Using this data, we then created tables summarizing the network similarity, shown below. Diagonals are NA as coders did not evaluate their own network (all edges would appear in their own network). Off-diagonals represent the percentage of shared edges perceived by the row when examining the column's network. For example, using the strict evaluation criteria, Researcher 2 claims that 67% of the edges in Researcher 1's network also appear in their own network. Since naming conventions can easily differ across researchers in referencing the same concept, we consider overlap between two networks if the same concept occurs even if labeling might differ slightly (i.e., 1s and 2s are indicative of overlap, while zeroes are not). The main table presents these findings. (We also include a "strict" table that counts only 2s as relationships that exist in both coders' networks). As the main table shows, accounting for labeling differences greatly increases the network similarity. Well over half of all edges are shared between all pairs of researchers.

# Lax (main) results: an edge counts as shared when the evaluating coder
# scored it 1 or 2 in the `exists in my network` column.

# Square table of results, one row and one column per coder. Diagonal cells
# are never filled (the loop skips them) and therefore stay NA.
coder_ids <- paste0("coder", 1:5)
result_lax_df <- as.data.frame(
  matrix(
    NA_real_,
    nrow = length(coder_ids),
    ncol = length(coder_ids),
    dimnames = list(coder_ids, coder_ids)
  )
)

# Fill every off-diagonal cell. Iterating over the names directly avoids the
# 1:nrow() / 1:ncol() index idiom and the extra row/column subsetting.
for (row_name in rownames(result_lax_df)) {
  for (col_name in colnames(result_lax_df)) {
    if (row_name == col_name) {
      next
    }

    # Evaluation sheets are looked up by name: <row coder>_<column coder>_df.
    eval_df <- get(paste0(row_name, "_", col_name, "_df"))
    scores <- eval_df$`exists in my network`

    # Share of the sheet's edges scored 1 or 2.
    pct <- sum(scores == 2 | scores == 1) / nrow(eval_df)

    result_lax_df[row_name, col_name] <- pct
  }
}

# Reformat colnames and rownames for display. `IDs` is reused by the strict
# table further down.
IDs <- paste("Researcher", 1:5)

colnames(result_lax_df) <- IDs
rownames(result_lax_df) <- IDs

kable(result_lax_df,
      format = "html",
      digits = 2,
      caption = "Perceived Similarity Across Researcher Findings (Main)") %>%
  kable_styling(full_width = FALSE,
                bootstrap_options = "striped")

# Strict results: an edge counts as shared only when the evaluating coder
# scored it 2 in the `exists in my network` column.

# Square table of results, one row and one column per coder. Diagonal cells
# are never filled (the loop skips them) and therefore stay NA.
strict_coder_ids <- paste0("coder", 1:5)
result_strict_df <- as.data.frame(
  matrix(
    NA_real_,
    nrow = length(strict_coder_ids),
    ncol = length(strict_coder_ids),
    dimnames = list(strict_coder_ids, strict_coder_ids)
  )
)

# Fill every off-diagonal cell. Iterating over the names directly avoids the
# 1:nrow() / 1:ncol() index idiom and the extra row/column subsetting.
for (row_name in rownames(result_strict_df)) {
  for (col_name in colnames(result_strict_df)) {
    if (row_name == col_name) {
      next
    }

    # Evaluation sheets are looked up by name: <row coder>_<column coder>_df.
    eval_df <- get(paste0(row_name, "_", col_name, "_df"))
    scores <- eval_df$`exists in my network`

    # Share of the sheet's edges scored exactly 2.
    pct <- sum(scores == 2) / nrow(eval_df)

    result_strict_df[row_name, col_name] <- pct
  }
}

# Reformat colnames and rownames for display, reusing the `IDs` labels
# defined for the main table above.
colnames(result_strict_df) <- IDs
rownames(result_strict_df) <- IDs


kable(result_strict_df,
      format = "html",
      digits = 2,
      caption = "Perceived Similarity Across Researcher Findings (Strict)") %>%
  kable_styling(full_width = FALSE,
                bootstrap_options = "striped")

These quantitative results are supported by the qualitative feedback we elicited from the researchers. We asked the five researchers "if you were to separately use each of these networks to understand the current state of the literature – both what currently exists and what is missing – would you come away with similar conclusions?" and received positive responses from all five. For example, Researcher 3 stated that "after reading the 15 papers provided and then looking at everyone’s maps, the central causal relationships are present in all maps." Researcher 5 reported that "except for some minor details, everyone captured similar causal relationships as the main ones." This feedback increases our confidence that our method is replicable; starting from the same literature, five independent junior researchers with no prior knowledge on the topic arrived at similar conclusions regarding the state of the literature. In addition, these represented the modal type of researcher who might use our review method: junior researchers who are familiar with the discipline but are new to a specific topic.

judgelord/netlit documentation built on Jan. 3, 2023, 2:42 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com