inst/doc/visualizing-json.R

## ---- echo = FALSE, message = FALSE-------------------------------------------
knitr::opts_chunk$set(collapse = T, comment = "#>")
knitr::opts_chunk$set(fig.width = 7, fig.height = 5)
options(tibble.print_min = 4L, tibble.print_max = 4L)

## ---- message = FALSE---------------------------------------------------------
library(jsonlite)
library(dplyr)
library(purrr)
library(magrittr)
library(forcats)
library(ggplot2)
library(igraph)
library(RColorBrewer)
library(wordcloud)
library(viridis)
library(listviewer)
library(tidyjson)

set.seed(1)

## -----------------------------------------------------------------------------
co_length <- companies %>% json_complexity

## -----------------------------------------------------------------------------
co_length %>%
  ggplot(aes(complexity)) +
    geom_density() +
    scale_x_log10() +
    annotation_logticks(side = 'b')

## -----------------------------------------------------------------------------
co_examp_index <- which(co_length$complexity == 20L)[1]

co_examp <- companies[co_examp_index]

co_examp

## -----------------------------------------------------------------------------
co_examp %>% jsonedit(mode = "code")

## -----------------------------------------------------------------------------
co_struct <- companies %>% sample(5) %>% json_structure

print(co_struct)

## -----------------------------------------------------------------------------
co_names <- co_struct %>% 
  filter(type != "null" & !is.na(name)) %>%
  group_by(level, name, type) %>%
  summarize(ndoc = n_distinct(document.id))

co_names

## -----------------------------------------------------------------------------
co_names %$% wordcloud(name, ndoc, scale = c(1.5, .1), min.freq = 100)

## ---- fig.height = 9----------------------------------------------------------
co_names %>%
  ungroup %>%
  group_by(type) %>%
  arrange(desc(ndoc), level) %>%
  mutate(rank = 1:n()) %>%
  ggplot(aes(1, rank)) +
    geom_text(aes(label = name, color = ndoc)) +
    scale_y_reverse() +
    facet_grid(. ~ type) +
    theme_void() +
    theme(legend.position = "bottom") +
    scale_color_viridis(direction = -1)

## -----------------------------------------------------------------------------
# Plots an igraph visualization of a JSON document
#
# @param .x a JSON string or tbl_json object
# @param legend add a type color legend automatically
# @param vertex.size the size of the vertices
# @param edge.color the color for the edges
# @param edge.width the width of the edge lines
# @param show.labels should object names be shown
# @param plot should the plot be rendered?
# @param ... further arguments to igraph::plot.igraph
plot_json_graph <- function(.x, legend = TRUE, vertex.size = 6,
                            edge.color = 'grey70', edge.width = .5,
                            show.labels = TRUE, plot = TRUE,
                            ...) {

  if (!is.tbl_json(.x)) .x <- as.tbl_json(.x)

  if (nrow(.x) != 1) stop("nrow(.x) not equal to 1")

  structure <- .x %>% json_structure

  type_colors <- RColorBrewer::brewer.pal(6, "Accent")

  graph_edges <- structure %>%
    filter(!is.na(parent.id)) %>%
    select(parent.id, child.id)

  graph_vertices <- structure %>%
    transmute(child.id,
              vertex.color = type_colors[as.integer(type)],
              vertex.label = name)

  if (!show.labels)
    graph_vertices$vertex.label <- rep(NA_character_, nrow(graph_vertices))

  g <- igraph::graph_from_data_frame(graph_edges, vertices = graph_vertices,
                             directed = FALSE)

  if (plot) {
    op <- par(mar = c(0, 0, 0, 0))
    plt <- igraph::plot.igraph(g,
         vertex.color = igraph::V(g)$vertex.color,
         vertex.size  = vertex.size,
         vertex.label = igraph::V(g)$vertex.label,
         vertex.frame.color = NA,
         layout = layout_with_kk,
         edge.color = edge.color,
         edge.width = edge.width,
         ...)

    if (legend)
      legend(x = -1.3, y = -.6, levels(structure$type), pch = 21,
             col= "white", pt.bg = type_colors,
             pt.cex = 2, cex = .8, bty = "n", ncol = 1)

    par(op)
  }

  invisible(g)

}

## -----------------------------------------------------------------------------
'{"object" : {"name": 1},
  "array"  : ["a", "b"],
  "string" : "value", 
  "number" : 1, 
  "logical": true,
  "null"   : null}' %>% 
  plot_json_graph

## -----------------------------------------------------------------------------
co_examp %>% plot_json_graph

## ---- fig.height = 8----------------------------------------------------------
plot_json_graph_panel <- function(json, nrow, ncol, ...) {
  
  # Set up grid
  op <- par(mfrow = c(nrow, ncol))
  
  indices <- seq_along(json) %>% keep(`<=`, nrow * ncol)
  
  for (i in indices) {
    plot_json_graph(json[[i]], ...)
    if ("names" %in% names(attributes(json))) 
      title(names(json)[i], col.main = 'red')
  }
  
  par(op)
  invisible(NULL)
}

## ---- fig.height = 8----------------------------------------------------------
plot_json_graph_panel(companies %>% sample(5), 7, 6, legend = FALSE, show.labels = FALSE,
                      vertex.size = 4)

## -----------------------------------------------------------------------------
most_complex <- companies[which(co_length$complexity == max(co_length$complexity))]

most_complex_name <- most_complex %>% 
  spread_values(name = jstring(name)) %>% 
  extract2("name")

## -----------------------------------------------------------------------------
plot_json_graph(most_complex, show.labels = FALSE, vertex.size = 2)

## -----------------------------------------------------------------------------
most_complex %>% json_schema %>% jsonedit(mode = "code")

## -----------------------------------------------------------------------------
most_complex %>% json_schema(type = "value") %>% plot_json_graph

## -----------------------------------------------------------------------------
most_complex %>% gather_object %>% json_types %>% json_complexity %>%
  filter(type %in% c('array', 'object') & complexity >= 15) %>%
  split(.$name) %>%
  map(json_schema, type = "value") %>%
  plot_json_graph_panel(3, 3, legend = FALSE)

## -----------------------------------------------------------------------------
rounds <- companies %>%
  enter_object(funding_rounds) %>%
  gather_array %>%
  spread_values(
    round = jstring(round_code),
    currency = jstring(raised_currency_code),
    raised = jnumber(raised_amount)
  )
rounds %>% head

## -----------------------------------------------------------------------------
geos <- companies %>%
  enter_object(offices) %>%
  gather_array %>%
  spread_values(
    country = jstring(country_code),
    state = jstring(state_code),
    description = jstring(description)
  )
geos %>% head

## -----------------------------------------------------------------------------
hqs <- geos %>%
  filter(array.index == 1) %>%
  filter(country == "USA") %>%
  select(document.id, state)
  
rounds_usd <- rounds %>%
  filter(currency == "USD") %>%
  filter(!is.na(raised)) %>%
  select(document.id, round, raised)

rounds_by_geo <- inner_join(rounds_usd, hqs, by = "document.id") %>% as_tibble()

## -----------------------------------------------------------------------------
round_prep <- rounds_by_geo %>% 
  dplyr::filter(!is.na(state)) %>%
  mutate(
    round = round %>% forcats::fct_collapse(
      "angel" = c("seed", "angel"),
      "d-f"   = c("d", "e", "f"),
      "other" = c("grant", "partial", "post_ipo_equity", "private_equity", 
                  "debt_round", "unattributed")
    ) %>% forcats::fct_relevel("angel", "a", "b", "c", "d-f", "other")
  ) %>%
  mutate(
    state = state %>% forcats::fct_lump(2)
  ) 

g <- ggplot(round_prep, aes(state, raised, fill = state)) +
  geom_violin() +
  scale_y_log10() + 
  annotation_logticks(side = 'l') +
  facet_grid(. ~ round) +
  theme(legend.position = "bottom") +
  labs(x = "", y = "Amount Raised (USD)")

g

Try the tidyjson package in your browser

Any scripts or data that you put into this service are public.

tidyjson documentation built on Jan. 7, 2023, 1:14 a.m.