library(flexdashboard)
library(viridis)
library(plotly)
library(shiny)
library(scales)
library(DT)
library(writexl)
library(echarts4r)
library(hoad)
library(forcats)
library(dplyr)
library(stringr)
# access to data
hybrid_df <- readr::read_csv(file = path_extdat("hybrid_publications.csv")) %>%
  mutate(license = fct_infreq(license)) %>%
  mutate(publisher = fct_infreq(publisher)) %>%
  mutate(year = factor(issued, levels = c("2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020"))) %>%
  mutate(hybrid_type = ifelse(hybrid_type == "SCOAP", "SCOAP<sup>3</sup>", hybrid_type)) %>%
  mutate(hybrid_type = factor(hybrid_type, levels = c("Open APC (TA)", "Open APC (Hybrid)", "SCOAP<sup>3</sup>"))) %>%
  mutate(domain = paste(domain, suffix, sep = ".")) %>%
  arrange(desc(yearly_publisher_volume))
unpaywall_df <- readr::read_csv(file = path_extdat("unpaywall_df.csv")) %>%
  mutate(year = factor(year, levels = c("2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020")))

Overview

Inputs {.sidebar}

# from https://stackoverflow.com/questions/42148177/how-can-i-build-multiple-inputs-into-my-shiny-app-with-both-updating-and-multipl
# TODO factor this out
selectizeInput(
  inputId = "publishers",
  label = "Selected Publishers",
  choices = c(All = "", levels(hybrid_df$publisher)),
  multiple = TRUE,
)
selectizeInput(
  inputId = "journals",
  label = "Selected Journals",
  choices = c(All = "", sort(unique(hybrid_df$journal_title))),
  multiple = TRUE
)

# publisher filter
jn_filtered <- reactive({
  if (is.null(input$publishers)) {
    return(hybrid_df)
  } else {
    return(hybrid_df[hybrid_df$publisher %in% input$publishers, ])
  }
})

observe(updateSelectizeInput(
  session,
  "journals",
  choices = c(All = "", sort(unique(jn_filtered()$journal_title)))
))

# journal_title filter
jn_f <- reactive({
  if (is.null(input$journals)) {
    return(jn_filtered())
  } else {
    return(jn_filtered()[jn_filtered()$journal_title %in% input$journals, ])
  }
})

# prepare dataset for plotting
hybrid_sub <- reactive({
  if (length(unique(jn_f()$publisher)) > 1) {
    hybrid_sub <- jn_f() %>%
      group_by(year, yearly_all, license) %>%
      count() %>%
      mutate(prop = n / yearly_all)
  } else if (length(unique(jn_f()$journal_title)) > 1 &&
    length(unique(jn_f()$publisher)) == 1) {
    hybrid_sub <- jn_f() %>%
      group_by(year, license, yearly_publisher_volume) %>%
      count() %>%
      mutate(prop = n / yearly_publisher_volume)
  } else {
    hybrid_sub <- jn_f() %>%
      group_by(year, license, yearly_jn_volume) %>%
      count() %>%
      mutate(prop = n / yearly_jn_volume)
  }
})
# using unpaywall indicators
# unpaywall <- reactive({
#   hybrid_df %>%
#     filter(journal_title %in% jn_f()$journal_title) %>%
#     group_by(year, journal_title, publisher, jn_y_unpaywall_others) %>%
#     summarise(n = n_distinct(doi_oa)) %>%
#     gather(n, jn_y_unpaywall_others, key = "source", value = "articles") %>%
#     ungroup() %>%
#     group_by(year, source) %>%
#     summarise(articles = sum(articles, na.rm = TRUE)) %>%
#     mutate(
#       source = ifelse(
#         source == "n",
#         "Crossref immediate license",
#         "Other license information\n(Unpaywall)"
#       )
#     )
# })

Notice that only those hybrid open access journals were included where academic institutions sponsored the open access publication according to the Open APC initiative and where publishers shared license information about immediate open access with Crossref.

Row

Publishers selected

renderValueBox({
  publisher_n <- length(unique(jn_f()$publisher))
  valueBox(publisher_n, icon = "fa-filter")
})

Journals selected

renderValueBox({
  journal_n <- length(unique(jn_f()$journal_title))
  valueBox(format(journal_n, big.mark = " ", scientific = FALSE),
    icon = "fa-filter"
  )
})

Hybrid OA articles indexed in Crossref

renderValueBox({
  hybrid_n <- nrow(jn_f())
  valueBox(format(hybrid_n, big.mark = " ", scientific = FALSE),
    icon = "fa-creative-commons"
  )
})

Column {.tabset .tabset-fade}

Hybrid OA licenses found (relative)

renderPlotly({
  p <- ggplot(hybrid_sub(), aes(year, prop, fill = license)) +
    xlab("Year") +
    ylab("Hybrid OA / Articles published") +
    scale_fill_manual(
      "License",
      values = colors_license
    ) +
    scale_x_discrete(drop = FALSE) +
    scale_y_continuous(labels = scales::percent) +
    theme_minimal() +
    geom_col(position = position_stack(reverse = TRUE))
  plotly::ggplotly(p)
})

Hybrid OA licenses found (absolute)

renderPlotly({
  p <- ggplot(hybrid_sub(), aes(year, n, fill = license)) +
    xlab("Year") +
    ylab("Hybrid OA Articles") +
    scale_fill_manual(
      "License",
      values = colors_license
    ) +
    scale_x_discrete(drop = FALSE) +
    scale_y_continuous(
      labels = function(x) {
        format(x, big.mark = " ", scientific = FALSE)
      }
    ) +
    theme_minimal() +
    geom_col(position = position_stack(reverse = TRUE))
  # p
  plotly::ggplotly(p)
})

Other types of OA license information detected by Unpaywall

renderPlotly({
  unpaywall_df() %>%
    filter(journal_title %in% jn_f()$journal_title) %>%
    oa_by_other() %>%
    oa_by_other_plot() %>%
    plotly::ggplotly()
})

Row {data-width=400 data-height=350}

Sources of disclosure of OA sponsorship

renderPlotly({
  p <- hybrid_df %>%
    filter(journal_title %in% jn_f()$journal_title) %>%
    ggplot(aes(year, ..count.., fill = hybrid_type)) +
    geom_bar(position = position_stack(reverse = TRUE)) +
    xlab("Year") +
    ylab("Articles") +
    scale_x_discrete(drop = FALSE) +
    scale_y_continuous(
      labels = function(x) {
        format(x, big.mark = " ", scientific = FALSE)
      },
      breaks = scales::pretty_breaks()
    ) +
    scale_fill_manual(
      "Sources",
      values = colors_source_disclosure,
      na.value = "#E9E1D7"
    ) +
    theme_minimal()

  tt <- plotly::ggplotly(p, tooltip = c("y"))
  tt$x$data <- lapply(tt$x$data, function(x) {
    x$text <- paste(x$name, x$y, sep = ": ")
    x
  })
  tt
})

Text-mined author emails (first match per article)

renderEcharts4r({
  hybrid_df %>%
    filter(journal_title %in% jn_f()$journal_title) %>%
    count(suffix, domain) %>%
    filter(!is.na(domain)) %>%
    e_charts() %>%
    e_treemap(suffix, domain, n,
      leafDepth = "1",
      name = "Corresponding Email"
    ) %>%
    e_tooltip(trigger = "item")
})

Compare

Row {data-height=250}

Hybrid open access uptake by Publisher, shown as the proportion of the total number of hybrid open articles found

hybrid_all <-
  hybrid_df %>%
  count(year) %>%
  mutate(year = gsub("201", "1", year)) %>%
  mutate(year = gsub("2020", "20", year))
hybrid_df %>%
  mutate(year = gsub("201", "1", year)) %>%
  mutate(year = gsub("2020", "20", year)) %>%
  mutate(publisher_group = fct_lump(publisher, prop = 0.05)) %>%
  group_by(year, publisher_group) %>%
  summarize(n = n()) %>%
  mutate(prop = n / sum(n)) %>%
  mutate(`Proportion in %` = round(prop * 100, 2)) %>%
  ggplot(aes(year, n)) +
  geom_bar(
    data = hybrid_all,
    aes(fill = "All Hybrid OA Articles"),
    color = "transparent",
    stat = "identity"
  ) +
  geom_bar(aes(fill = "by Publisher", label = `Proportion in %`), color = "transparent", stat = "identity") +
  facet_wrap(~publisher_group, nrow = 1) +
  scale_fill_manual(values = c("#b3b3b3a0", "#153268"), name = "") +
  labs(x = "Year", y = "Hybrid OA Articles") +
  theme_minimal() +
  theme(
    legend.position = "top",
    legend.justification = "right"
  ) +
  scale_y_continuous(labels = scales::number_format(big.mark = " ")) +
  theme(panel.grid.minor = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(panel.grid.major.x = element_blank()) +
  theme(panel.border = element_blank()) -> publishers_comp_plot

plotly::ggplotly(publishers_comp_plot, tooltip = c("label", "y"))

Row {data-height=500}

Journal hybrid open access share by publisher

fillRow(
  flex = c(1, 3),
  inputPanel(
    selectInput(
      inputId = "Year",
      label = "Select Years",
      choices = c(rev(levels(hybrid_df$year))),
      selected = c("2017", "2018", "2019"),
      multiple = TRUE
    ),
    checkboxInput("rb", "Display journals", FALSE)
  ),
  plotlyOutput("pubplot", height = "100%")
)

output$pubplot <- renderPlotly({
  if (input$Year == "All") {
    tt <- hybrid_df %>%
      filter(!is.na(license)) %>%
      mutate(publisher = forcats::as_factor(publisher)) %>%
      group_by(year, publisher, journal_title, yearly_jn_volume) %>%
      summarize(oa = n()) %>%
      mutate(prop = oa / yearly_jn_volume)
  } else {
    tt <- hybrid_df %>%
      filter(!is.na(license)) %>%
      mutate(publisher = forcats::as_factor(publisher)) %>%
      filter(year %in% input$Year) %>%
      group_by(year, publisher, journal_title, yearly_jn_volume) %>%
      summarize(oa = n()) %>%
      mutate(prop = oa / yearly_jn_volume)
  }
  tt <- tt %>%
    ungroup() %>%
    mutate(publisher = forcats::fct_other(publisher, keep = levels(publisher)[1:10]))
  p <- ggplot(tt, aes(y = prop, x = as.factor(publisher))) +
    geom_boxplot() +
    coord_flip() +
    theme_minimal() +
    xlab("Publisher") +
    ylab(NULL) +
    scale_x_discrete(
      drop = FALSE,
      limits = rev(levels(tt$publisher)),
      labels = function(x) {
        str_wrap(x, width = 30)
      }
    ) +
    scale_y_continuous(labels = scales::percent)
  if (input$rb == TRUE) {
    p <- p +
      geom_point(aes(colour = prop, label = journal_title)) +
      scale_colour_viridis(option = "C") +
      scale_fill_viridis(option = "C") +
      guides(color = FALSE, fill = FALSE)
  } else {
    p <- p
  }
  plotly::ggplotly(p, tooltip = c("label", "y")) %>%
    layout(margin = list(l = 200))
})

Journal hybrid open access share by publisher by year

fillCol(
  height = 600,
  flex = c(NA, 1),
  inputPanel(
    selectInput(
      "Publisher",
      label = "Select Publisher",
      choices = c("All", unique(hybrid_df$publisher)),
      selected = "All"
    )
  ),
  plotlyOutput("yearplot", height = "100%")
)

output$yearplot <- renderPlotly({
  if (input$Publisher == "All") {
    tt <- hybrid_df %>%
      filter(!is.na(license)) %>%
      group_by(year, publisher, journal_title, yearly_jn_volume) %>%
      summarize(oa = n()) %>%
      mutate(prop = oa / yearly_jn_volume)
  } else {
    tt <- hybrid_df %>%
      filter(!is.na(license)) %>%
      filter(publisher == input$Publisher) %>%
      group_by(year, publisher, journal_title, yearly_jn_volume) %>%
      summarize(oa = n()) %>%
      mutate(prop = oa / yearly_jn_volume)
  }

  p <- ggplot(tt, aes(y = prop, x = as.factor(year))) +
    geom_boxplot() +
    coord_flip() +
    theme_minimal() +
    xlab("Year") +
    ylab(NULL) +
    scale_x_discrete(drop = FALSE) +
    scale_y_continuous(labels = scales::percent)
  if (input$rb == TRUE) {
    p <- p +
      geom_point(aes(colour = prop, label = journal_title)) +
      scale_colour_viridis(option = "C") +
      scale_fill_viridis(option = "C") +
      guides(color = FALSE, fill = FALSE)
  } else {
    p <- p
  }
  plotly::ggplotly(p, tooltip = c("label", "y"))
})

Row {data-height=500}

Table view

hybrid_data <- reactive({
  if (input$Publisher == "All") {
    hybrid_df
  } else {
    hybrid_df <- hybrid_df %>%
      filter(publisher == input$Publisher)
  }
  hybrid_df %>%
    filter(year %in% input$Year) %>%
    group_by(year, publisher, journal_title, yearly_jn_volume) %>%
    summarize(oa = n()) %>%
    mutate(prop = round((oa / yearly_jn_volume) * 100, 2)) %>%
    arrange(desc(year)) %>%
    select(
      Year = year,
      Publisher = publisher,
      Journal = journal_title,
      `Article Volume` = yearly_jn_volume,
      `OA Articles` = oa,
      `OA Share (in %)` = prop
    )
})

renderDataTable(
  expr = {
    hybrid_data()
  },
  rownames = FALSE,
  filter = "bottom",
  options = list(
    pageLength = 4,
    dom = "ftp",
    columnDefs = list(list(
      className = "dt-head-left", targets = "_all"
    ))
  )
)

Institutional View

my_data <- reactive({
  if (!is.null(input$plot_clicked_data$name)) {
    hybrid_df %>%
      filter(suffix == input$plot_clicked_data$name | domain == input$plot_clicked_data$name) %>%
      mutate(plot_title = ifelse(!input$plot_clicked_data$name %in% suffix,
        domain,
        suffix
      ))
  } else {
    hybrid_df
  }
})

Column

Text-mined author emails (first match per article) - Double click for selection!

output$plot <- renderEcharts4r({
  hybrid_df %>%
    count(suffix, domain) %>%
    filter(!is.na(domain)) %>%
    e_charts() %>%
    e_treemap(suffix, domain, n,
      leafDepth = "1",
      name = "Corresponding Email"
    ) %>%
    e_tooltip(trigger = "item")
})

output$oa_license <- renderPlotly({
  ggplot(my_data(), aes(factor(year), fill = license)) +
    geom_bar(position = position_stack(reverse = TRUE)) +
    ggtitle(unique(my_data()$plot_title)) +
    scale_fill_manual(
      "License",
      values = colors_license
    ) +
    scale_x_discrete("", drop = FALSE) +
    scale_y_continuous("Articles",
      labels = function(x) {
        format(x, big.mark = " ", scientific = FALSE)
      }
    ) +
    theme_minimal() -> p
  tt <- plotly::ggplotly(p, tooltip = c("y"))
  tt$x$data <- lapply(tt$x$data, function(x) {
    x$text <- paste(x$name, x$y, sep = ": ")
    x
  })
  tt
})

output$oa_source <- renderPlotly({
  p <-
    ggplot(my_data(), aes(factor(year), ..count.., fill = hybrid_type)) +
    geom_bar(position = position_stack(reverse = TRUE)) +
    xlab("Year") +
    ylab("Articles") +
    scale_y_continuous(
      labels = function(x) {
        format(x, big.mark = " ", scientific = FALSE)
      }
    ) +
    scale_fill_manual(
      "Sources",
      values = colors_source_disclosure,
      na.value = "#b3b3b3a0"
    ) +
    theme_minimal()
  tt <- plotly::ggplotly(p, tooltip = c("y"))
  tt$x$data <- lapply(tt$x$data, function(x) {
    x$text <- paste(x$name, x$y, sep = ": ")
    x
  })
  tt
})

fillRow(
  flex = c(1, 2),
  fillCol(echarts4rOutput("plot")),
  fillCol(plotlyOutput("oa_license"), plotlyOutput("oa_source"))
)

Row

Top Publishers

renderDataTable(
  expr = {
    my_data() %>%
      count(publisher) %>%
      arrange(desc(n)) %>%
      mutate(prop = round(n / sum(n) * 100, 2))
  },
  options = list(
    pageLength = 8,
    dom = "ftp"
  )
)


subugoe/hoad documentation built on Feb. 22, 2022, 9:53 p.m.