`r length(unique(df$publisher))` unique publishers
`r sum(!is.na(df$publisher))` documents have unambiguous publisher information (`r round(100*mean(!is.na(df$publisher)), 1)`%). This includes documents identified as self-published; the author name is used as the publisher in those cases (if known).
`r length(which(df$self_published))` documents are identified as self-published (`r round(100*(length(which(df$self_published))/nrow(df)), 1)`%).
Conversions from original to final names (only non-trivial conversions shown)
The `r ntop` most common publishers are shown with the number of documents.
# Plot the most common publishers (up to `ntop`) against their document
# counts, with the counts drawn on a log10 y-axis.
p <- top_plot(df, "publisher", ntop) +
  ggtitle("Top publishers") +
  scale_y_log10() +
  ylab("Documents")
print(p)
Title count
# Title-count timeline for the top publishers: stacked bars per publication
# decade, one grey shade per publisher.
#
# NOTE: only the top 2 publishers are used and the final print() is left
# disabled because, per the original author, the ggplot never completes when
# more than 2 are selected (cause unknown).
theme_set(theme_bw(20)) # changes the global ggplot2 theme for later plots too
field <- "publisher"
tops <- names(top(df, field, n = 2))

# `field` holds a column name, so wrap it in all_of(): handing select() a bare
# external character vector is ambiguous and deprecated in tidyselect.
dfs <- df %>%
  select(all_of(field), publication_decade)

# Copy the chosen column to a fixed name ("field") as character so the verbs
# below can refer to it directly, then drop the original column.
dfs$field <- as.character(dfs[[field]])
dfs[[field]] <- NULL

# Keep only the top publishers with a known decade and count titles per
# publisher and decade (count() stores the tally in column `n`).
dfs <- dfs %>%
  filter(field %in% tops) %>%
  filter(!is.na(field) & !is.na(publication_decade)) %>%
  count(field, publication_decade)
dfs$field <- as.factor(dfs$field)

p <- ggplot(dfs, aes(x = publication_decade, y = n, fill = field)) +
  geom_bar(stat = "identity", position = "stack", color = "black") +
  labs(
    x = "Publication Decade",
    y = "Title Count",
    title = "Title count timeline for the top publishers"
  ) +
  scale_fill_grey() +
  guides(fill = guide_legend("Publisher"))
# print(p)
Title count versus paper consumption (top publishers):
# Title count versus paper consumption per publisher: print the comparison
# plot, then tabulate only the publishers listed in `tops`.
# res <- compare_title_paper(df, "publisher", selected = tops)
tops <- names(top(df, field = "publisher", n = 10))
res <- compare_title_paper(df, "publisher")
print(res$plot)

# Spell out `digits` in full: the previous `digit = 2` only worked through
# partial argument matching.
kable(subset(res$table, publisher %in% tops), digits = 2)
Summaries of the corporate field.
`r length(unique(df$corporate))` unique corporates
`r sum(!is.na(df$corporate))` documents have unambiguous corporate information (`r round(100*mean(!is.na(df$corporate)), 1)`%).
Conversions from original to final names (only non-trivial conversions shown)
The `r ntop` most common corporates are shown with the number of documents.
# Plot the most common corporates (up to `ntop`) against their document
# counts, with the counts drawn on a log10 y-axis.
p <- top_plot(df, "corporate", ntop) +
  ggtitle("Top corporates") +
  scale_y_log10() +
  ylab("Documents")
print(p)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.