# Setup ----
# Hide code, warnings and messages in the rendered report, and print NA table
# cells as blanks. The previous option values are kept in `opts`.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
opts <- options(knitr.kable.NA = "")

library(httr)
library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(ggfortify)

# Project helpers; deposits_get(), mdc_available() and mdc_get() used below are
# presumably defined in these files.
source("R/deposits.R")
source("R/mdc.R")

### Configurable variables ----
# Custom start and end dates: use YYYY-MM-DD format.
# These are specified from the workflow through environment variables.
# If they aren't provided we default to year-to-date: the beginning of the
# current year as start date and today's date as end date.
period_begin <- Sys.getenv("period_begin")
period_end <- Sys.getenv("period_end")

# An unset (or blank) variable arrives here as "". Map it to NA explicitly
# rather than relying on how as.Date() treats an empty string, which may be an
# error rather than NA depending on the R version.
period_begin <- as.Date(if (nzchar(period_begin)) period_begin else NA_character_)
period_end <- as.Date(if (nzchar(period_end)) period_end else NA_character_)

# Calculate dates: we use YTD if none are given.
if (is.na(period_begin)) {
  period_begin <- as.Date(format(Sys.Date(), "%Y-01-01"))
}
if (is.na(period_end)) {
  period_end <- Sys.Date()
}

# Set tokens and file location settings.
dataverse_host <- Sys.getenv("DATAVERSE_SERVER")
dataverse_key <- Sys.getenv("DATAVERSE_TOKEN")
dataverse <- paste0('http://', dataverse_host, "/api/info/metrics")
doi_link <- paste0('http://', dataverse_host, "/dataset.xhtml?persistentId=")

# Fetch one metrics endpoint and return its `data` element as a data frame.
# stop_for_status() turns an HTTP error into an R error, so a failed request
# is never parsed (and cached) as if it were an empty result.
metrics_get <- function(endpoint) {
  response <- stop_for_status(GET(paste0(dataverse, endpoint)))
  bind_rows(content(response)$data)
}

# Dataverse data ----
# Cached locally as rds files. This way, we don't need to re-scrape for each
# output format (HTML/PDF). The cache is only used when it is complete:
# deposits, projects, and one of the two statistics files.
cache_ready <- all(file.exists(c("deposits.rds", "projects.rds"))) &&
  any(file.exists(c("mdc.rds", "downloads.rds")))

if (cache_ready) {
  deposits <- readRDS("deposits.rds")
  projects <- readRDS("projects.rds")
  if (file.exists("downloads.rds")) {
    downloads <- readRDS("downloads.rds")
  }
  if (file.exists("mdc.rds")) {
    mdc <- readRDS("mdc.rds")
  }
} else {
  # Actually scrape.
  deposits <- deposits_get(dataverse_host)
  saveRDS(deposits, "deposits.rds")

  projects <- metrics_get("/uniquedownloads/monthly")
  saveRDS(projects, "projects.rds")

  if (mdc_available(dataverse_host)) {
    mdc <- mdc_get(dataverse_host)
    saveRDS(mdc, "mdc.rds")
  } else {
    # No MDC data: combine the per-month sum of unique downloads with the
    # plain monthly download counts. The join uses the columns the two tables
    # share (presumably just `date`).
    downloads <- metrics_get("/downloads/monthly")
    downloads <- projects %>%
      group_by(date) %>%
      summarize(unique = sum(count)) %>%
      full_join(downloads)
    saveRDS(downloads, "downloads.rds")
  }
}
`r dataverse_host`
`r period_begin`
to `r period_end`.
# Headline numbers quoted in the report text.
# `deposits`, `period_begin` and `period_end` are created by the setup chunk.

# Total number of projects known to the Dataverse installation.
dataverse_total <- nrow(deposits)

# NOTE(review): both comparisons below are strict, so items dated exactly on
# period_begin or period_end are not counted - confirm this is intended.

# All projects created within the specified time frame.
# Rows without a usable creation date are not counted.
created_on <- as.Date(deposits$createdAt)
deposits_new <- sum(
  created_on < period_end & created_on > period_begin,
  na.rm = TRUE
)

# All *published* projects within the specified time frame.
# Rows with a missing `published_at` are not counted.
published_on <- as.Date(deposits$published_at)
published_new <- sum(
  published_on < period_end & published_on > period_begin,
  na.rm = TRUE
)
`r dataverse_total` projects.
`r deposits_new` new deposits were initiated between `r period_begin` and `r period_end`.
`r published_new` projects were published between `r period_begin` and `r period_end`.
# Table 1: every deposit, counted by publication status.
deposits %>%
  group_by(versionState) %>%
  summarize(n = n()) %>%
  knitr::kable(
    caption = "Total deposits by publication status",
    col.names = c("Status", "Deposits")
  )

# Table 2: deposits by subject.
# Each count below is computed separately and full-joined onto the running
# `deposits_subj` table; the newest count is always the left-hand table.

# Expand the `subjects` list column to one row per (deposit, subject) and
# group by subject, ready for a summarize(<column> = n()).
by_subject <- function(rows) {
  rows %>%
    select(c("subjects")) %>%
    unnest_longer(subjects) %>%
    group_by(subjects)
}

# Creation dates, and which deposits were created inside the reporting period
# (strict bounds on both ends).
created_on <- as.Date(deposits$createdAt)
created_in_period <- created_on < period_end & created_on > period_begin

# All published projects by subject.
deposits_subj <- deposits %>%
  filter(versionState == "Published") %>%
  by_subject() %>%
  summarize(published = n())

# Published projects by subject within the time frame.
deposits_subj <- deposits %>%
  filter(
    as.Date(published_at) < period_end,
    as.Date(published_at) > period_begin,
    versionState == "Published"
  ) %>%
  by_subject() %>%
  summarize(period = n()) %>%
  full_join(deposits_subj)

# Any unpublished projects at all? If not, the column is a constant 0.
if (any(deposits$versionState == "Unpublished")) {
  deposits_subj <- deposits %>%
    filter(versionState == "Unpublished") %>%
    by_subject() %>%
    summarize(unpublished = n()) %>%
    full_join(deposits_subj)
} else {
  deposits_subj <- deposits_subj %>% mutate(unpublished = 0)
}

# Any unpublished projects created in the time period specified?
if (any(deposits$versionState[which(created_in_period)] == "Unpublished")) {
  deposits_subj <- deposits %>%
    filter(
      versionState == "Unpublished",
      as.Date(createdAt) < period_end,
      as.Date(createdAt) > period_begin
    ) %>%
    by_subject() %>%
    summarize(unpublished_period = n()) %>%
    full_join(deposits_subj)
} else {
  deposits_subj <- deposits_subj %>% mutate(unpublished_period = 0)
}

# Column headers that mention the reporting period.
published_header <- paste("Published between", period_begin, "and", period_end)
unpublished_header <- paste("Unpublished between", period_begin, "and", period_end)

# Exclude the unpublished columns if the dataverse key is missing.
if (dataverse_key == "") {
  include_columns <- c("subjects", "period", "published")
  column_names <- c("Subject", published_header, "Total published")
} else {
  include_columns <- c(
    "subjects", "unpublished_period", "unpublished", "period", "published"
  )
  column_names <- c(
    "Subject", unpublished_header, "Total draft",
    published_header, "Total published"
  )
}

# Make the table.
deposits_subj %>%
  select(all_of(include_columns)) %>%
  knitr::kable(caption = "Deposits by subject", col.names = column_names)
# Top-5 download tables (all time, and within the reporting period).
# `projects` holds one row per project (pid) and month with a download `count`;
# `deposits`, `doi_link`, `period_begin` and `period_end` come from the setup
# chunk.

# Rank projects by summed downloads and return the top `n` rows with `pid`
# replaced by a markdown link "[title](doi_link + pid)".
#
# Titles are looked up with match() so each title lines up with its own pid in
# ranking order. Indexing `deposits$name` with `global_id %in% pid` returns the
# titles in `deposits` order instead, which pairs them with the wrong rows.
# A pid with no matching deposit is labelled with the pid itself.
top_projects <- function(monthly, n = 5) {
  monthly %>%
    group_by(pid) %>%
    summarize(downloads = sum(count)) %>%
    arrange(desc(downloads)) %>%
    slice(seq_len(n)) %>%
    mutate(
      title = deposits$name[match(pid, deposits$global_id)],
      title = ifelse(is.na(title), pid, title),
      pid = paste0("[", title, "](", doi_link, pid, ")")
    ) %>%
    select(pid, downloads)
}

projects %>%
  top_projects() %>%
  knitr::kable(
    caption = "Top 5 all time downloaded projects",
    row.names = FALSE,
    col.names = c("Project", "Downloads")
  )

# Monthly buckets arrive as "YYYY-MM"; date each one on the 1st of its month.
# Guarded so that re-running this code does not append "-01" a second time.
if (!inherits(projects$date, "Date")) {
  projects$date <- as.Date(paste0(projects$date, "-01"))
}

# The lower bound is inclusive: a bucket is dated the 1st of its month, so a
# strict comparison would drop the whole first month whenever period_begin is
# itself the 1st (e.g. January with the year-to-date default).
projects %>%
  filter(date < period_end & date >= period_begin) %>%
  top_projects() %>%
  knitr::kable(
    caption = paste(
      "Top 5 downloaded projects between", period_begin, "and", period_end
    ),
    row.names = FALSE,
    col.names = c("Project", "Downloads")
  )
Dataverse reports statistics as "total" and "unique".
# Monthly statistics plot.
# Not all dataverse instances use MDC data (QDR does). When no `mdc` object
# exists we fall back to the regular downloads stats.
monthly_stats <- if (exists("mdc")) mdc else downloads

# The series starts at the year and month of the first row's "YYYY-MM" date;
# every column except the first (the date) becomes one series.
# NOTE(review): assumes rows are ordered by month with no gaps - confirm.
series_start <- strsplit(monthly_stats$date[1], split = "-")[[1]]
stats_ts <- ts(monthly_stats[, -1], frequency = 12, start = series_start)

# All series share a single panel; y-axis labels use thousands separators.
autoplot(stats_ts, facets = FALSE) +
  labs(title = "Statistics", color = "Statistic") +
  scale_y_continuous(labels = scales::comma)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.