# Please read https://github.com/KonScience/Summarize-Flattr-Reports#summarize-flattr-reports
rm(list = ls()) # clean workspace
original_wd <- getwd() # save current working directory
Sys.setlocale("LC_ALL", "UTF-8") # respect non-ASCII symbols like German umlauts on Mac OSX, learned from https://stackoverflow.com/questions/8145886/
options(stringsAsFactors = FALSE, limitsize = FALSE) # set global options; row.names = FALSE has no effect, though
# see http://www.r-bloggers.com/library-vs-require-in-r/ for require() vs. library() discussion
library(ggplot2)
library(plyr)
library(scales)
# get all filenames of Flattr Monthly Revenue CSV; assumes that all were downloaded into same folder
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) { # execute via: Rscript path/to/summarize-flattr-reports.R path/to/flattr-revenue-000000.csv
print("Please select one of the 'flattr-revenue-....csv' files from the folder you downloaded them to.")
first_flattr_file <- file.choose()
flattr_dir <- dirname(first_flattr_file) # learned from http://stackoverflow.com/a/18003224
} else {
if ((substring(args[1], 1, 1) == "/") || (substring(args[1], 2, 2) == ":")) {
flattr_dir <- dirname(args[1]) # set absolute directory by cli argument
} else {
flattr_dir <- dirname(file.path(getwd(), args[1], fsep = .Platform$file.sep)) # set relative directory by cli argument
}
}
Flattr_filenames <- list.files(flattr_dir, pattern = "flattr-revenue-20[0-9]{4}.csv")
setwd(flattr_dir)
if ("flattr-revenue-000000.csv" %in% list.files(flattr_dir, pattern = "*.csv")) {
# use summary file if available & create if not, instead of reading files individually
try(known_raw <- read.csv2("flattr-revenue-000000.csv", encoding = "UTF-8"))
known_raw$X <- NULL
# check for existing raw date & merge with new
if (length(unique(known_raw$period)) < length(Flattr_filenames)) {
known_months <- paste(paste("flattr-revenue", # turn months into filenames
sub("-",
"",
unique(known_raw$period)),
sep = "-"),
"csv",
sep = ".")
new_months <- setdiff(Flattr_filenames, known_months)
new_raw <- do.call("rbind",
lapply(new_months,
read.csv2,
encoding = "UTF-8"))
raw <- rbind(known_raw, new_raw) # learned from http://stackoverflow.com/a/27313467
} else { # read data from all CSVs into data frame
raw <- do.call("rbind", # constructs and executes a call of the rbind function => combines R objects
lapply(Flattr_filenames, # applies function read.csv2 over list or vector
read.csv2,
encoding = "UTF-8" # learned from RTFM, but works only on Win7
)) # Function structure learned from https://stat.ethz.ch/pipermail/r-help/2010-October/255593.html
}
} else {
raw <- do.call("rbind", lapply(Flattr_filenames, read.csv2, encoding = "UTF-8")) # same as inner else, just to catch edge case of repetive plotting without adding new Revenue Reports
}
write.csv2(x = raw, file = "flattr-revenue-000000.csv", row.names = FALSE)
# append 1st days to months & convert to date format; learned from http://stackoverflow.com/a/4594269
raw$period <- as.Date(paste(raw$period, "-01"), format="%Y-%m -%d")
raw$EUR_per_click <- raw$revenue / raw$clicks
# populate raw data with all_revenue for each thing
for (i in 1:nrow(raw)){raw$all_revenue[i] <- sum(subset(raw, title == raw$title[i])$revenue)}
# determine dataset size to auto-adjust plots
N_months <- length(Flattr_filenames)
N_things <- length(unique(raw$title))
date <- format(Sys.time(), "%Y-%m-%d")
export_csv <- function(table, filename){
write.csv2(table, paste0(filename, "-", date, ".csv"),
row.names = FALSE)}
export_png <- function(p, fn, h = par("din")[2], w = par("din")[1]){
ggsave(filename = paste0(fn, "-", date, ".png"),
plot = p, height = h, width = w)
return(p)}
# summarize & order by title to account for changes in Thing ID and URLs (due to redirection after permalink changes)
per_thing <- ddply(.data = raw,
.variables = "title",
.fun = summarize,
all_clicks = sum(clicks),
all_revenue = sum(revenue))
per_thing <- per_thing[order(per_thing$all_revenue, decreasing = TRUE),]
export_csv(per_thing, "flattr-revenue-things")
# summarize & order by month and thing to provide click-value development over time
per_month_and_thing <- ddply(raw,
c("period", "title", "EUR_per_click"),
summarize,
all_clicks = sum(clicks),
all_revenue = sum(revenue))
per_month_and_thing <- per_month_and_thing[order(per_month_and_thing$title),]
export_csv(per_month_and_thing, "flattr-revenue-clicks")
# summarize & export revenue per month
per_month <- ddply(raw,
"period",
summarize,
all_clicks = sum(clicks),
all_revenue = sum(revenue))
per_month <- per_month[order(per_month$period),]
export_csv(per_month, "flattr-revenue-months")
# revenue per click and month coloured by thing, with trends for everything & best thing
best_thing <- subset(per_month_and_thing, title == per_thing[1,1]) # reduces data frame to best thing, for later trendline
best_thing$EUR_per_click <- best_thing$all_revenue / best_thing$all_clicks
flattr_plot <- ggplot(data = raw,
mapping = aes(x = period,
y = EUR_per_click,
size = raw$revenue, # points sized according to revenue of that thing in that month => bubble plot
colour = factor(title)))
export_png(flattr_plot +
geom_jitter() + # same as geom_point(position = "jitter"); spreads data points randomly around true x value bit; day-exact resolution not (yet) possible
stat_smooth(mapping = aes(x = best_thing$period,
y = best_thing$EUR_per_click,
size = best_thing$all_revenue),
data = best_thing,
method = "auto",
show.legend = FALSE,
size = N_things / N_months,
se = FALSE, # confidence interval indicator
linetype = "dashed") + # learned from http://sape.inf.usi.ch/quick-reference/ggplot2/linetype
stat_smooth(aes(group = 1), # plots trendline over all values; otherwise: one for each thing; learned from http://stackoverflow.com/a/12810890
method = "auto",
se = FALSE,
color = "darkgrey",
show.legend = FALSE,
size = N_months / 20) +
scale_x_date(labels = date_format("%Y-%b"), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, mean(raw$EUR_per_click) * 5), # omit extreme y-values; learned from http://stackoverflow.com/a/26558070
expand = c(0, 0)) +
labs(title = "Development of Flattr Revenue per Click", # learned from http://docs.ggplot2.org/current/labs.html
x = NULL,
y = expression("EUR per Flattr (extremes omitted)"),
colour = "Thing",
size = "Total revenue of Thing") +
theme_classic(base_size = sqrt(N_months + N_things)) +
theme(legend.position = "none", axis.text.x = element_text(angle = 15)),
"flattr-revenue-clicks")
# revenue per month and thing
monthly_advanced_plot <- ggplot(per_month_and_thing, aes(period, all_revenue, fill = factor(title)))
export_png(monthly_advanced_plot +
geom_bar(stat = "identity") +
scale_x_date(expand = c(0, 0), labels = date_format("%Y-%b")) +
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1), expand = c(0, 0)) +
guides(fill = guide_legend(reverse = TRUE)) +
labs(title = "Development of Flattr Revenue by Things", x = NULL, y = "EUR received", fill = "Thing") +
theme_classic(base_size = (N_things + N_months) / 5),
"flattr-revenue-months",
N_things/3,
N_months/1.5)
# total revenue per month with trend
monthly_simple_plot <- ggplot(per_month, aes(x = period, y = all_revenue, size = per_month$all_revenue))
export_png(monthly_simple_plot +
geom_point(colour = "#ED8C3B") +
stat_smooth(data = per_month, method = "auto", color = "#80B04A", size = N_things / N_months) + # fit trend plus confidence interval
scale_x_date(expand = c(0, 0), labels = date_format("%Y-%b")) +
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1), expand = c(0, 0)) +
labs(title = "Development of Flattr Revenue", x = NULL, y = "EUR received") +
theme_classic(base_size = sqrt(N_things + N_months)) +
theme(axis.text.x = element_text(angle = 15), legend.position = "none"),
"flattr-revenue-months-summarized")
# revenue per location of button
# summarize & order by month and domain
raw$domain <- sapply(strsplit(x = raw$url,
split = "/"),
"[", # indexing operator, see https://stackoverflow.com/questions/3703803/apply-strsplit-rowwise/3703855#comment3905951_3703855
3) # select index 3 of list = domain
for (i in 1:length(raw$domain)) {
raw$domain[i] <- gsub(pattern = "www.",
replacement = "",
x = raw$domain[i])}
per_month_and_domain <- ddply(raw,
c("period", "domain"),
summarize,
all_clicks = sum(clicks),
all_revenue = sum(revenue))
monthly_domain_plot <- ggplot(per_month_and_domain, aes(period, all_revenue, fill = factor(domain)))
export_png(monthly_domain_plot +
geom_bar(stat = "identity") +
scale_x_date(expand = c(0,0), labels = date_format("%Y-%b")) +
scale_y_continuous(limits = c(0, max(per_month$all_revenue)), expand = c(0, 0)) +
scale_fill_brewer(type = "qual") +
guides(fill = guide_legend(reverse = TRUE, keywidth = 0.75, keyheight = 0.75)) +
labs(title = "Development of Flattr Revenue by Button Locations", x = NULL, y = "EUR", fill = "Domains") +
theme_classic(base_size = sqrt(N_things + N_months)) +
theme(axis.text.x = element_text(angle = 30)),
"flattr-revenue-months-domain")
monthly_domain_plot_fractions <- ggplot(per_month_and_domain, aes(period, all_revenue, fill = factor(domain)))
export_png(monthly_domain_plot_fractions +
geom_bar(position = "fill", stat = "identity") +
scale_x_date(expand = c(0,0), labels = date_format("%Y-%b")) +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_brewer(type = "qual") +
guides(fill = guide_legend(reverse = TRUE, keywidth = 0.75, keyheight = 0.75)) +
labs(title = "Development of Flattr Revenue by Button Locations",
x = NULL, y = "Fraction", fill = "Domains") +
theme_classic(base_size = sqrt(N_things + N_months)) +
theme(axis.text.x = element_text(angle = 30)),
"flattr-revenue-months-domain-fractions")
# sort & export after plotting in order to preserve alphabatic sorting in of domains in plot
per_month_and_domain <- per_month_and_domain[order(per_month_and_domain$all_revenue),]
export_csv(per_month_and_domain, "flattr-revenue-clicks-domain")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.