#' Source-specific metric mappings
#'
#' List containing source-specific metrics with their original naming.
#' Some of those get mapped into \code{\link{STANDARD_METRICS}} through
#' the \code{\link{metric_type_mapping}} table.
#'
MAPPINGS <- l(
spotify = l(
# From https://github.com/playax/playax/blob/master/app/models/concerns/external_id_spotify.rb
followers = 0,
popularity = 1,
listeners = 2,
streams = 3
),
# From https://github.com/playax/playax/blob/master/app/models/concerns/external_id_youtube.rb
youtube = l(
subscriber_count = 0,
view_count = 1
),
# From https://github.com/playax/playax/blob/master/app/models/concerns/external_id_knowledge_graph.rb
knowledgegraph = l(
track_plays = 0,
artist_plays = 1
),
facebook = l(
page_fans = 0,
talking_about_this = 1
),
instagram = l(
followers = 0,
likes = 1,
comments = 2,
medias = 3
)
)
# Mapping of standard metrics into rsm metrics.
std_metrics <- function() {
metric_type_mapping %>%
inner_join(source_name_mapping,
by = c('source_name' = 'raw_social_metrics_index'), suffix = c('', '.snm')) %>%
mutate(
metric_type_str = unlist(playaxdata:::STANDARD_METRICS)[metric_type_to + 1],
source_name.snm = tolower(source_name.snm)
) %>%
rename(
source_name_str = source_name.snm,
metric_type = metric_type_from
) %>%
select(-metric_type_to, -period_metrics_index)
}
# Mapping of non-standard metrics.
ns_metrics <- function() {
lapply(
names(MAPPINGS),
function(source_name_str) {
metrics <- playaxdata:::MAPPINGS[[source_name_str]]
tibble(source_name = source_indices(source_name_str),
source_name_str = source_name_str,
metric_type = unname(unlist(metrics)),
metric_type_str = names(metrics))
}
) %>% {
do.call(rbind, .)
}
}
# All metrics in one table.
rsm_metrics <- function() {
ns <- ns_metrics() %>% rename(non_standard_name = metric_type_str)
std <- std_metrics() %>% rename(standard_name = metric_type_str)
std %>%
left_join(ns %>% select(-source_name_str),
by = c('source_name', 'metric_type')) %>%
rbind(
ns %>%
anti_join(std, by = c('source_name', 'metric_type')) %>%
mutate(standard_name = NA, aggregation_function = NA)
)
}
#' Global and regionalized per-artist, "raw" metrics
#'
#' `*_social_metrics` are a set of per-artist tables which store "raw", daily
#' data from the various sources ingested by Playax. These are "raw" in the
#' sense that no processing is done on the data, and it is stored as-is.
#'
#' Regionalized variants are avaiable by supplying a different _scope_ parameter,
#' or by calling `{city,state,region}_social_metrics` directly.
#'
#' @param scope a regionalized scope for the data. Valid scopes are `global`
#' (data for the whole planet or the whole country, default),
#' `city` (data per city), `state` (date per state), and
#' `region` (data per region).
#'
#' @export
raw_social_metrics <- function(scope = c('global', 'city', 'state', 'region')) {
scope = match.arg(scope)
get(g('{scope}_social_metrics'))()
}
#' @rdname global_social_metrics
#' @export
global_social_metrics <- function() new_rsm(db_tbl('raw_social_metrics_cs'),
'source_name', NULL)
#' @rdname raw_social_metrics
#' @export
city_social_metrics <- function() regionalized_rsm('city')
#' @rdname raw_social_metrics
#' @export
state_social_metrics <- function() regionalized_rsm('state')
#' @rdname raw_social_metrics
#' @export
region_social_metrics <- function() regionalized_rsm('region')
regionalized_rsm <- function(scope) {
new_rsm(db_tbl(g('{scope}_social_metrics')), 'source_name_idx', scope)
}
#' @export
collect.rsm <- function(x, ...) mutate(
new_rsm(NextMethod(), attributes(x)$source_name_idx, attributes(x)$scope),
across(matches('metric_date'), ~as.POSIXct(.))) # freaking DBI bug
new_rsm <- function(.tbl, source_name_idx, scope = NULL) {
class(.tbl) <- c('rsm', class(.tbl))
attributes(.tbl)$source_name_idx <- source_name_idx
if (!is.null(scope)) {
attributes(.tbl)$scope <- scope
class(.tbl) <- c('regionalized_rsm', class(.tbl))
}
.tbl
}
#' @export
supported_metric_types_.rsm <- function(.tbl, source, metric_type) {
# FIXME this is not using the underlying table
tryCatch(
get_keys(rsm_metrics(), source, source_name_str) %>%
mutate(supported = coalesce(standard_name, non_standard_name)) %>%
pull(supported),
error = function(err) {
if (startsWith(err$message, 'Invalid source_name_str:')) {
stop(glue::glue('Source <<{source}>> is supported, but has no ',
'mapped metric types.'))
}
}
)
}
#' @export
supported_sources.rsm <- function(.tbl) {
source_indices <- .tbl %>%
select(source_name) %>%
distinct %>%
collect %>%
pull(source_name)
source_name_mapping %>%
filter(raw_social_metrics_index %in% source_indices) %>%
pull(source_name) %>% tolower
}
#' @export
for_right_holder_.rsm <- function(.tbl, right_holder_ids) {
external_ids <- db_tbl('right_holder_external_ids') %>%
for_right_holder(.dots = right_holder_ids) %>%
pull(source_id)
.tbl %>% filter(source_id %in% external_ids)
}
#' @export
for_source.rsm <- function(.tbl, ..., .dots = NULL) {
source_names <- tolower(get_parlist(..., .dots = NULL))
src_indices <- source_indices(source_names)
attributes(.tbl)$selected_sources <- src_indices
source_name_idx <- as.symbol(attributes(.tbl)$source_name_idx)
eval(substitute(in_filter(.tbl, source_name_idx, src_indices)))
}
#' @export
for_metric_type.rsm <- function(.tbl, ..., .dots = NULL) {
metric_types <- get_parlist(..., .dots = .dots)
# Preselected sources are a problem. They've already
# inserted filters into the query, and we'll generate redundant
# clauses here. Yet, I don't know of a better way to do this
# which does not imply lazy query generation, which has its
# own set of nasty problems.
# Do we have sources pre-selected?
sources <- attributes(.tbl)$selected_sources
if (is.null(sources)) {
# We don't. Reverse select sources based on metric type.
sources <- reverse_select(metric_types)
}
# Retrieves unanbiguous source_name, metric_type pairs.
pairs <- rsm_metrics() %>%
filter(source_name %in% sources,
(standard_name %in% tolower(metric_types)) |
(non_standard_name %in% tolower(metric_types)))
# Generates where clauses and runs the query.
.tbl %>% filter(!!generate_clauses(attributes(.tbl)$source_name_idx,
pairs$source_name, pairs$metric_type))
}
# Reverse-selects sources from metric types.
reverse_select <- function(metric_types) {
rsm_metrics() %>%
filter((standard_name %in% tolower(metric_types)) |
(non_standard_name %in% tolower(metric_types))) %>%
pull(source_name) %>%
unique
}
generate_clauses <- function(source_name_idx, sources, metric_types, i = 1) {
source <- sources[i]
type <- metric_types[i]
source_name_idx <- as.symbol(source_name_idx)
expr <- substitute((source_name_idx == source & metric_type == type))
if (i < length(sources) & i < length(metric_types)) {
expr <- call('|', expr, generate_clauses(
source_name_idx, sources, metric_types, i + 1))
}
expr
}
#' @export
for_dates_.rsm <- function(.tbl, start, end) {
.tbl %>% filter(!!start <= metric_date && metric_date <= !!end)
}
#' @export
with_right_holders_.rsm <- function(.tbl, drop_invalid = TRUE) {
if ('right_holder_id' %nin% colnames(.tbl)) {
.tbl <- .tbl %>%
inner_join(db_tbl('right_holder_external_ids'), by = 'source_id',
suffix = c('', '.rhei'))
}
NextMethod()
}
#' @export
with_source_names.rsm <- function(.tbl) {
# We can only do this to in-memory tables.
check_in_memory(.tbl)
source_name_idx <- attributes(.tbl)$source_name_idx
check_columns(.tbl, l('metric_type' = 'integer',
!!source_name_idx := 'integer'))
join_spec <- c('metric_type')
join_spec[source_name_idx] = 'source_name'
.tbl %>%
left_join(
rsm_metrics() %>%
mutate(metric_type_str = coalesce(standard_name, non_standard_name)) %>%
select(metric_type, source_name, metric_type_str, source_name_str),
by = join_spec
) %>%
select(-source_name, -metric_type) %>%
rename(
source_name = source_name_str,
metric_type = metric_type_str
)
}
#' @export
with_metric_types.rsm <- with_source_names.rsm
#' Differences cumulative metrics in `raw_social_metrics_cs`
#'
#' Some metric types in `raw_social_metrics_cs` are cumulative, others are not. This
#' function takes all cumulative metrics and transforms them into non-cumulative
#' by taking lag 1 differences of metrics with themselves.
#'
#' Works only for in-memory tables. Must enrich source names and metric types
#' with `with_source_names` first.
#'
#' @export
diff_metrics <- function(.tbl) {
check_in_memory(.tbl)
check_columns(
.tbl, list('source_name' = 'character',
'metric_type' = 'character',
'metric_date' = list('POSIXct', 'Date'))
)
mappings <- std_metrics()
for (source_name in unique(mappings$source_name_str)) {
cumulatives <- mappings %>%
filter(source_name_str == !!source_name,
aggregation_function == 'last_value')
if (!nrow(cumulatives)) {
next
}
for (metric_type in cumulatives$metric_type_str) {
.tbl <- diff_metric(.tbl, source_name, metric_type)
}
}
.tbl
}
diff_metric <- function(.tbl, source_name, metric_type) {
.tbl %>%
arrange(metric_date) %>%
group_by(source_name, metric_type, source_id) %>%
mutate(
value = if_else(
source_name == !!source_name & metric_type == !!metric_type,
c(NA, diff(value)),
value
)
)
}
source_indices <- function(source_names) {
source_name_mapping %>%
get_keys(source_names, source_name, unique = TRUE) %>%
pull(raw_social_metrics_index)
}
#' @export
supported_location_types.regionalized_rsm <- function(.tbl) {
attributes(.tbl)$scope
}
#' @export
for_location_.regionalized_rsm <- function(.tbl, location_type, location_id) {
filter_expr <- str2expression(
g('{attributes(.tbl)$scope}_id == {location_id}'))[[1]]
.tbl %>% filter(!!filter_expr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.