Nothing
# --- Utility Functions for sensortowerR ---
# This file contains helper functions used by the main data-fetching functions
# in the package. They handle tasks like input validation, query parameter
# preparation, and API request execution.
#' @importFrom rlang abort
#' @importFrom stats setNames
#' @importFrom httr2 request req_user_agent req_url_path_append req_url_query
#' req_error req_perform resp_status resp_body_raw resp_check_status
#' resp_body_string
#' @importFrom jsonlite fromJSON
#' @importFrom tibble as_tibble
#' @importFrom utils URLencode head
#' @importFrom tidyr unnest
#' @importFrom dplyr rename all_of
#'
# --- Shared API Helpers ---
st_api_base_url <- function() {
"https://api.sensortower.com"
}
.st_endpoint_registry <- list(
sales_report_estimates = c("{os}", "sales_report_estimates"),
sales_report_estimates_comparison_attributes = c("{os}", "sales_report_estimates_comparison_attributes"),
facets_metrics = c("facets", "metrics"),
usage_active_users = c("{os}", "usage", "active_users"),
usage_demographics = c("{os}", "usage", "demographics"),
usage_retention = c("{os}", "usage", "retention"),
top_and_trending_active_users = c("{os}", "top_and_trending", "active_users"),
top_and_trending_publishers = c("{os}", "top_and_trending", "publishers"),
ranking = c("{os}", "ranking"),
apps = c("{os}", "apps"),
app_tag_apps = c("app_tag", "apps"),
custom_fields_filter = c("custom_fields_filter"),
custom_fields_filter_id = c("custom_fields_filter", "{id}"),
custom_fields_filter_fields_values = c("custom_fields_filter", "fields_values"),
apps_timeseries = c("apps", "timeseries"),
apps_timeseries_unified = c("apps", "timeseries", "unified_apps"),
unified_publishers_apps = c("unified", "publishers", "apps"),
unified_sales_report_estimates = c("unified", "sales_report_estimates"),
search_entities = c("{app_store}", "search_entities"),
games_breakdown = c("{os}", "games_breakdown")
)
resolve_endpoint_segment <- function(segment, placeholders) {
if (!grepl("^\\{[A-Za-z0-9_]+\\}$", segment)) {
return(segment)
}
key <- gsub("^\\{|\\}$", "", segment)
value <- placeholders[[key]]
value_chr <- if (is.null(value)) "" else as.character(value[1])
if (!nzchar(value_chr)) {
rlang::abort(sprintf("Missing required endpoint placeholder: '%s'.", key))
}
value_chr
}
st_endpoint_relative_segments <- function(endpoint_key, ...) {
template <- .st_endpoint_registry[[endpoint_key]]
if (is.null(template)) {
rlang::abort(sprintf("Unknown endpoint key '%s'.", endpoint_key))
}
placeholders <- list(...)
vapply(
template,
resolve_endpoint_segment,
placeholders = placeholders,
FUN.VALUE = character(1)
)
}
st_endpoint_segments <- function(endpoint_key, ...) {
c("v1", st_endpoint_relative_segments(endpoint_key, ...))
}
st_endpoint_relative_path <- function(endpoint_key, ...) {
paste(st_endpoint_relative_segments(endpoint_key, ...), collapse = "/")
}
st_endpoint_path <- function(endpoint_key, ...) {
paste(st_endpoint_segments(endpoint_key, ...), collapse = "/")
}
resolve_auth_token <- function(auth_token = NULL,
env_var = "SENSORTOWER_AUTH_TOKEN",
error_message = NULL) {
token <- if (is.null(auth_token)) Sys.getenv(env_var) else auth_token
token <- trimws(as.character(token[1]))
if (!nzchar(token)) {
msg <- if (!is.null(error_message) && nzchar(error_message)) {
error_message
} else {
sprintf("Authentication token not found. Set %s environment variable.", env_var)
}
rlang::abort(msg)
}
token
}
# --- Input Validation ---
validate_inputs <- function(os,
comparison_attribute,
time_range,
measure,
date,
category,
regions,
end_date = NULL,
limit = 25,
offset = NULL,
device_type = NULL,
custom_fields_filter_id = NULL,
custom_tags_mode = NULL,
data_model = NULL) {
# Validation checks for common parameters
if (!os %in% c("ios", "android", "unified")) {
rlang::abort("`os` must be one of 'ios', 'android', or 'unified'")
}
if (!comparison_attribute %in% c("absolute", "delta", "transformed_delta")) {
rlang::abort("`comparison_attribute` must be one of 'absolute', 'delta', 'transformed_delta'")
}
if (!is.character(time_range) || !nzchar(time_range)) {
rlang::abort("`time_range` must be a non-empty string")
}
if (!is.character(measure) || !nzchar(measure)) {
rlang::abort("`measure` must be a non-empty string")
}
if (is.null(date)) {
rlang::abort("`date` must be provided")
}
if (is.null(category) && is.null(custom_fields_filter_id)) {
rlang::abort("Either `category` or `custom_fields_filter_id` must be provided")
}
if (is.null(regions)) {
rlang::abort("`regions` must be provided")
}
if (!is.numeric(limit) || limit <= 0 || limit != round(limit)) {
rlang::abort("`limit` must be a positive integer")
}
# Specific validations
if (os %in% c("ios", "unified") && is.null(device_type)) {
message("`device_type` is not specified for `os = '", os, "'`. Defaulting to 'total'.")
device_type <- "total"
}
if (!is.null(custom_fields_filter_id) && os == "unified" && is.null(custom_tags_mode)) {
rlang::abort("`custom_tags_mode` must be provided when `os` is 'unified' and `custom_fields_filter_id` is used.")
}
}
# --- Query Parameter Preparation ---
prepare_query_params_sales <- function(auth_token,
comparison_attribute,
time_range,
measure,
date,
category,
end_date,
regions,
limit,
offset,
device_type,
custom_fields_filter_id,
custom_tags_mode,
os) {
params <- list(
auth_token = auth_token,
comparison_attribute = comparison_attribute,
time_range = time_range,
measure = measure,
date = as.character(date),
category = category, # Keep category even with custom filter
end_date = if (!is.null(end_date)) as.character(end_date) else NULL,
regions = paste(regions, collapse = ","),
limit = limit,
offset = offset,
device_type = if (os %in% c("ios", "unified")) device_type else NULL,
custom_fields_filter_id = custom_fields_filter_id,
custom_tags_mode = if (os == "unified") custom_tags_mode else NULL
)
# Remove NULLs
params <- params[!sapply(params, is.null)]
# Debug: print params when using custom filter
if (!is.null(custom_fields_filter_id)) {
message("Debug: Query params with custom filter (sales):")
message(" custom_fields_filter_id: ", params$custom_fields_filter_id)
message(" category: ", if (is.null(params$category)) "NULL" else params$category)
message(" custom_tags_mode: ", params$custom_tags_mode)
}
params
}
prepare_query_params_active_users <- function(auth_token,
comparison_attribute,
time_range,
measure,
date,
category,
regions,
limit,
offset,
device_type,
custom_fields_filter_id,
custom_tags_mode,
data_model,
os) {
params <- list(
auth_token = auth_token,
comparison_attribute = comparison_attribute,
time_range = time_range,
measure = measure,
date = as.character(date),
category = category, # Keep category even with custom filter
regions = paste(regions, collapse = ","),
limit = limit,
offset = offset,
device_type = if (os %in% c("ios", "unified")) device_type else NULL,
custom_fields_filter_id = custom_fields_filter_id,
custom_tags_mode = if (os == "unified") custom_tags_mode else NULL,
data_model = data_model
)
# Remove NULLs
params[!sapply(params, is.null)]
}
# --- API Request Building and Performance ---
build_request <- function(base_url, path_segments, query_params) {
req <- httr2::request(base_url) %>%
httr2::req_user_agent("sensortowerR (https://github.com/ge-data-solutions/SensorTowerR)")
for (segment in path_segments) {
req <- req %>% httr2::req_url_path_append(segment)
}
req %>% httr2::req_url_query(!!!query_params)
}
perform_request <- function(req) {
tryCatch(
{
resp <- httr2::req_perform(req)
httr2::resp_check_status(resp) # Check for HTTP errors
resp
},
httr2_error = function(e) {
status <- httr2::resp_status(e$resp)
body <- httr2::resp_body_string(e$resp)
rlang::abort(
message = sprintf("API request failed with status %d.", status),
body = body,
parent = e
)
},
error = function(e) {
rlang::abort("An unexpected error occurred during the API request.", parent = e)
}
)
}
#' Core Data Fetching Function
#'
#' A reusable function to handle the common pattern of fetching data from the API.
#' Handles URL building, request execution, error handling, and response processing.
#'
#' @param endpoint Character. API endpoint path (e.g. "ios/sales_report_estimates").
#' @param params List. Query parameters.
#' @param auth_token Character. API token.
#' @param verbose Logical. Whether to print debug messages.
#' @param enrich_response Logical. Whether to enrich the response with metadata.
#' @param processor Function. Function to process the response. Defaults to process_response.
#'
#' @return A tibble with the results.
#' @keywords internal
fetch_data_core <- function(endpoint, params, auth_token, verbose = FALSE, enrich_response = TRUE, processor = process_response) {
base_url <- paste0(st_api_base_url(), "/v1")
# Clean up params
params <- params[!sapply(params, is.null)]
params$auth_token <- auth_token
if (verbose) {
message("Requesting: ", endpoint)
# Don't print auth token in debug
debug_params <- params
debug_params$auth_token <- "HIDDEN"
print(debug_params)
}
req <- httr2::request(base_url) %>%
httr2::req_url_path_append(endpoint) %>%
httr2::req_url_query(!!!params) %>%
httr2::req_user_agent("sensortowerR") %>%
httr2::req_retry(max_tries = 3, backoff = function(i) 2^i)
resp <- tryCatch(
{
httr2::req_perform(req)
},
error = function(e) {
if (verbose) message("API Request Failed: ", e$message)
return(NULL)
}
)
if (is.null(resp)) {
return(NULL)
}
# Call the processor function
# Check if processor accepts enrich_response argument
if ("enrich_response" %in% names(formals(processor))) {
processor(resp, enrich_response = enrich_response)
} else {
processor(resp)
}
}
# --- Response Processing ---
# Helper function to perform unnesting of entities
perform_unnest <- function(result_tbl) {
# Proactively coerce app_id to character in the nested data frames
# to prevent type errors during the unnest operation. The API can return
# a mix of integer and character IDs, which vctrs cannot combine.
result_tbl$entities <- lapply(result_tbl$entities, function(df) {
if (!is.null(df) && "app_id" %in% names(df)) {
df$app_id <- as.character(df$app_id)
}
df
})
tidyr::unnest(result_tbl, dplyr::all_of("entities"), names_sep = ".")
}
process_response <- function(resp, enrich_response = TRUE) {
body_raw <- httr2::resp_body_raw(resp)
if (length(body_raw) == 0) {
return(tibble::tibble())
}
body_text <- rawToChar(body_raw)
result <- jsonlite::fromJSON(body_text, flatten = TRUE)
if (length(result) == 0 || nrow(result) == 0) {
return(tibble::tibble())
}
result_tbl <- tibble::as_tibble(result)
# Enrich with app names if requested and possible
if (enrich_response && "entities" %in% names(result_tbl) && is.list(result_tbl$entities)) {
# IMPORTANT: Preserve the original app_id as unified_app_id BEFORE unnesting
# This is the true unified hex ID from the API
if ("app_id" %in% names(result_tbl)) {
result_tbl$unified_app_id <- result_tbl$app_id
# OPTIMIZATION: If we already have unified hex IDs, check if unnesting is necessary
# When using os="unified" with custom filters, the API returns unified data already
non_na_ids <- result_tbl$app_id[!is.na(result_tbl$app_id)]
if (length(non_na_ids) > 0 && all(grepl("^[a-f0-9]{24}$", non_na_ids))) {
# We have unified hex IDs - check if entities just contains redundant platform data
# If the entities only contain platform-specific versions of the same app,
# we can skip unnesting to avoid creating duplicates that need consolidation
# Check first entity to see if it would create duplicates
first_entity <- result_tbl$entities[[1]]
if (!is.null(first_entity) && is.data.frame(first_entity) && nrow(first_entity) > 1) {
# Multiple rows in entity = iOS + Android versions = will create duplicates
# For unified data, we can use aggregate_tags instead of unnesting entities
message("Streamlined processing: Using unified data without unnesting platform entities")
custom_tag_cols <- unique(unlist(lapply(result_tbl$entities, function(df) {
if (is.null(df) || !is.data.frame(df)) {
return(character())
}
names(df)[grepl("^custom_tags\\.", names(df))]
})))
for (col in custom_tag_cols) {
agg_col <- sub("^custom_tags\\.", "aggregate_tags.", col)
if (!agg_col %in% names(result_tbl)) {
result_tbl[[agg_col]] <- rep(NA_character_, nrow(result_tbl))
}
}
for (i in seq_len(nrow(result_tbl))) {
entity_df <- result_tbl$entities[[i]]
if (is.null(entity_df) || !is.data.frame(entity_df)) next
for (col in custom_tag_cols) {
agg_col <- sub("^custom_tags\\.", "aggregate_tags.", col)
value <- entity_df[[col]][1]
if (!is.null(value) && !is.na(value) && (is.na(result_tbl[[agg_col]][i]) || result_tbl[[agg_col]][i] == "")) {
result_tbl[[agg_col]][i] <- value
}
}
}
# Now safe to remove entities after extracting key fields
if ("aggregate_tags" %in% names(result_tbl)) {
# The aggregate_tags column already contains the unified metrics
# We've extracted gender data, so now we can remove entities
result_tbl$entities <- NULL # Remove entities to prevent confusion
}
} else {
# Single row or empty entity - safe to unnest
result_tbl <- perform_unnest(result_tbl)
}
} else {
# Not all unified IDs - need to unnest for platform resolution
result_tbl <- perform_unnest(result_tbl)
}
} else {
# No app_id column - proceed with normal unnesting
result_tbl <- perform_unnest(result_tbl)
}
# Clean up duplicate columns - prefer the entities.* versions for detailed data
base_cols <- setdiff(names(result_tbl), grep("^entities\\.", names(result_tbl), value = TRUE))
entities_cols <- grep("^entities\\.", names(result_tbl), value = TRUE)
# Remove base columns that have entities.* equivalents
# But preserve unified_app_id and app_id if they're hex format unified IDs
preserve_cols <- "unified_app_id"
if ("app_id" %in% base_cols && all(grepl("^[a-f0-9]{24}$", result_tbl$app_id[!is.na(result_tbl$app_id)]))) {
# If app_id contains hex unified IDs, don't remove it
preserve_cols <- c(preserve_cols, "app_id")
}
duplicated_bases <- intersect(
gsub("^entities\\.", "", entities_cols),
setdiff(base_cols, preserve_cols)
)
result_tbl <- result_tbl[, !names(result_tbl) %in% duplicated_bases]
# Handle app name column - check multiple possible sources
if ("entities.custom_tags.unified_product_name" %in% names(result_tbl)) {
result_tbl <- dplyr::rename(result_tbl, app.name = "entities.custom_tags.unified_product_name")
} else if ("entities.name" %in% names(result_tbl)) {
result_tbl <- dplyr::rename(result_tbl, app.name = "entities.name")
} else if ("name" %in% names(result_tbl) && !"app.name" %in% names(result_tbl)) {
result_tbl <- dplyr::rename(result_tbl, app.name = "name")
}
# Create unified_app_name and unified_app_id for consistency
if ("app.name" %in% names(result_tbl)) {
result_tbl$unified_app_name <- result_tbl$app.name
}
# Note: unified_app_id was already set above before unnesting
# The entities.app_id contains platform-specific IDs, not unified IDs
# Store platform-specific app_id separately for reference
if ("entities.app_id" %in% names(result_tbl) && !"platform_app_id" %in% names(result_tbl)) {
result_tbl$platform_app_id <- result_tbl$entities.app_id
}
# If we have app_ids but missing app names, look them up
if ((!"unified_app_name" %in% names(result_tbl) ||
any(is.na(result_tbl$unified_app_name))) &&
"unified_app_id" %in% names(result_tbl)) {
result_tbl <- lookup_app_names_by_id(result_tbl)
}
# Extract custom metrics with clean names
result_tbl <- extract_custom_metrics(result_tbl)
# Clean special characters from numeric values
result_tbl <- clean_numeric_values(result_tbl)
# Convert date columns to proper Date class
result_tbl <- clean_date_values(result_tbl)
} else {
# Even without enrichment, clean any numeric values that might have special characters
result_tbl <- clean_numeric_values(result_tbl)
# Convert date columns to proper Date class
result_tbl <- clean_date_values(result_tbl)
}
return(result_tbl)
}
# Helper function to extract useful custom metrics from entities.custom_tags and aggregate_tags columns
extract_custom_metrics <- function(data) {
if (nrow(data) == 0) {
return(data)
}
# Use the global constant METRIC_MAPPING
metrics_map <- METRIC_MAPPING
# Also add variations for custom_tags and entities.custom_tags prefixes
metrics_map <- c(
metrics_map,
stats::setNames(metrics_map, gsub("^aggregate_tags\\.", "custom_tags.", names(metrics_map))),
stats::setNames(metrics_map, gsub("^aggregate_tags\\.", "entities.custom_tags.", names(metrics_map)))
)
metrics_map <- metrics_map[!duplicated(names(metrics_map))]
# Extract only the metrics that exist in the data
available_metrics <- intersect(names(metrics_map), names(data))
if (length(available_metrics) > 0) {
# Create a new data frame with the extracted metrics
extracted_data <- data[, available_metrics, drop = FALSE]
names(extracted_data) <- metrics_map[available_metrics]
# Remove the original entities.custom_tags columns to avoid duplication
data <- data[, !names(data) %in% available_metrics, drop = FALSE]
# Bind the cleaned metrics back to the data using dplyr::bind_cols for safety
data <- dplyr::bind_cols(data, extracted_data)
}
return(data)
}
# Helper function to clean special characters from numeric values
# Helper function to clean special characters from numeric values
clean_numeric_values <- function(data) {
if (nrow(data) == 0) {
return(data)
}
# Define patterns of metrics that should be treated as numeric
# These patterns match the metric names we extract in extract_custom_metrics
numeric_metric_patterns <- NUMERIC_METRIC_PATTERNS
# Find columns that likely contain numeric data
numeric_cols <- names(data)[sapply(names(data), function(col_name) {
# Skip non-character columns
if (!is.character(data[[col_name]])) {
return(FALSE)
}
# Check if column name matches our numeric patterns
matches_pattern <- any(sapply(numeric_metric_patterns, function(pattern) {
grepl(pattern, col_name, ignore.case = TRUE)
}))
# Also check for aggregate_tags columns that might contain numeric data
is_aggregate_tag <- grepl("^aggregate_tags\\.", col_name)
# Check for entities.custom_tags columns that might be numeric
is_custom_tag <- grepl("^entities\\.custom_tags\\.", col_name)
# Skip obvious text columns
is_text_column <- grepl("\\bname$|\\burl$|\\bdate$|app_id$|country$|gender|genre|style|category", col_name, ignore.case = TRUE)
# If column matches our criteria and isn't obviously text, check the content
if ((matches_pattern || is_aggregate_tag || is_custom_tag) && !is_text_column) {
sample_values <- head(data[[col_name]][!is.na(data[[col_name]]) & data[[col_name]] != ""], 10)
if (length(sample_values) > 0) {
# Check if values contain digits with special characters OR pure numbers
has_numeric_with_special <- any(grepl("[0-9].*[%$,]|[%$,].*[0-9]", sample_values))
has_pure_numeric <- any(grepl("^[0-9]+\\.?[0-9]*$", sample_values))
# Also check for formatted numbers like "1,234" or scientific notation
has_formatted_numeric <- any(grepl("^[0-9,]+\\.?[0-9]*$", sample_values))
return(has_numeric_with_special || has_pure_numeric || has_formatted_numeric)
}
}
return(FALSE)
})]
# Clean each numeric column
for (col in numeric_cols) {
if (is.character(data[[col]])) {
original_values <- data[[col]]
# Handle percentage values (convert "45%" to 45, not 0.45)
# Most analytics metrics use percentages as whole numbers
has_percentages <- any(grepl("%", original_values, fixed = TRUE), na.rm = TRUE)
# Remove currency symbols, commas, spaces, and other formatting
cleaned_values <- gsub("[$,\\s]", "", original_values)
# Remove percentage signs (but track that they were there)
cleaned_values <- gsub("%", "", cleaned_values)
# Remove any remaining non-numeric characters except decimal points and minus signs
cleaned_values <- gsub("[^0-9.-]", "", cleaned_values)
# Convert empty strings to NA
cleaned_values[cleaned_values == ""] <- NA
# Convert to numeric, suppressing warnings for values that can't be converted
numeric_values <- suppressWarnings(as.numeric(cleaned_values))
# Only replace if we successfully converted most values
# This prevents accidentally converting text columns that happen to match patterns
non_na_original <- sum(!is.na(original_values))
non_na_converted <- sum(!is.na(numeric_values))
if (non_na_original > 0) {
conversion_rate <- non_na_converted / non_na_original
if (conversion_rate > 0.5) { # If more than 50% converted successfully
data[[col]] <- numeric_values
# Special handling for retention metrics - convert to decimals
if (grepl("retention", col, ignore.case = TRUE) && has_percentages) {
# Convert retention percentages to decimals (15.5% becomes 0.155)
data[[col]] <- numeric_values / 100
message(sprintf("Converted column '%s' to numeric (percentages to decimals)", col))
} else {
# Log the cleaning for transparency
if (has_percentages) {
message(sprintf("Converted column '%s' to numeric (removed %% symbols)", col))
} else {
message(sprintf("Converted column '%s' to numeric", col))
}
}
}
}
}
}
return(data)
}
# Global cache for app name lookups to avoid redundant API calls across function calls
.app_name_cache <- new.env()
# Helper function to lookup app names by app ID (for sales endpoint which doesn't provide names)
lookup_app_names_by_id <- function(data) {
# Determine which ID column to use for lookups
id_column <- NULL
if ("entities.app_id" %in% names(data)) {
id_column <- "entities.app_id"
} else if ("unified_app_id" %in% names(data)) {
id_column <- "unified_app_id"
} else if ("app_id" %in% names(data)) {
id_column <- "app_id"
} else {
return(data) # No ID column available
}
if (nrow(data) == 0) {
return(data)
}
# Determine which rows still need a unified app name
needs_lookup <- if ("unified_app_name" %in% names(data)) {
is.na(data$unified_app_name) | data$unified_app_name == ""
} else {
rep(TRUE, nrow(data))
}
candidate_ids <- unique(as.character(data[[id_column]][needs_lookup]))
candidate_ids <- candidate_ids[!is.na(candidate_ids) & candidate_ids != ""]
if (length(candidate_ids) == 0) {
if (!"unified_app_name" %in% names(data)) {
data$unified_app_name <- data[[id_column]]
}
return(data)
}
verbose_lookup <- getOption("sensortowerR.verbose", getOption("SensorTowerR.verbose", FALSE))
# For unified hex IDs, prefer direct /v1/unified/apps lookup.
# This avoids search-based resolution returning IDs as fallback names.
auth_token <- Sys.getenv("SENSORTOWER_AUTH_TOKEN")
all_hex_ids <- length(candidate_ids) > 0 && all(grepl("^[a-f0-9]{24}$", candidate_ids))
if (all_hex_ids && nzchar(auth_token)) {
chunk_size <- 100
id_chunks <- split(candidate_ids, ceiling(seq_along(candidate_ids) / chunk_size))
direct_details <- lapply(id_chunks, function(ids) {
tryCatch(
st_app_details(
app_ids = ids,
os = "unified",
include_developer_contacts = FALSE,
auth_token = auth_token
),
error = function(e) NULL
)
})
direct_details <- direct_details[!vapply(direct_details, is.null, logical(1))]
if (length(direct_details) > 0) {
direct_tbl <- dplyr::bind_rows(direct_details)
if (nrow(direct_tbl) > 0 && all(c("app_id", "app_name") %in% names(direct_tbl))) {
direct_lookup <- direct_tbl %>%
dplyr::transmute(
lookup_id = as.character(.data$app_id),
resolved_app_name = as.character(.data$app_name),
resolved_unified_id = as.character(.data$app_id)
) %>%
dplyr::distinct(.data$lookup_id, .keep_all = TRUE)
data <- dplyr::left_join(data, direct_lookup, by = stats::setNames("lookup_id", id_column))
if (!"unified_app_name" %in% names(data)) {
data$unified_app_name <- NA_character_
}
fill_names <- is.na(data$unified_app_name) | data$unified_app_name == ""
data$unified_app_name[fill_names] <- dplyr::coalesce(
data$resolved_app_name[fill_names],
as.character(data[[id_column]][fill_names])
)
if (!"unified_app_id" %in% names(data)) {
data$unified_app_id <- NA_character_
}
fill_unified <- is.na(data$unified_app_id) | data$unified_app_id == ""
data$unified_app_id[fill_unified] <- dplyr::coalesce(
data$resolved_unified_id[fill_unified],
data$unified_app_id[fill_unified]
)
data <- data %>%
dplyr::select(-dplyr::any_of(c("resolved_app_name", "resolved_unified_id")))
unresolved <- is.na(data$unified_app_name) | data$unified_app_name == "" |
grepl("^[a-f0-9]{24}$", data$unified_app_name)
if (!any(unresolved)) {
return(data)
}
}
}
}
# Attempt to satisfy requests from the ID cache first
cache_entries <- lapply(candidate_ids, lookup_cached_id)
names(cache_entries) <- candidate_ids
missing_ids <- candidate_ids[vapply(cache_entries, is.null, logical(1))]
# Resolve any missing IDs using the shared resolution pipeline
if (length(missing_ids) > 0) {
if (nzchar(auth_token)) {
if (verbose_lookup) {
message("Resolving ", length(missing_ids), " app IDs for name enrichment...")
}
batch_resolve_ids(missing_ids, auth_token = auth_token, use_cache = TRUE, verbose = verbose_lookup)
cache_entries[missing_ids] <- lapply(missing_ids, lookup_cached_id)
} else if (verbose_lookup) {
message("Skipping name lookup for ", length(missing_ids), " IDs (missing auth token)")
}
}
# Build a lookup table from cache results
lookup_table <- tibble::tibble(
lookup_id = candidate_ids,
resolved_app_name = NA_character_,
resolved_unified_id = NA_character_,
)
for (i in seq_along(candidate_ids)) {
entry <- cache_entries[[candidate_ids[i]]]
if (!is.null(entry)) {
name_val <- entry$app_name
if (!is.null(name_val)) {
name_val <- as.character(name_val)[1]
if (!is.na(name_val) && nzchar(name_val)) {
lookup_table$resolved_app_name[i] <- name_val
assign(candidate_ids[i], name_val, envir = .app_name_cache)
}
}
unified_val <- entry$unified_app_id
if (!is.null(unified_val)) {
unified_val <- as.character(unified_val)[1]
if (!is.na(unified_val) && nzchar(unified_val)) {
lookup_table$resolved_unified_id[i] <- unified_val
}
}
}
}
join_cols <- stats::setNames("lookup_id", id_column)
data <- dplyr::left_join(data, lookup_table, by = join_cols)
if (!"unified_app_name" %in% names(data)) {
data$unified_app_name <- NA_character_
}
fill_names <- is.na(data$unified_app_name) | data$unified_app_name == ""
data$unified_app_name[fill_names] <- dplyr::coalesce(
data$resolved_app_name[fill_names],
as.character(data[[id_column]][fill_names])
)
if (!"unified_app_id" %in% names(data)) {
data$unified_app_id <- NA_character_
}
fill_unified <- is.na(data$unified_app_id) | data$unified_app_id == ""
data$unified_app_id[fill_unified] <- dplyr::coalesce(
data$resolved_unified_id[fill_unified],
data$unified_app_id[fill_unified]
)
data <- data %>%
dplyr::select(-dplyr::any_of(c("resolved_app_name", "resolved_unified_id")))
data
}
# Helper function to deduplicate by a specific grouping column
deduplicate_by_group_id <- function(data, group_col) {
if (nrow(data) == 0 || !group_col %in% names(data)) {
return(data)
}
# Separate numeric and non-numeric columns
numeric_cols <- names(data)[sapply(data, is.numeric)]
non_numeric_cols <- setdiff(names(data), numeric_cols)
# Metrics to sum (downloads, revenue, counts)
sum_metrics <- numeric_cols[grepl("downloads|revenue|units|count|absolute", numeric_cols, ignore.case = TRUE)]
# Metrics to average (DAU, MAU, WAU, retention, ratings, percentages)
avg_metrics <- numeric_cols[grepl("dau|mau|wau|retention|rating|percentage|arpdau|rpd|avg|average", numeric_cols, ignore.case = TRUE)]
# Everything else (first value)
other_metrics <- setdiff(numeric_cols, c(sum_metrics, avg_metrics))
# Group by the specified column and aggregate
result <- data %>%
dplyr::group_by(!!rlang::sym(group_col)) %>%
dplyr::summarise(
# Keep the first unified_app_name
unified_app_name = dplyr::first(.data$unified_app_name),
# Keep first unified_app_id
unified_app_id = dplyr::first(.data$unified_app_id),
# Sum metrics that should be additive
dplyr::across(dplyr::all_of(sum_metrics), ~ sum(.x, na.rm = TRUE)),
# Average metrics that should be averaged
dplyr::across(dplyr::all_of(avg_metrics), ~ mean(.x, na.rm = TRUE)),
# Keep first value for other numeric metrics
dplyr::across(dplyr::all_of(other_metrics), ~ dplyr::first(.x[!is.na(.x)])),
# Keep first value for non-numeric columns
dplyr::across(
dplyr::all_of(setdiff(non_numeric_cols, c("unified_app_name", "unified_app_id", group_col))),
~ dplyr::first(.x[!is.na(.x)])
),
# Keep first non-NA date
dplyr::across(where(lubridate::is.Date), ~ dplyr::first(.x[!is.na(.x)])),
dplyr::across(where(lubridate::is.POSIXt), ~ dplyr::first(.x[!is.na(.x)])),
.groups = "drop"
) %>%
# Remove the grouping column if it starts with a dot (temporary column)
{
if (startsWith(group_col, ".")) dplyr::select(., -!!rlang::sym(group_col)) else .
}
# Convert 0 values back to NA where appropriate for averaged metrics
for (col in avg_metrics) {
if (col %in% names(result)) {
result[[col]][result[[col]] == 0] <- NA
}
}
return(result)
}
# Helper function to deduplicate apps by consolidating metrics for the same app name
deduplicate_apps_by_name <- function(data, fuzzy_match = TRUE) {
if (nrow(data) == 0 || !"unified_app_name" %in% names(data)) {
return(data)
}
# Create normalized names first to check for duplicates
if (fuzzy_match) {
# Group similar app names more aggressively
data <- data %>%
dplyr::mutate(
.name_normalized = .data$unified_app_name %>%
# Remove special characters and symbols
gsub("\u2122|\u00AE|\u00A9|:|\\*|\u00A4", "", .) %>%
# Handle specific known patterns
gsub("NYT Games.*|NYTimes.*", "nyt crossword", ., ignore.case = TRUE) %>%
gsub("Scrabble.*GO.*", "scrabble go", ., ignore.case = TRUE) %>%
gsub("Words With Friends.*", "words with friends", ., ignore.case = TRUE) %>%
gsub("Elevate.*Brain.*", "elevate brain training", ., ignore.case = TRUE) %>%
gsub("Word Trip.*|WordTrip.*", "word trip", ., ignore.case = TRUE) %>%
gsub("Heads Up.*|Warner.*Heads Up", "heads up", ., ignore.case = TRUE) %>%
gsub("Word Connect.*", "word connect", ., ignore.case = TRUE) %>%
# General cleanup
gsub("\\s+", " ", .) %>%
trimws() %>%
tolower()
)
} else {
# Simple normalization
data <- data %>%
dplyr::mutate(
.name_normalized = tolower(trimws(.data$unified_app_name))
)
}
# Check if there are actually duplicates to consolidate (after normalization)
if (length(unique(data$.name_normalized)) == nrow(data)) {
return(dplyr::select(data, -dplyr::any_of(".name_normalized"))) # No duplicates, return as-is
}
message(sprintf(
"Consolidating %d app entries into %d unique apps...",
nrow(data), length(unique(data$.name_normalized))
))
# Identify numeric columns to sum vs average
numeric_cols <- names(data)[sapply(data, is.numeric)]
# Metrics to SUM (additive across platforms)
# IMPORTANT: DAU/MAU/WAU/users are NOT additive - same user can be on multiple platforms
sum_metrics <- numeric_cols[grepl(
"downloads|revenue|units|count|absolute",
numeric_cols,
ignore.case = TRUE
)]
# Explicitly exclude user metrics from sum
sum_metrics <- sum_metrics[!grepl("dau|mau|wau|users", sum_metrics, ignore.case = TRUE)]
# Metrics to TAKE MAX (user counts - same users across platforms)
max_metrics <- numeric_cols[grepl(
"dau|mau|wau|users",
numeric_cols,
ignore.case = TRUE
)]
# Exclude from max if it's a ratio/rate
max_metrics <- max_metrics[!grepl("rate|ratio|percent", max_metrics, ignore.case = TRUE)]
# Metrics to AVERAGE (rates, percentages, ratios)
avg_metrics <- numeric_cols[grepl(
"retention|rpd|rating|age|share|percent|rate|ratio|transformed",
numeric_cols,
ignore.case = TRUE
)]
# Everything else (first value)
other_metrics <- setdiff(numeric_cols, c(sum_metrics, avg_metrics, max_metrics))
# Group by normalized name
result <- tryCatch(
{
data %>%
dplyr::group_by(.data$.name_normalized) %>%
dplyr::summarise(
# Keep the first unified_app_name
unified_app_name = dplyr::first(.data$unified_app_name),
# Keep first unified_app_id
unified_app_id = dplyr::first(.data$unified_app_id),
# Sum metrics that should be additive
dplyr::across(dplyr::all_of(sum_metrics), ~ sum(.x, na.rm = TRUE)),
# Max for user metrics (same users across platforms)
dplyr::across(dplyr::all_of(max_metrics), ~ max(.x, na.rm = TRUE)),
# Average metrics that are rates/percentages
dplyr::across(dplyr::all_of(avg_metrics), ~ mean(.x, na.rm = TRUE)),
# First value for other metrics
dplyr::across(dplyr::all_of(other_metrics), ~ dplyr::first(.x[!is.na(.x)])),
# First value for character/date columns
dplyr::across(where(is.character), ~ dplyr::first(.x[!is.na(.x) & .x != ""])),
dplyr::across(where(lubridate::is.Date), ~ dplyr::first(.x[!is.na(.x)])),
dplyr::across(where(lubridate::is.POSIXt), ~ dplyr::first(.x[!is.na(.x)])),
.groups = "drop"
) %>%
dplyr::relocate(unified_app_name, unified_app_id) %>%
dplyr::select(-dplyr::any_of(".name_normalized"))
},
error = function(e) {
message("Warning: Deduplication failed, returning original data. Error: ", e$message)
return(dplyr::select(data, -dplyr::any_of(".name_normalized")))
}
)
# Convert 0 values back to NA where appropriate for averaged and max metrics
for (col in c(avg_metrics, max_metrics)) {
if (col %in% names(result)) {
# For max metrics, -Inf means all were NA, convert to NA
if (col %in% max_metrics) {
result[[col]][is.infinite(result[[col]])] <- NA
}
# For avg metrics, 0 from all NAs should be NA
result[[col]][result[[col]] == 0] <- NA
}
}
return(result)
}
# Helper function to convert date columns to proper Date class
clean_date_values <- function(data) {
if (nrow(data) == 0) {
return(data)
}
# Find columns that likely contain date data
date_cols <- names(data)[sapply(names(data), function(col_name) {
# Skip non-character columns
if (!is.character(data[[col_name]])) {
return(FALSE)
}
# Check if column name suggests it contains dates
is_date_column <- grepl("date|release", col_name, ignore.case = TRUE)
# Skip columns that are clearly not dates (like frequency, days ago)
is_non_date <- grepl("frequency|days ago|update|freq", col_name, ignore.case = TRUE)
if (is_date_column && !is_non_date) {
# Check if the column actually contains date-like values
sample_values <- head(data[[col_name]][!is.na(data[[col_name]]) & data[[col_name]] != ""], 5)
if (length(sample_values) > 0) {
# Check for various date patterns
has_iso_dates <- any(grepl("\\d{4}-\\d{2}-\\d{2}", sample_values))
has_slash_dates <- any(grepl("\\d{4}/\\d{2}/\\d{2}", sample_values))
has_datetime <- any(grepl("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", sample_values))
return(has_iso_dates || has_slash_dates || has_datetime)
}
}
return(FALSE)
})]
if (length(date_cols) == 0) {
return(data)
}
# Convert each date column
for (col in date_cols) {
if (is.character(data[[col]])) {
original_values <- data[[col]]
# Try to convert to dates
converted_dates <- tryCatch(
{
# Handle different date formats
parsed_dates <- rep(as.Date(NA), length(original_values))
# Handle ISO datetime format (e.g., "2025-07-01T00:00:00Z")
iso_pattern <- grepl("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", original_values)
if (any(iso_pattern, na.rm = TRUE)) {
parsed_dates[iso_pattern] <- as.Date(lubridate::ymd_hms(original_values[iso_pattern]))
}
# Handle YYYY-MM-DD format
ymd_pattern <- grepl("^\\d{4}-\\d{2}-\\d{2}$", original_values) & !iso_pattern
if (any(ymd_pattern, na.rm = TRUE)) {
parsed_dates[ymd_pattern] <- as.Date(original_values[ymd_pattern])
}
# Handle YYYY/MM/DD format
slash_pattern <- grepl("^\\d{4}/\\d{2}/\\d{2}$", original_values)
if (any(slash_pattern, na.rm = TRUE)) {
parsed_dates[slash_pattern] <- as.Date(original_values[slash_pattern], format = "%Y/%m/%d")
}
parsed_dates
},
error = function(e) {
# If conversion fails, return original
original_values
}
)
# Only replace if we successfully converted most values
if (inherits(converted_dates, "Date")) {
non_na_original <- sum(!is.na(original_values) & original_values != "")
non_na_converted <- sum(!is.na(converted_dates))
if (non_na_original > 0) {
conversion_rate <- non_na_converted / non_na_original
if (conversion_rate > 0.5) { # If more than 50% converted successfully
data[[col]] <- converted_dates
message(sprintf("Converted column '%s' to Date class", col))
}
}
}
}
}
return(data)
}
#' Clear App Name Cache
#'
#' Clears the internal cache of app name lookups. Useful for testing or when
#' you want to refresh app name data.
#'
#' @return No return value, called for side effects (clearing the cache).
#' @export
st_clear_app_cache <- function() {
rm(list = ls(envir = .app_name_cache), envir = .app_name_cache)
message("App name cache cleared")
}
#' Fetch and Unified Data from Platforms
#'
#' Fetches data from iOS and/or Android and optionally combines them.
#' Handles missing data from one platform gracefully.
#'
#' @param ios_app_id Character. iOS App ID.
#' @param android_app_id Character. Android App ID.
#' @param start_date Date. Start date.
#' @param end_date Date. End date.
#' @param countries Character vector. Country codes.
#' @param date_granularity Character. Granularity.
#' @param auth_token Character. API token.
#' @param verbose Logical. Verbose output.
#' @param combine_to_unified Logical. Whether to sum metrics into a unified view.
#'
#' @return A tibble with columns date, country, revenue, downloads, and optionally platform/app_id.
#' @keywords internal
fetch_unified_data <- function(
ios_app_id = NULL,
android_app_id = NULL,
start_date,
end_date,
countries,
date_granularity,
auth_token,
verbose = FALSE,
combine_to_unified = TRUE
) {
all_data <- tibble::tibble()
# Track what we requested vs what we got
ios_requested <- !is.null(ios_app_id) && !is.na(ios_app_id)
android_requested <- !is.null(android_app_id) && !is.na(android_app_id)
ios_has_data <- FALSE
android_has_data <- FALSE
# Fetch iOS data
if (ios_requested) {
if (verbose) message("Fetching iOS data for: ", ios_app_id)
ios_result <- tryCatch(
{
st_sales_report(
os = "ios",
ios_app_id = ios_app_id,
countries = countries,
start_date = start_date,
end_date = end_date,
date_granularity = date_granularity,
auth_token = auth_token,
verbose = FALSE # Suppress inner verbose to avoid noise
)
},
error = function(e) {
if (verbose) message("iOS fetch warning: ", e$message)
NULL
}
)
if (!is.null(ios_result) && nrow(ios_result) > 0) {
ios_has_data <- TRUE
# Standardize columns
ios_result <- ios_result %>%
dplyr::mutate(
platform = "ios",
app_id = as.character(ios_app_id),
app_id_type = "ios",
revenue = if ("total_revenue" %in% names(.)) total_revenue else if ("revenue" %in% names(.)) revenue else 0,
downloads = if ("total_downloads" %in% names(.)) total_downloads else if ("downloads" %in% names(.)) downloads else 0
) %>%
dplyr::select(date, country, revenue, downloads, platform, app_id, app_id_type)
all_data <- dplyr::bind_rows(all_data, ios_result)
}
}
# Fetch Android data
if (android_requested) {
if (verbose) message("Fetching Android data for: ", android_app_id)
android_result <- tryCatch(
{
st_sales_report(
os = "android",
android_app_id = android_app_id,
countries = countries,
start_date = start_date,
end_date = end_date,
date_granularity = date_granularity,
auth_token = auth_token,
verbose = FALSE
)
},
error = function(e) {
if (verbose) message("Android fetch warning: ", e$message)
NULL
}
)
if (!is.null(android_result) && nrow(android_result) > 0) {
android_has_data <- TRUE
# Standardize columns
android_result <- android_result %>%
dplyr::mutate(
platform = "android",
app_id = as.character(android_app_id),
app_id_type = "android",
country = if ("c" %in% names(.)) c else country
) %>%
dplyr::select(date, country, revenue, downloads, platform, app_id, app_id_type)
all_data <- dplyr::bind_rows(all_data, android_result)
}
}
# Handle missing data scenarios
if (ios_requested && android_requested) {
if (!ios_has_data && !android_has_data) {
if (verbose) message("No data available for either platform.")
return(tibble::tibble(date = as.Date(character()), country = character(), revenue = numeric(), downloads = numeric()))
} else if (!ios_has_data && verbose) {
message("Missing iOS data (returning Android only).")
} else if (!android_has_data && verbose) {
message("Missing Android data (returning iOS only).")
}
} else if ((ios_requested && !ios_has_data) || (android_requested && !android_has_data)) {
if (verbose) message("No data returned for requested platform.")
return(tibble::tibble(date = as.Date(character()), country = character(), revenue = numeric(), downloads = numeric()))
}
# Combine or return raw
if (nrow(all_data) > 0) {
if (combine_to_unified) {
combined <- all_data %>%
dplyr::group_by(date, country) %>%
dplyr::summarise(
revenue = sum(revenue, na.rm = TRUE),
downloads = sum(downloads, na.rm = TRUE),
.groups = "drop"
)
return(combined)
} else {
return(all_data)
}
}
return(all_data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.