#' Make a Google Analytics v4 API fetch
#'
#' @description
#' This function constructs the Google Analytics API v4 call to be called
#' via \link{fetch_google_analytics_4}
#'
#' @param viewId viewId of data to get.
#' @param date_range character or date vector of format \code{c(start, end)} or
#' for two date ranges: \code{c(start1,end1,start2,end2)}
#' @param metrics Metric to fetch. Supports calculated metrics.
#' @param dimensions Dimensions to fetch.
#' @param dim_filters A \link{filter_clause_ga4} wrapping \link{dim_filter}
#' @param met_filters A \link{filter_clause_ga4} wrapping \link{met_filter}
#' @param filtersExpression A v3 API style simple filter string. Not used with other filters.
#' @param order An \link{order_type} object
#' @param segments List of segments as created by \link{segment_ga4}
#' @param pivots Pivots of the data as created by \link{pivot_ga4}
#' @param cohorts Cohorts created by \link{make_cohort_group}
#' @param pageToken Where to start the data fetch
#' @param pageSize How many rows to fetch. Max 10000 each batch.
#' @param samplingLevel Sample level
#' @param metricFormat If supplying calculated metrics, specify the metric type
#' @param histogramBuckets For numeric dimensions such as hour, a list of buckets of data.
#' See details in \link{make_ga_4_req}
#'
#' @section Metrics:
#' Metrics support calculated metrics like ga:users / ga:sessions if you supply
#' them in a named vector.
#'
#' You must supply the correct 'ga:' prefix unlike normal metrics
#'
#' You can mix calculated and normal metrics like so:
#'
#' \code{customMetric <- c(sessionPerVisitor = "ga:sessions / ga:visitors",
#' "bounceRate",
#' "entrances")}
#'
#' You can also optionally supply a \code{metricFormat} parameter that must be
#' the same length as the metrics. \code{metricFormat} can be:
#' \code{METRIC_TYPE_UNSPECIFIED, INTEGER, FLOAT, CURRENCY, PERCENT, TIME}
#'
#' All metrics are currently parsed to as.numeric when in R.
#'
#' @section Dimensions:
#'
#' Supply a character vector of dimensions, with or without \code{ga:} prefix.
#'
#' Optionally for numeric dimension types such as
#' \code{ga:hour, ga:browserVersion, ga:sessionsToTransaction}, etc. supply
#' histogram buckets suitable for histogram plots.
#'
#' If non-empty, we place dimension values into buckets after string to int64.
#' Dimension values that are not the string representation of an integral value
#' will be converted to zero. The bucket values have to be in increasing order.
#' Each bucket is closed on the lower end, and open on the upper end.
#' The "first" bucket includes all values less than the first boundary,
#' the "last" bucket includes all values up to infinity.
#' Dimension values that fall in a bucket get transformed to a new dimension
#' value. For example, if one gives a list of "0, 1, 3, 4, 7", then we
#' return the following buckets: -
#' \itemize{
#' \item bucket #1: values < 0, dimension value "<0"
#' \item bucket #2: values in [0,1), dimension value "0"
#' \item bucket #3: values in [1,3), dimension value "1-2"
#' \item bucket #4: values in [3,4), dimension value "3"
#' \item bucket #5: values in [4,7), dimension value "4-6"
#' \item bucket #6: values >= 7, dimension value "7+"
#' }
#'
#' @examples
#'
#' \dontrun{
#' library(googleAnalyticsR)
#'
#' ## authenticate,
#' ## or use the RStudio Addin "Google API Auth" with analytics scopes set
#' ga_auth()
#'
#' ## get your accounts
#' account_list <- google_analytics_account_list()
#'
#' ## pick a profile with data to query
#'
#' ga_id <- account_list[23,'viewId']
#'
#' ga_req1 <- make_ga_4_req(ga_id,
#' date_range = c("2015-07-30","2015-10-01"),
#' dimensions=c('source','medium'),
#' metrics = c('sessions'))
#'
#' ga_req2 <- make_ga_4_req(ga_id,
#' date_range = c("2015-07-30","2015-10-01"),
#' dimensions=c('source','medium'),
#' metrics = c('users'))
#'
#' fetch_google_analytics_4(list(ga_req1, ga_req2))
#'
#' }
#'
#'
#' @family GAv4 fetch functions
#' @export
make_ga_4_req <- function(viewId,
                          date_range=NULL,
                          metrics=NULL,
                          dimensions=NULL,
                          dim_filters=NULL,
                          met_filters=NULL,
                          filtersExpression=NULL,
                          order=NULL,
                          segments=NULL,
                          pivots=NULL,
                          cohorts=NULL,
                          pageToken=0,
                          pageSize=1000,
                          samplingLevel=c("DEFAULT", "SMALL","LARGE"),
                          metricFormat=NULL,
                          histogramBuckets=NULL) {

  samplingLevel <- match.arg(samplingLevel)

  ## ---- input validation ---------------------------------------------------
  if(all(is.null(date_range), is.null(cohorts))){
    stop("Must supply one of date_range or cohorts")
  }

  if(!is.null(cohorts)){
    ## runtime argument checks: testthat is a test-time dependency, so use
    ## plain stop() rather than testthat::expect_true() in package code
    if(!isTRUE(cohort_metric_check(metrics))){
      stop("Metrics did not pass cohort checks")
    }
    if(!isTRUE(cohort_dimension_check(dimensions))){
      stop("Dimensions did not pass cohort checks")
    }
    if(!is.null(date_range)){
      stop("Don't supply date_range when using cohorts")
    }
  }

  if(is.null(metrics)){
    stop("Must supply a metric")
  }

  ## segment results come back in a 'segment' dimension, so make sure it is
  ## always requested when segments are supplied
  if(!is.null(segments)){
    if(!any("segment" %in% dimensions)){
      dimensions <- c(dimensions, "segment")
    }
  }

  ## ensure the view ID carries the "ga:" prefix
  id <- sapply(viewId, checkPrefix, prefix = "ga")

  ## one or two date ranges: c(start, end) or c(start1, end1, start2, end2)
  date_list_one <- date_ga4(date_range[1:2])
  if(length(date_range) == 4){
    date_list_two <- date_ga4(date_range[3:4])
  } else {
    date_list_two <- NULL
  }

  dim_list <- dimension_ga4(dimensions, histogramBuckets)
  met_list <- metric_ga4(metrics, metricFormat)

  ## if histogram buckets are requested without an explicit order, sort the
  ## bucketed dimensions as HISTOGRAM_BUCKET so buckets come back in order
  if(all(is.null(order), !is.null(histogramBuckets))){
    bys <- intersect(dimensions, names(histogramBuckets))
    order <- lapply(bys,
                    order_type,
                    FALSE,
                    "HISTOGRAM_BUCKET")
  }

  ## assemble the v4 reportRequest object
  request <-
    structure(
      list(
        viewId = id,
        dateRanges = list(
          date_list_one,
          date_list_two
        ),
        samplingLevel = samplingLevel,
        dimensions = dim_list,
        metrics = met_list,
        dimensionFilterClauses = dim_filters,
        metricFilterClauses = met_filters,
        filtersExpression = filtersExpression,
        orderBys = order,
        segments = segments,
        pivots = pivots,
        cohortGroup = cohorts,
        pageToken = as.character(pageToken),
        pageSize = pageSize,
        includeEmptyRows = TRUE
      ),
      class = "ga4_req")

  ## drop NULL elements and return the request visibly (the original ended on
  ## an assignment, which returned the value invisibly)
  rmNullObs(request)
}
#' Get Google Analytics v4 data (single request)
#'
#' A convenience function that wraps \link{make_ga_4_req} and \link{fetch_google_analytics_4}
#' for the common case of one GA data request.
#'
#' Will perform automatic batching if over the 10000 row per API call limit.
#'
#' @section Anti-sampling:
#'
#' \code{anti_sample} being TRUE ignores \code{max} as the API call is split over days
#' to mitigate the sampling session limit, in which case a row limit won't work. Take the top rows
#' of the result yourself instead e.g. \code{head(ga_data_unsampled, 50300)}
#'
#' If you are lucky enough to need sub-day sampling, it will attempt to fetch per hour, but you are
#' restricted to not using \code{dim_filter} argument if this is the case.
#' Try using \code{filtersExpression} instead.
#'
#' \code{anti_sample} being TRUE will also set \code{samplingLevel='LARGE'} to minimise
#' the number of calls.
#'
#' @inheritParams make_ga_4_req
#' @param max Maximum number of rows to fetch. Defaults at 1000. Use -1 to fetch all results.
#' @param anti_sample If TRUE will split up the call to avoid sampling.
#' @param anti_sample_batches "auto" default, or set to number of days per batch. 1 = daily.
#'
#' @return A Google Analytics data.frame
#'
#' @examples
#'
#' \dontrun{
#' library(googleAnalyticsR)
#'
#' ## authenticate,
#' ## or use the RStudio Addin "Google API Auth" with analytics scopes set
#' ga_auth()
#'
#' ## get your accounts
#' account_list <- google_analytics_account_list()
#'
#' ## pick a profile with data to query
#'
#' ga_id <- account_list[23,'viewId']
#'
#' ga_data <- google_analytics_4(ga_id,
#' date_range = c("2015-07-30","2015-10-01"),
#' dimensions=c('source','medium'),
#' metrics = c('sessions','bounces'))
#'
#' }
#'
#' @family GAv4 fetch functions
#' @export
google_analytics_4 <- function(viewId,
                               date_range=NULL,
                               metrics=NULL,
                               dimensions=NULL,
                               dim_filters=NULL,
                               met_filters=NULL,
                               filtersExpression=NULL,
                               order=NULL,
                               segments=NULL,
                               pivots=NULL,
                               cohorts=NULL,
                               max=1000,
                               samplingLevel=c("DEFAULT", "SMALL","LARGE"),
                               metricFormat=NULL,
                               histogramBuckets=NULL,
                               anti_sample = FALSE,
                               anti_sample_batches = "auto"){

  max <- as.integer(max)
  allResults <- FALSE
  if(max < 0){
    ## max < 0 (documented as -1) means "fetch everything": start with one
    ## full multi-request call of 50000 rows (0-indexed, hence 49999), then
    ## page through the remainder below once the total rowCount is known
    ## size of 1 v4 batch 0 indexed
    max <- as.integer(49999)
    allResults <- TRUE
  }
  ## hard API limit of rows per single reportRequest
  reqRowLimit <- as.integer(10000)

  if(anti_sample){
    ## anti-sampling delegates the entire fetch to anti_sample(), which
    ## splits the call over date batches; note max is ignored on this path
    ## (see roxygen "Anti-sampling" section)
    myMessage("anti_sample set to TRUE. Mitigating sampling via multiple API calls.", level = 3)
    return(anti_sample(viewId = viewId,
                       date_range = date_range,
                       metrics = metrics,
                       dimensions = dimensions,
                       dim_filters = dim_filters,
                       met_filters = met_filters,
                       filtersExpression = filtersExpression,
                       order = order,
                       segments = segments,
                       pivots = pivots,
                       cohorts = cohorts,
                       metricFormat = metricFormat,
                       histogramBuckets = histogramBuckets,
                       anti_sample_batches = anti_sample_batches))
  }

  if(max > reqRowLimit){
    myMessage("Multi-call to API", level = 2)
  }

  ## page start offsets: 0, 10000, 20000, ... up to max
  ## NOTE(review): when max is an exact multiple of 10000 this includes a
  ## final offset whose pageSize works out to 0 — confirm the API tolerates
  ## a zero-row request
  meta_batch_start_index <- seq(from=0, to=max, by=reqRowLimit)

  ## make a list of the requests, one per page of up to 10000 rows
  requests <- lapply(meta_batch_start_index, function(start_index){
    start_index <- as.integer(start_index)
    if(allResults){
      ## fetching everything: always ask for a full page
      remaining <- as.integer(10000)
    } else {
      ## last page may need fewer rows than the per-request limit
      remaining <- min(as.integer(max - start_index), reqRowLimit)
    }
    make_ga_4_req(viewId = viewId,
                  date_range = date_range,
                  metrics = metrics,
                  dimensions = dimensions,
                  dim_filters = dim_filters,
                  met_filters = met_filters,
                  filtersExpression = filtersExpression,
                  order = order,
                  segments = segments,
                  pivots = pivots,
                  cohorts = cohorts,
                  pageToken = start_index,
                  pageSize = remaining,
                  samplingLevel = samplingLevel,
                  metricFormat = metricFormat,
                  histogramBuckets = histogramBuckets)
  })

  out <- fetch_google_analytics_4(requests, merge = TRUE)

  if(allResults){
    ## "rowCount" attribute carries the total rows available server-side
    ## (presumably attached by the response parser — verify there)
    all_rows <- as.integer(attr(out, "rowCount"))
    if(nrow(out) < all_rows){
      ## first call returned 50000 rows but more exist:
      ## create the remaining requests starting at offset 50000
      meta_batch_start_index2 <- seq(from=50000, to=all_rows, by=reqRowLimit)
      ## make a list of the requests
      requests2 <- lapply(meta_batch_start_index2, function(start_index){
        start_index <- as.integer(start_index)
        remaining <- as.integer(10000)
        make_ga_4_req(viewId = viewId,
                      date_range = date_range,
                      metrics = metrics,
                      dimensions = dimensions,
                      dim_filters = dim_filters,
                      met_filters = met_filters,
                      filtersExpression = filtersExpression,
                      order = order,
                      segments = segments,
                      pivots = pivots,
                      cohorts = cohorts,
                      pageToken = start_index,
                      pageSize = remaining,
                      samplingLevel = samplingLevel,
                      metricFormat = metricFormat,
                      histogramBuckets = histogramBuckets)
      })
      the_rest <- fetch_google_analytics_4(requests2, merge = TRUE)
      out <- rbind(out, the_rest)
      myMessage("All data downloaded, total of [",all_rows,"]", level = 3)
    } else {
      myMessage("One batch enough to get all results", level = 1)
    }
  }

  ## warn about sampled data; ".d1"/".d2" suffixed columns appear to signal a
  ## two-date-range comparison — confirm against the parser's column naming
  sampling_message(attr(out, "samplesReadCounts"),
                   attr(out, "samplingSpaceSizes"),
                   hasDateComparison = any(grepl("\\.d1|\\.d2", names(out))))

  out
}
#' Fetch multiple GAv4 requests
#'
#' Fetch the GAv4 requests as created by \link{make_ga_4_req}
#'
#' For same viewId, daterange, segments, samplingLevel and cohortGroup, v4 batches can be made
#'
#' @param request_list A list of requests created by \link{make_ga_4_req}
#' @param merge If TRUE then will rbind that list of data.frames
#'
#' @return A dataframe if one request, or a list of data.frames if multiple.
#'
#' @importFrom googleAuthR gar_api_generator
#'
#' @examples
#'
#' \dontrun{
#' library(googleAnalyticsR)
#'
#' ## authenticate,
#' ## or use the RStudio Addin "Google API Auth" with analytics scopes set
#' ga_auth()
#'
#' ## get your accounts
#' account_list <- google_analytics_account_list()
#'
#' ## pick a profile with data to query
#'
#' ga_id <- account_list[23,'viewId']
#'
#' ga_req1 <- make_ga_4_req(ga_id,
#' date_range = c("2015-07-30","2015-10-01"),
#' dimensions=c('source','medium'),
#' metrics = c('sessions'))
#'
#' ga_req2 <- make_ga_4_req(ga_id,
#' date_range = c("2015-07-30","2015-10-01"),
#' dimensions=c('source','medium'),
#' metrics = c('users'))
#'
#' fetch_google_analytics_4(list(ga_req1, ga_req2))
#'
#' }
#'
#' @family GAv4 fetch functions
#' @export
fetch_google_analytics_4 <- function(request_list, merge = FALSE){

  ## runtime check: use stop() rather than a testthat expectation, as
  ## testthat is a test-time dependency only
  if(!is.list(request_list)){
    stop("request_list must be a list of requests created by make_ga_4_req()")
  }

  ## amount of requests allowed per v4 API call
  ga_batch_limit <- 5
  ## amount of API calls per Google batch (only used if GOOGLE_BATCHING)
  gar_batch_size <- 10

  ## a single bare request (not wrapped in list()) has its viewId directly
  ## accessible; the consistency checks below only apply to lists of requests
  ## (running them on a bare request errored on its atomic elements before)
  single_request <- !is.null(request_list$viewId)

  ## v4 batching is only possible when these fields agree across requests
  assert_same <- function(field){
    if(length(unique(lapply(request_list, function(x) x[[field]]))) != 1){
      stop("request_list must all have the same ", field)
    }
  }
  if(!single_request){
    assert_same("viewId")
    assert_same("dateRanges")
    assert_same("segments")
    assert_same("samplingLevel")
    assert_same("cohortGroup")
  }

  myMessage("Calling APIv4....", level = 2)

  ## the API-calling function; the parser turns the JSON into data.frames
  f <- gar_api_generator("https://analyticsreporting.googleapis.com/v4/reports:batchGet",
                         "POST",
                         data_parse_function = google_analytics_4_parse_batch,
                         simplifyVector = FALSE)

  ## if a single request or at most 5 requests, one API call is enough
  if(single_request || length(request_list) <= ga_batch_limit){
    myMessage("Single v4 batch", level = 2)
    request_list <- unitToList(request_list)
    body <- list(
      reportRequests = request_list
    )
    out <- f(the_body = body)
  } else {
    myMessage("Multiple v4 batch", level = 2)

    ## split the requests into chunks of ga_batch_limit per API call
    request_list_index <- seq(1, length(request_list), ga_batch_limit)
    batch_list <- lapply(request_list_index,
                         function(x) request_list[x:(x + (ga_batch_limit - 1))])

    ## make the body for each v4 API call; rmNullObs drops the NULL entries
    ## the last (short) chunk picks up from over-indexing
    body_list <- lapply(batch_list, function(x) list(reportRequests = x))
    body_list <- rmNullObs(body_list)

    ## Only if supported in Google batching
    GOOGLE_BATCHING <- FALSE
    if(GOOGLE_BATCHING){
      ## group the API call bodies into gar_batch_size per gar_batch call
      batch_list_index <- seq(1, length(body_list), gar_batch_size)
      batch_body_list <- lapply(batch_list_index,
                                function(x) body_list[x:(x + (gar_batch_size - 1))])
      batch_body_list <- rmNullObs(batch_body_list)

      ## make the call list
      call_list <- lapply(batch_body_list, function(bl){
        lapply(bl, function(rr){
          f(the_body = rr, batch = TRUE)
        })
      })

      ## make the batch calls
      response_list <- lapply(call_list, googleAuthR::gar_batch)
    } else {
      ## loop over the requests normally
      myMessage("Looping over maximum [", length(body_list), "] batches.", level = 1)
      response_list <- lapply(body_list, function(b){
        myMessage("Fetching data batch...", level = 1)
        f(the_body = b)
      })
    }

    ## bug fix: collapse the responses for BOTH branches (previously only the
    ## non-batching branch assigned `out`, leaving it undefined otherwise)
    out <- unlist(response_list, recursive = FALSE)
  }

  ## if only one entry in the list, return the dataframe
  if(length(out) == 1){
    out <- out[[1]]
  } else {
    ## returned a list of data.frames
    if(merge){

      ## if an empty list, return NULL
      if(all(vapply(out, is.null, logical(1)))){
        out <- NULL
      } else {
        ## check all dataframes have same columns before rbind
        df_names <- rmNullObs(lapply(out, function(x) names(x)))
        if(length(unique(df_names)) != 1){
          stop("List of dataframes have non-identical column names. Got ",
               paste(lapply(out, function(x) names(x)), collapse = " "))
        }
        out <- Reduce(rbind, out)
      }
    }
  }

  message("Downloaded [",NROW(out),"] rows from a total of [",attr(out, "rowCount"), "].")

  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.