# Copyright 2010 Google Inc. All Rights Reserved.
# Author: Mike Pearmain.
# Author: Nick Mihailovski.
# Author: Nicolas Remy.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is the QueryBuilder() function to be used with the RGoogleAnalytics()
# functions in order to process a request from the GA API.
# The RGoogleAnalytics() functions are not dependent on this, but we use
# this as a helper for constructing the correct URIs to retrieve data.
#
# This requires the RCurl package which can be downloaded
# from: http://www.omegahat.org/R
# using the following command:
# 'install.packages('RCurl', repos = "http://www.omegahat.org/R")'
library(RCurl)
QueryBuilder <- function() {
# The main builder class for constructing URI requests.
# This function lists all the elements and parameters that make up a data
# feed request. In general, you provide the table ID corresponding to the
# profile you want to retrieve data from, choose the combination of
# dimensions and metrics, and provide a date range along with other
# parameters in a query string.
#
# More detailed information on each parameter can be found at the link below:
# http://code.google.com/apis/analytics/docs/
# gdata/gdataReferenceDataFeed.html#dataRequest
#
# We use a builder method to construct the final query for data checking and
# exceptions, rather than raw strings.
# This also enables us to scale the parameters map in later versions.
#
# All the parameter values that are accepted for a profile can be found
# by looking at the metrics available in the GetProfileData() function.
#
# Returns:
# builder: The builder method function to process the parameters.
#
# Example:
# # An example of using this construction pattern is shown below.
#
# # Set the relevant variable information, if the variable is optional, you
# # do not need to specify it in the construction or simply place it as NULL.
#
# # query <- QueryBuilder()
# # query$Init(start.date = "2010-05-01",
# # end.date = "2010-08-20",
# # dimensions = "ga:date",
# # metrics = "ga:visits",
# # sort = "ga:date",
# # table.id = "ga:30661272")
# # ga.data <- ga$GetRDataFromQuery(query)
# Upper bounds checked by the Dimensions(), Metrics() and TableID() setters.
kMaxTableIds <- 1
kMaxMetrics <- 10
kMaxDimensions <- 7
# Query parameter state shared by the functions defined below. Every
# parameter starts out unset (NULL) until its setter stores a value.
start.date <- end.date <- NULL
dimensions <- metrics <- NULL
segment <- sort <- filters <- NULL
max.results <- start.index <- NULL
table.id <- NULL
StartDate <- function(start.date.param = NA) {
  # Gets, sets, or clears the start date of the query.
  #
  # Every Analytics feed request needs a start and an end date; the server
  # returns a request error when they are missing. The API documentation
  # gives 2005-01-01 as the earliest valid start date and no upper limit,
  # although dates far in the future will most likely return empty results.
  # Neither bound is checked here.
  #
  # Args:
  #   start.date.param: Optional. A single date written as "YYYY-MM-DD"
  #                     (a Date object is accepted and stored as that
  #                     string). If NULL is used, the start.date parameter
  #                     is unset. If no parameter (or NA) is given, the
  #                     current start.date value is returned.
  #
  # Returns:
  #   The start.date value if start.date.param is not set; otherwise
  #   invisible NULL.
  invalid.date <- "A start date must be specified of the form YYYY-MM-DD"
  # Un-set the parameter if the value NULL is used.
  if (is.null(start.date.param)) {
    start.date <<- NULL
    return(invisible())
  }
  # A date range boundary is a single value; rejecting other lengths here
  # avoids an unrelated "condition has length > 1" failure further down.
  if (length(start.date.param) != 1) {
    stop(invalid.date)
  }
  # Returns the current start.date value if no parameter is used.
  if (is.na(start.date.param)) {
    return(start.date)
  }
  # Error handling.
  # as.Date() alone ignores trailing text and accepts unpadded fields, so
  # the exact shape is checked first and as.Date() then rules out
  # impossible calendar dates such as 2010-02-30.
  date.string <- as.character(start.date.param)
  if (!grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", date.string) ||
      is.na(as.Date(date.string, "%Y-%m-%d"))) {
    stop(invalid.date)
  }
  start.date <<- date.string
  return(invisible())
}
EndDate <- function(end.date.param = NA) {
  # Gets, sets, or clears the end date of the query.
  #
  # Every Analytics feed request needs a start and an end date; the server
  # returns a request error when they are missing. Date values are in the
  # form YYYY-MM-DD. No check is made that the end date is on or after the
  # start date.
  #
  # Args:
  #   end.date.param: Optional. A single date written as "YYYY-MM-DD"
  #                   (a Date object is accepted and stored as that
  #                   string). If NULL is used, the end.date parameter is
  #                   unset. If no parameter (or NA) is given, the current
  #                   end.date value is returned.
  #
  # Returns:
  #   The end.date value if end.date.param is not set; otherwise
  #   invisible NULL.
  invalid.date <- "An end date must be specified of the form YYYY-MM-DD"
  # Un-set the parameter if the value NULL is used.
  if (is.null(end.date.param)) {
    end.date <<- NULL
    return(invisible())
  }
  # A date range boundary is a single value; rejecting other lengths here
  # avoids an unrelated "condition has length > 1" failure further down.
  if (length(end.date.param) != 1) {
    stop(invalid.date)
  }
  # Returns the current end.date value if no parameter is used.
  if (is.na(end.date.param)) {
    return(end.date)
  }
  # Error handling.
  # as.Date() alone ignores trailing text and accepts unpadded fields, so
  # the exact shape is checked first and as.Date() then rules out
  # impossible calendar dates such as 2010-02-30.
  date.string <- as.character(end.date.param)
  if (!grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", date.string) ||
      is.na(as.Date(date.string, "%Y-%m-%d"))) {
    stop(invalid.date)
  }
  end.date <<- date.string
  return(invisible())
}
Dimensions <- function(dimensions.param = NA) {
  # Gets, sets, or clears the dimensions of the query.
  #
  # Dimensions are the primary data keys of a report, such as ga:browser
  # or ga:city, and segment the requested metrics (for example pageviews
  # per browser instead of total pageviews).
  #
  # Constraints described by the API documentation:
  #   A query may carry at most 7 dimensions (checked here).
  #   A query cannot consist of dimensions only; at least one metric is
  #   required (not checked here).
  #   Dimensions may only be combined where Valid Combinations apply:
  #   http://code.google.com/apis/analytics/docs/gdata/
  #   gdataReferenceDimensionsMetrics.html#validCombinations
  #
  # NOTE: This method does not check for invalid dimensions or combinations.
  #
  # Args:
  #   dimensions.param: Up to 7 dimensions, either as a single
  #                     comma-separated string or as a vector of strings,
  #                     e.g. "ga:source,ga:medium" or
  #                     c("ga:source", "ga:medium"). If NULL is used, the
  #                     dimensions parameter is unset. If no parameter is
  #                     specified, the current dimensions value is returned.
  #
  # Returns:
  #   The dimensions value (one comma-separated string) if dimensions.param
  #   is not set; otherwise invisible NULL.
  # Un-set the parameter if the value NULL is used.
  if (is.null(dimensions.param)) {
    dimensions <<- NULL
    return(invisible())
  }
  # Returns the current dimensions value if no parameter is used.
  if (is.na(dimensions.param[1])) {
    return(dimensions)
  }
  # Error handling.
  # Validate the dimensions input is a vector.
  if (!is.vector(dimensions.param)) {
    stop("dimensions must be a vector of string variables")
  }
  # Error handling.
  # Count the individual dimensions rather than the vector elements, so the
  # limit also applies to the single-string form "ga:a,ga:b,...".
  dimension.count <- length(dimensions.param)
  if (is.character(dimensions.param)) {
    split.names <- strsplit(dimensions.param, ",", fixed = TRUE)
    dimension.count <- length(unlist(split.names))
  }
  if (dimension.count > kMaxDimensions) {
    stop(paste("Google Analytics can only handle up to", kMaxDimensions,
               "dimensions parameters"))
  }
  # Error handling.
  # Validate the vector is a character type.
  # This will not stop a vector like, c("2", "this")
  if (!is.character(dimensions.param)) {
    stop(paste("dimensions must be character, please refer to the",
               "Google Analytics API documentation for more information"))
  }
  dimensions <<- paste(dimensions.param, collapse = ",")
  return(invisible())
}
Metrics <- function(metrics.param = NA) {
  # Gets, sets, or clears the metrics of interest (clicks, pageviews, etc).
  #
  # Metrics are the aggregated statistics for user activity in a profile.
  # Requested alone they give totals for the date range; requested together
  # with dimensions the values are segmented by those dimensions (e.g.
  # ga:pageviews with ga:country gives pageviews per country).
  #
  # Constraints described by the API documentation:
  #   Any request must supply at least one metric.
  #   A query may carry at most 10 metrics (checked here).
  #   Metrics may only be combined with other metrics or dimensions where
  #   Valid Combinations apply; ga:visitors in particular can only be used
  #   with a subset of metrics.
  #   Values are always aggregates; the Data Export API does not provide
  #   calculated metrics.
  #
  # NOTE: This method does not check that the metric names or their
  # combinations are valid.
  #
  # Args:
  #   metrics.param: Up to 10 metrics, either as a single comma-separated
  #                  string or as a vector of strings, e.g.
  #                  "ga:visits" or c("ga:visits", "ga:bounces").
  #                  If NULL is used, the metrics parameter is unset.
  #                  If no parameter is specified, the current metrics
  #                  value is returned.
  #
  # Returns:
  #   The metrics value (one comma-separated string) if metrics.param is
  #   not set; otherwise invisible NULL.
  # Un-set the parameter if the value NULL is used.
  if (is.null(metrics.param)) {
    metrics <<- NULL
    return(invisible())
  }
  # Returns the current metrics value if no parameter is used.
  if (is.na(metrics.param[1])) {
    return(metrics)
  }
  # Error handling.
  # Check the metrics input is that of a vector.
  if (!is.vector(metrics.param)) {
    stop("metrics must be a vector of string variables")
  }
  # Error handling.
  # Count the individual metrics rather than the vector elements, so the
  # limit also applies to the single-string form "ga:a,ga:b,...".
  metric.count <- length(metrics.param)
  if (is.character(metrics.param)) {
    split.names <- strsplit(metrics.param, ",", fixed = TRUE)
    metric.count <- length(unlist(split.names))
  }
  if (metric.count > kMaxMetrics) {
    stop(paste("Google Analytics can only handle up to", kMaxMetrics,
               "metrics parameters"))
  }
  # Error handling.
  # Check the vector is a character type.
  # This will not stop a vector like, c("2", "this")
  if (!is.character(metrics.param)) {
    stop(paste("metrics must be character string, please refer to the",
               "Google Analytics API documentation for more information"))
  }
  # Combine and store the parameters.
  metrics <<- paste(metrics.param, collapse = ",")
  return(invisible())
}
Segment <- function(segment.param = NA) {
  # Gets, sets, or clears the advanced segment of the query.
  #
  # See dxp:segment in the Account Feed Response section of
  # http://code.google.com/apis/analytics/docs/gdata/gdataDeveloperGuide.html
  #
  # An advanced segment can be requested in two ways:
  #   (1) By the numeric ID of a default or custom advanced segment, as
  #       listed by the account feed, e.g. "gaid::10".
  #   (2) By a dynamic expression built from dimensions and/or metrics,
  #       e.g. "dynamic::ga:medium==referral". Dynamic segments use the
  #       same expressions and operators as the filters parameter, except
  #       that OR logic may mix dimensions and metrics.
  #
  # The value is stored exactly as given; no validation is done here, so
  # malformed segments are left for the GA API to report.
  #
  # Args:
  #   segment.param: An advanced segment definition. If NULL is used, the
  #                  segment parameter is unset. If no parameter is
  #                  specified, the current segment value is returned.
  #
  # Returns:
  #   The segment value if segment.param is not set; otherwise
  #   invisible NULL.
  if (is.null(segment.param)) {
    # NULL clears any previously stored segment.
    segment <<- NULL
  } else if (is.na(segment.param[1])) {
    # No argument (or NA) means the caller only wants to read the value.
    return(segment)
  } else {
    segment <<- segment.param
  }
  invisible()
}
Sort <- function(sort.param = NA) {
  # Gets, sets, or clears the sorting criteria of the query.
  #
  # The sort parameter gives the order and direction of the returned data.
  # Without it, the data is sorted by dimension from left to right in the
  # order listed. Per the API documentation:
  #   Only fields named in the dimensions or metrics parameter may be
  #   sorted on; anything else produces a request error.
  #   Dimensions are sorted as strings (ascending, en-US locale) and
  #   metrics as numbers (ascending).
  #   A minus sign (-) prefix on a field switches it to descending order.
  #
  # Note: We do not check that the sort fields are also present in the
  # dimensions or metrics parameters.
  #
  # Args:
  #   sort.param: The sorting order for the data to be returned,
  #               e.g. "ga:visits" or c("ga:visits", "-ga:browser").
  #               If NULL is used, the sort parameter is unset. If no
  #               parameter is specified, the current sort value is
  #               returned.
  #
  # Returns:
  #   The sort value (one comma-separated string) if sort.param is not
  #   set; otherwise invisible NULL.
  if (is.null(sort.param)) {
    # NULL clears any previously stored sort order.
    sort <<- NULL
  } else if (is.na(sort.param[1])) {
    # No argument (or NA) means the caller only wants to read the value.
    return(sort)
  } else if (!is.vector(sort.param)) {
    stop("sort must be a vector of string variables")
  } else if (!is.character(sort.param)) {
    # This will not stop a vector like, c("2", "this")
    stop(paste("sort must be character string, please refer to the",
               "Google Analytics API documentation for more information"))
  } else {
    # Several fields are joined into the comma-separated form.
    sort <<- paste(sort.param, collapse = ",")
  }
  invisible()
}
Filters <- function(filters.param = NA) {
  # Gets, sets, or clears the filters of the query.
  #
  # The filters query string parameter restricts the data returned by the
  # Analytics servers. A filter names a dimension or metric followed by
  # the filter expression, e.g. "ga:medium==referral". The value is stored
  # exactly as given; no validation is done here.
  #
  # Args:
  #   filters.param: The filter string for the GA request.
  #                  If NULL is used, the filters parameter is unset.
  #                  If no parameter is specified, the current filters
  #                  value is returned.
  #
  # Returns:
  #   The filters value if filters.param is not set; otherwise
  #   invisible NULL.
  if (is.null(filters.param)) {
    # NULL clears any previously stored filter.
    filters <<- NULL
  } else if (is.na(filters.param[1])) {
    # No argument (or NA) means the caller only wants to read the value.
    return(filters)
  } else {
    filters <<- filters.param
  }
  invisible()
}
MaxResults <- function(max.results.param = NA) {
  # Gets, sets, or clears the maximum number of results to return.
  #
  # Use this together with start-index to retrieve a subset of entries, or
  # alone to cap the number of returned entries. Per the API
  # documentation the feed defaults to 1000 entries when max-results is
  # absent and never returns more than 10,000 entries per request; it can
  # also return fewer entries than requested when fewer dimension
  # segments exist. The 10,000 cap is not enforced here.
  #
  # Args:
  #   max.results.param: Maximum number of entries to include in the data
  #                      feed, as a single non-negative whole number.
  #                      If NULL is used, the max.results parameter is
  #                      unset. If no parameter is specified, the current
  #                      max.results value is returned.
  #
  # Returns:
  #   The max.results value if max.results.param is not set; otherwise
  #   invisible NULL.
  # Un-set the parameter if the value NULL is used.
  if (is.null(max.results.param)) {
    max.results <<- NULL
    return(invisible())
  }
  # Returns the current max.results value if no parameter is used.
  if (is.na(max.results.param[1])) {
    return(max.results)
  }
  # Error handling.
  # Ensure that max.results is a numeric.
  if (!is.numeric(max.results.param)) {
    stop("max.results must be a number")
  }
  # Error handling.
  if (length(max.results.param) > 1) {
    stop("Max Results must be a single numeric value")
  }
  # Error handling.
  # An entry count cannot be infinite, negative or fractional; catching it
  # here gives a clear message instead of a malformed request.
  if (!is.finite(max.results.param) || max.results.param < 0 ||
      max.results.param != floor(max.results.param)) {
    stop("max.results must be a non-negative whole number")
  }
  max.results <<- max.results.param
  return(invisible())
}
StartIndex <- function(start.index.param = NA) {
  # Gets, sets, or clears the index of the first result to return.
  #
  # Feed indexes are 1-based: the first entry is entry 1, not entry 0, and
  # the API uses 1 when no start index is supplied. Use this parameter as
  # a pagination mechanism along with max-results when totalResults
  # exceeds 10,000 and entries indexed at 10,001 and beyond are wanted.
  #
  # Args:
  #   start.index.param: The starting point of pagination for results to
  #                      be returned, as a single whole number of at
  #                      least 1. If NULL is used, the start.index
  #                      parameter is unset. If no parameter is specified,
  #                      the current start.index value is returned.
  #
  # Returns:
  #   The start.index value if start.index.param is not set; otherwise
  #   invisible NULL.
  # Un-set the parameter if the value NULL is used.
  if (is.null(start.index.param)) {
    start.index <<- NULL
    return(invisible())
  }
  # Returns the current start.index value if no parameter is used.
  if (is.na(start.index.param[1])) {
    return(start.index)
  }
  # Error handling.
  # Ensure that start.index.param is a numeric.
  if (!is.numeric(start.index.param)) {
    stop("start.index must be a number")
  }
  # Error handling.
  if (length(start.index.param) > 1) {
    stop("Start index must be a single numeric value")
  }
  # Error handling.
  # Because indexes are 1-based, anything below 1, fractional or infinite
  # cannot address an entry.
  if (!is.finite(start.index.param) || start.index.param < 1 ||
      start.index.param != floor(start.index.param)) {
    stop("start.index must be a whole number greater than or equal to 1")
  }
  start.index <<- start.index.param
  return(invisible())
}
TableID <- function(table.id.param = NA) {
  # Gets, sets, or clears the table id of the profile to query.
  #
  # The table ID is the unique ID used to retrieve the Analytics Report
  # data. It is provided by the <ga:table.id> element for each entry in
  # the account feed and has the form "ga:" followed by digits.
  #
  # NOTE: This function checks the form of the value only. It does not
  # test that the table.id exists in the account profile.
  #
  # Args:
  #   table.id.param: The table ID of the profile, e.g. "ga:1234".
  #                   If NULL is used, the table.id parameter is unset.
  #                   If no parameter is specified, the current table.id
  #                   value is returned.
  #
  # Returns:
  #   The table.id value if table.id.param is not set; otherwise
  #   invisible NULL.
  # Un-set the parameter if the value NULL is used.
  if (is.null(table.id.param)) {
    table.id <<- NULL
    return(invisible())
  }
  # Returns the current table.id value if no parameter is used.
  if (is.na(table.id.param[1])) {
    return(table.id)
  }
  # Error handling.
  # A table.id must be character.
  if (!is.character(table.id.param)) {
    stop("A table.id must be of the form 'ga:####'")
  }
  # Error handling.
  # Check the input is that of type vector.
  if (!is.vector(table.id.param)) {
    stop(paste("table.id must be a vector (length ", kMaxTableIds,
               ") string variable"))
  }
  if (length(table.id.param) != kMaxTableIds) {
    stop(paste("Only", kMaxTableIds, "table.id can be used at a time."))
  }
  # Error handling.
  # Enforce the 'ga:####' shape that the message above promises, so a bare
  # profile number or a typo is caught before a request is built.
  if (!all(grepl("^ga:[0-9]+$", table.id.param))) {
    stop("A table.id must be of the form 'ga:####'")
  }
  table.id <<- table.id.param
  return(invisible())
}
Validate <- function() {
  # Checks that every required query parameter has been set. The required
  # parameters are start.date, end.date, metrics and table.id.
  #
  # Returns:
  #   TRUE if the query has all the required parameters. Otherwise stops
  #   with an error naming each parameter that is still unset.
  required <- list("start.date" = start.date,
                   "end.date" = end.date,
                   "metrics" = metrics,
                   "table.id" = table.id)
  # list() keeps NULL entries, so unset parameters remain visible here.
  is.unset <- vapply(required, is.null, logical(1))
  if (!any(is.unset)) {
    return(TRUE)
  }
  missing.string <- paste(names(required)[is.unset], collapse = ", ")
  stop(paste("All GA queries must have", missing.string, "parameters.",
             sep = " "))
}
ToUri <- function() {
  # Returns the URI constructed from the parameter settings. Every value
  # is URI-encoded with curlEscape(); unset (NULL) parameters are left out.
  #
  # Users typically don't need to call this directly, as the
  # RGoogleAnalytics$GetReportData() function accepts an entire
  # QueryBuilder object.
  #
  # Returns:
  #   A full URI that can be used with the Google Analytics API. With no
  #   parameters set this is the bare feed address ending in "?".
  #
  # Stops with an error if a parameter holds more than one value, because
  # such a parameter cannot be written as a single key=value pair.
  # Internal parameter name -> query-string key, in output order.
  uri.keys <- c("start.date" = "start-date",
                "end.date" = "end-date",
                "dimensions" = "dimensions",
                "metrics" = "metrics",
                "segment" = "segment",
                "sort" = "sort",
                "filters" = "filters",
                "max.results" = "max-results",
                "start.index" = "start-index",
                "table.id" = "ids")
  # A list (unlike c()) keeps each value's own type and length, so numbers
  # are not coerced through as.character() and multi-element values do not
  # get renamed and lost.
  values <- list("start.date" = start.date,
                 "end.date" = end.date,
                 "dimensions" = dimensions,
                 "metrics" = metrics,
                 "segment" = segment,
                 "sort" = sort,
                 "filters" = filters,
                 "max.results" = max.results,
                 "start.index" = start.index,
                 "table.id" = table.id)
  pairs <- character(0)
  for (name in names(uri.keys)) {
    value <- values[[name]]
    if (is.null(value)) {
      next
    }
    if (length(value) != 1) {
      stop(paste("The", name, "parameter must hold a single value to be",
                 "placed in a URI"))
    }
    if (is.numeric(value)) {
      # Fixed notation: as.character(100000) would give "1e+05".
      value <- format(value, scientific = FALSE, trim = TRUE)
    } else {
      value <- as.character(value)
    }
    pairs <- c(pairs, paste0(uri.keys[[name]], "=", curlEscape(value)))
  }
  uri <- paste0("https://www.google.com/analytics/feeds/data?",
                paste(pairs, collapse = "&"))
  # remove any spaces that got added in from bad input
  uri <- gsub(" ", "", uri)
  return(uri)
}
ClearData <- function() {
  # Resets every query parameter (start.date, metrics, etc) back to NULL
  # so the builder can be reused for a new query.
  #
  # Returns:
  #   Invisible NULL; all the query parameters are unset as a side effect.
  query.params <- c("start.date", "end.date", "dimensions", "metrics",
                    "segment", "sort", "filters", "max.results",
                    "start.index", "table.id")
  for (param.name in query.params) {
    # inherits = TRUE updates the existing binding found in an enclosing
    # environment, the same lookup that `<<-` performs.
    assign(param.name, NULL, inherits = TRUE)
  }
  return(invisible())
}
Init <- function(start.date = NULL,
                 end.date = NULL,
                 dimensions = NULL,
                 metrics = NULL,
                 segment = NULL,
                 sort = NULL,
                 filters = NULL,
                 max.results = NULL,
                 start.index = NULL,
                 table.id = NULL) {
  # Sets all the parameters of a GA URI query in one call by handing each
  # argument to its setter. Arguments left at their NULL default un-set
  # the corresponding parameter, since the setters treat NULL as "unset".
  #
  # Args:
  #   start.date: See StartDate()
  #   end.date: See EndDate()
  #   dimensions: See Dimensions()
  #   metrics: See Metrics()
  #   segment: See Segment()
  #   sort: See Sort()
  #   filters: See Filters()
  #   max.results: See MaxResults()
  #   start.index: See StartIndex()
  #   table.id: See TableID()
  #
  # Returns:
  #   Invisible NULL; the query parameters are updated as a side effect.
  # Setters and values are paired by position and applied in this order.
  setters <- list(StartDate, EndDate, Dimensions, Metrics, Segment,
                  Sort, Filters, MaxResults, StartIndex, TableID)
  values <- list(start.date, end.date, dimensions, metrics, segment,
                 sort, filters, max.results, start.index, table.id)
  for (i in seq_along(setters)) {
    setters[[i]](values[[i]])
  }
  return(invisible())
}
# Public interface of the builder: each entry exposes one of the
# functions defined above under the name callers use with `$`.
builder <- list(start.date = StartDate,
                end.date = EndDate,
                dimensions = Dimensions,
                metrics = Metrics,
                segment = Segment,
                sort = Sort,
                filters = Filters,
                max.results = MaxResults,
                start.index = StartIndex,
                table.id = TableID,
                to.uri = ToUri,
                clear.data = ClearData,
                validate = Validate,
                Init = Init)
return(builder)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.