# Mirror the searchConsoleR.* package options onto the corresponding
# googleAuthR.* options in a single call. A source option that is unset
# yields NULL, which leaves the matching googleAuthR option unset.
options(
  googleAuthR.scopes.selected      = getOption("searchConsoleR.scope"),
  googleAuthR.client_id            = getOption("searchConsoleR.client_id"),
  googleAuthR.client_secret        = getOption("searchConsoleR.client_secret"),
  googleAuthR.webapp.client_id     = getOption("searchConsoleR.webapp.client_id"),
  googleAuthR.webapp.client_secret = getOption("searchConsoleR.webapp.client_secret")
)
#' Query search traffic keyword data
#'
#' @description Download your Google SEO data.
#'
#' @param siteURL The URL of the website you have auth access to.
#' @param startDate Start date of requested range, in YYYY-MM-DD.
#' @param endDate End date of the requested date range, in YYYY-MM-DD.
#' @param dimensions Zero or more dimensions to group results by:
#' \code{"date", "country", "device", "page" or "query"}
#' @param searchType Search type filter, default 'web'.
#' @param dimensionFilterExp A character vector of expressions to filter.
#' e.g. \code{("device==TABLET", "country~~GBR")}
#' @param aggregationType How data is aggregated.
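#' @param rowLimit How many rows to fetch. A single API call returns at most 5000 rows; larger values are fetched in batches as set by \code{walk_data}.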
#' @param prettyNames If TRUE, converts ISO 3166-1 alpha-3 country code to full name and
#' creates new column called countryName.
#' @param walk_data Make multiple API calls. One of \code{("byBatch","byDate","none")}
#'
#' @return A dataframe with columns in order of dimensions plus metrics, with attribute "aggregationType"
#'
#' @seealso Guide to Search Analytics: \url{https://support.google.com/webmasters/answer/6155685}
#' API docs: \url{https://developers.google.com/webmaster-tools/v3/searchanalytics/query}
#' @export
#'
#' @details
#' \strong{startDate}: Start date of the requested date range, in YYYY-MM-DD format,
#' in PST time (UTC - 8:00). Must be less than or equal to the end date.
#' This value is included in the range.
#'
#' \strong{endDate}: End date of the requested date range, in YYYY-MM-DD format,
#' in PST time (UTC - 8:00). Must be greater than or equal to the start date.
#' This value is included in the range.
#'
#' \strong{dimensions}: [Optional] Zero or more dimensions to group results by.
#' \itemize{
#' \item 'date'
#' \item 'country'
#' \item 'device'
#' \item 'page'
#' \item 'query'
#' }
#' The grouping dimension values are combined to create a unique key
#' for each result row. If no dimensions are specified,
#' all values will be combined into a single row.
#' There is no limit to the number of dimensions that you can group by,
#' but you cannot group by the same dimension twice. Example: c(country, device)
#'
#' \strong{dimensionFilterExp}:
#' Results are grouped in the order that you supply these dimensions.
#' dimensionFilterExp expects a character vector of expressions in the form:
#' ("device==TABLET", "country~~GBR", "dimension operator expression")
#' \itemize{
#' \item dimension
#' \itemize{
#' \item 'country'
#' \item 'device'
#' \item 'page'
#' \item 'query'
#' }
#' \item operator
#' \itemize{
#' \item '~~' meaning 'contains'
#' \item '==' meaning 'equals'
#' \item '!~' meaning 'notContains'
#' \item '!=' meaning 'notEquals'
#' }
#'
#' \item expression
#' \itemize{
#' \item country: an ISO 3166-1 alpha-3 country code.
#' \item device: 'DESKTOP','MOBILE','TABLET'.
#' \item page: not checked, a string in page URLs without hostname.
#' \item query: not checked, a string in keywords.
#'
#' }
#' }
#'
#'
#' \strong{searchType}: [Optional] The search type to filter for. Acceptable values are:
#' \itemize{
#' \item "web": [Default] Web search results
#' \item "image": Image search results
#' \item "video": Video search results
#' }
#'
#' \strong{aggregationType}: [Optional] How data is aggregated.
#' \itemize{
#' \item If aggregated by property, all data for the same property is aggregated;
#' \item If aggregated by page, all data is aggregated by canonical URI.
#' \item If you filter or group by page, choose auto; otherwise you can aggregate either by property or by page, depending on how you want your data calculated;
#' }
#' See the API documentation to learn how data is calculated differently by site versus by page.
#' Note: If you group or filter by page, you cannot aggregate by property.
#' If you specify any value other than auto, the aggregation type in the result will match the requested type, or if you request an invalid type, you will get an error.
#' The API will never change your aggregation type if the requested type is invalid.
#' Acceptable values are:
#' \itemize{
#' \item "auto": [Default] Let the service decide the appropriate aggregation type.
#' \item "byPage": Aggregate values by URI.
#' \item "byProperty": Aggregate values by property.
#' }
#'
#' \strong{walk_data}: [Optional] Batching data into multiple API calls
#' \itemize{
#' \item byBatch Use the API call to batch
#' \item byDate Runs a call over each day in the date range.
#' \item none No batching
#' }
#'
#' @examples
#'
#' \dontrun{
#'
#' library(searchConsoleR)
#' scr_auth()
#' sc_websites <- list_websites()
#'
#' gbr_desktop_queries <-
#' search_analytics("http://www.example.com",
#' startDate = "2016-01-01", endDate = "2016-03-01",
#' dimensions = c("query", "page"),
#' dimensionFilterExp = c("device==DESKTOP", "country==GBR"),
#' searchType = "web", rowLimit = 100)
#'
#' batching <-
#' search_analytics("http://www.example.com",
#' startDate = "2016-01-01", endDate = "2016-03-01",
#' dimensions = c("query", "page", "date"),
#' searchType = "web", rowLimit = 100000,
#' walk_data = "byBatch")
#'
#' }
search_analytics <- function(siteURL,
                             startDate = Sys.Date() - 93,
                             endDate = Sys.Date() - 3,
                             dimensions = NULL,
                             searchType = c("web","video","image"),
                             dimensionFilterExp = NULL,
                             aggregationType = c("auto","byPage","byProperty"),
                             rowLimit = 1000,
                             prettyNames = TRUE,
                             walk_data = c("byBatch","byDate","none")){

  # NOTE(review): prettyNames is accepted but never referenced in this
  # function body -- confirm whether it should be forwarded to
  # parse_search_analytics.

  ## require pre-existing token, to avoid recursion
  if (!inherits(googleAuthR::Authentication$public_fields$token, "Token2.0")) {
    stop("Not authenticated. Run scr_auth()", call. = FALSE)
  }

  # match.arg() stops on any value outside the declared choices, so no
  # further membership checks are needed for these three arguments.
  searchType <- match.arg(searchType)
  aggregationType <- match.arg(aggregationType)
  walk_data <- match.arg(walk_data)

  startDate <- as.character(startDate)
  endDate <- as.character(endDate)

  message("Fetching search analytics for ",
          paste("url:", siteURL,
                "dates:", startDate, endDate,
                "dimensions:", paste(dimensions, collapse = " ", sep=";"),
                "dimensionFilterExp:", paste(dimensionFilterExp, collapse = " ", sep=";"),
                "searchType:", searchType,
                "aggregationType:", aggregationType))

  siteURL <- check.Url(siteURL, reserved = TRUE)

  # Parse both dates once; NA means the string was not in YYYY-MM-DD form.
  start_day <- as.Date(startDate, "%Y-%m-%d")
  end_day <- as.Date(endDate, "%Y-%m-%d")
  today <- Sys.Date()

  if (any(is.na(start_day), is.na(end_day))) {
    stop("dates not in correct %Y-%m-%d format. Got these:", startDate, " - ", endDate)
  }

  if (any(start_day > today - 3, end_day > today - 3)) {
    warning("Search Analytics usually not available within 3 days (96 hrs) of today(",today,"). Got:", startDate, " - ", endDate)
  }

  if (start_day < today - 93) {
    warning("Search Analytics usually not available 93 days before today(",today,"). Got:", startDate, " - ", endDate)
  }

  # `dimensions` may hold several values, so the membership test has to be
  # reduced with all() before it is used in a scalar `&&` condition.
  valid_dimensions <- c('date','country', 'device', 'page', 'query')
  if (!is.null(dimensions) && !all(dimensions %in% valid_dimensions)) {
    stop("dimension must be NULL or one or more of 'date','country', 'device', 'page', 'query'.\nGot this: ",
         paste(dimensions, collapse = ", "))
  }

  if (aggregationType %in% c("byProperty") && 'page' %in% dimensions) {
    stop("Can't aggregate byProperty and include page in dimensions.")
  }

  # A single request is capped at 5000 rows. Larger requests remember the
  # requested total in rowLimit0 and are walked in 5000-row calls; smaller
  # requests never batch, whatever walk_data was passed.
  max_rows_per_call <- 5000
  if (rowLimit > max_rows_per_call) {
    message("Batching data via method: ", walk_data)
    rowLimit0 <- rowLimit
    rowLimit <- max_rows_per_call
  } else {
    walk_data <- "none"
  }

  ## a list of filter expressions
  ## expects dimensionFilterExp like c("device==TABLET", "country~~GBR")
  parsedDimFilterGroup <- lapply(dimensionFilterExp, parseDimFilterGroup)

  body <- list(
    startDate = startDate,
    endDate = endDate,
    dimensions = as.list(dimensions),
    searchType = searchType,
    dimensionFilterGroups = list(
      list( ## you don't want more than one of these until different groupType available
        groupType = "and", ##only one available for now
        filters = parsedDimFilterGroup
      )
    ),
    aggregationType = aggregationType,
    rowLimit = rowLimit
  )

  search_analytics_g <-
    googleAuthR::gar_api_generator("https://www.googleapis.com/webmasters/v3/",
                                   "POST",
                                   path_args = list(sites = "siteURL",
                                                    searchAnalytics = "query"),
                                   data_parse_function = parse_search_analytics)

  if (walk_data == "byDate") {

    if (!'date' %in% dimensions) {
      stop("To walk data per date requires 'date' to be one of the dimensions.\nGot this: ",
           paste(dimensions, collapse = ", "))
    }

    # One call per day: each walk value is written to both startDate and endDate.
    walk_vector <- seq(start_day, end_day, 1)

    out <- googleAuthR::gar_batch_walk(search_analytics_g,
                                       walk_vector = walk_vector,
                                       gar_paths = list(sites = siteURL),
                                       body_walk = c("startDate", "endDate"),
                                       the_body = body,
                                       batch_size = 1,
                                       dim = dimensions)

  } else if (walk_data == "byBatch") {

    ## byBatch uses API batching, but this pulls out less data
    ## 0 impression keywords not included.
    # Start rows are 0, 5000, 10000, ...; stopping at rowLimit0 - 1 avoids an
    # extra call when rowLimit0 is an exact multiple of the per-call cap.
    walk_vector <- seq(0, rowLimit0 - 1, max_rows_per_call)

    out <- googleAuthR::gar_batch_walk(search_analytics_g,
                                       walk_vector = walk_vector,
                                       gar_paths = list(sites = siteURL),
                                       body_walk = "startRow",
                                       the_body = body,
                                       batch_size = 3,
                                       dim = dimensions)

  } else {

    out <- search_analytics_g(the_body = body,
                              path_arguments = list(sites = siteURL),
                              dim = dimensions)
  }

  out
}
#' List the websites the user has in Search Console
#'
#' Sends a GET request to the Search Console \code{sites} endpoint and
#' returns the \code{siteEntry} element of the response.
#'
#' @return a dataframe of siteUrl and permissionLevel
#'
#' @export
#' @family search console website functions
list_websites <- function() {

  # Keep only the siteEntry element of the API response.
  extract_site_entries <- function(response) {
    response$siteEntry
  }

  fetch_sites <- googleAuthR::gar_api_generator(
    "https://www.googleapis.com/webmasters/v3/sites",
    "GET",
    data_parse_function = extract_site_entries
  )

  fetch_sites()
}
#' Add a website to Search Console
#'
#' Issues a PUT request for the given site. The URL is passed through
#' \code{check.Url} before being used as the \code{sites} path argument.
#'
#' @param siteURL The URL of the website to add.
#'
#' @return TRUE if successful, raises an error if not.
#' @family search console website functions
#'
#' @export
add_website <- function(siteURL) {

  checked_site <- check.Url(siteURL, reserved = TRUE)

  put_site <- googleAuthR::gar_api_generator(
    "https://www.googleapis.com/webmasters/v3/",
    "PUT",
    path_args = list(sites = "siteURL")
  )

  # The response itself is discarded; a failed request raises an error.
  put_site(path_arguments = list(sites = checked_site))

  TRUE
}
#' Delete a website from Search Console
#'
#' Issues a DELETE request for the given site. The URL is passed through
#' \code{check.Url} before being used as the \code{sites} path argument.
#'
#' @param siteURL The URL of the website to delete.
#'
#' @return TRUE if successful, raises an error if not.
#' @family data fetching functions
#'
#' @export
#' @family search console website functions
delete_website <- function(siteURL) {

  checked_site <- check.Url(siteURL, reserved = TRUE)

  remove_site <- googleAuthR::gar_api_generator(
    "https://www.googleapis.com/webmasters/v3/",
    "DELETE",
    path_args = list(sites = "siteURL")
  )

  # The response itself is discarded; a failed request raises an error.
  remove_site(path_arguments = list(sites = checked_site))

  TRUE
}
#' List sitemap information for a site
#'
#' Issues a GET request to the \code{sitemaps} collection of the given site
#' and parses the response with \code{parse_sitemaps}.
#'
#' See here for details: https://developers.google.com/webmaster-tools/v3/sitemaps
#'
#' @param siteURL The URL of the website to get sitemap information from. Must include protocol (http://).
#'
#' @return A list of two dataframes: $sitemap with general info and $contents with sitemap info.
#' @family data fetching functions
#'
#' @export
#' @family sitemap admin functions
list_sitemaps <- function(siteURL) {

  checked_site <- check.Url(siteURL, reserved = TRUE)

  # An empty sitemaps path value addresses the whole collection.
  sitemap_paths <- list(sites = "siteURL",
                        sitemaps = "")

  get_sitemaps <- googleAuthR::gar_api_generator(
    "https://www.googleapis.com/webmasters/v3/",
    "GET",
    path_args = sitemap_paths,
    data_parse_function = parse_sitemaps
  )

  get_sitemaps(path_arguments = list(sites = checked_site))
}
#' Submit a sitemap for a site.
#'
#' Issues a PUT request for the given sitemap under the given site. Both
#' URLs are passed through \code{check.Url} before being used as path
#' arguments.
#'
#' See here for details: https://developers.google.com/webmaster-tools/v3/sitemaps/submit
#'
#' @param siteURL The URL of the website the sitemap is submitted for. Must include protocol (http://).
#' @param feedpath The URL of the sitemap to submit. Must include protocol (http://).
#'
#' @return TRUE if successful, raises an error if not.
#'
#' @export
#' @family sitemap admin functions
add_sitemap <- function(siteURL, feedpath) {

  checked_site <- check.Url(siteURL, reserved = TRUE)
  checked_feed <- check.Url(feedpath, reserved = TRUE)

  submit_sitemap <- googleAuthR::gar_api_generator(
    "https://www.googleapis.com/webmasters/v3/",
    "PUT",
    path_args = list(sites = "siteURL",
                     sitemaps = "feedpath")
  )

  # The response itself is discarded; a failed request raises an error.
  submit_sitemap(path_arguments = list(sites = checked_site,
                                       sitemaps = checked_feed))

  TRUE
}
#' Delete a sitemap from a site.
#'
#' Issues a DELETE request for the given sitemap under the given site. Both
#' URLs are passed through \code{check.Url} before being used as path
#' arguments.
#'
#' See here for details: https://developers.google.com/webmaster-tools/v3/sitemaps/delete
#'
#' @param siteURL The URL of the website you are deleting the sitemap from. Must include protocol (http://).
#' @param feedpath The URL of the sitemap to delete. Must include protocol (http://).
#'
#' @return TRUE if successful, raises an error if not.
#'
#' @export
#' @family sitemap admin functions
delete_sitemap <- function(siteURL, feedpath) {

  checked_site <- check.Url(siteURL, reserved = TRUE)
  checked_feed <- check.Url(feedpath, reserved = TRUE)

  remove_sitemap <- googleAuthR::gar_api_generator(
    "https://www.googleapis.com/webmasters/v3/",
    "DELETE",
    path_args = list(sites = "siteURL",
                     sitemaps = "feedpath")
  )

  # The response itself is discarded; a failed request raises an error.
  remove_sitemap(path_arguments = list(sites = checked_site,
                                       sitemaps = checked_feed))

  TRUE
}
#' Fetch a time-series of Googlebot crawl errors.
#'
#' @description
#' Get a list of errors detected by Googlebot over time.
#' See here for details: https://developers.google.com/webmaster-tools/v3/urlcrawlerrorscounts/query
#'
#' @param siteURL The URL of the website to query crawl errors for. Must include protocol (http://).
#' @param category Crawl error category. Defaults to 'all'
#' @param platform The user agent type. 'all', 'mobile', 'smartphoneOnly' or 'web'.
#' @param latestCountsOnly Default FALSE. Only the latest crawl error counts returned if TRUE.
#'
#' @return dataframe of errors with $platform $category $count and $timecount.
#'
#' @details The timestamp is converted to a date as they are only available daily.
#'
#' Category is one of: authPermissions, manyToOneRedirect, notFollowed, notFound,
#' other, roboted, serverError, soft404.
#'
#' Platform is one of: mobile, smartphoneOnly or web.
#'
#' Query parameters whose value is 'all' are dropped from the request.
#'
#' @export
#' @family working with search console errors
crawl_errors <- function(siteURL,
                         category="all",
                         platform=c("all","mobile","smartphoneOnly","web"),
                         latestCountsOnly = FALSE) {

  platform <- match.arg(platform)
  siteURL <- check.Url(siteURL, reserved = TRUE)

  # The query string carries the flag as the text 'true' / 'false'.
  latestCountsOnly <- ifelse(latestCountsOnly, 'true', 'false')

  ## require pre-existing token, to avoid recursion
  if (is.valid.category.platform(category, platform, include.all = TRUE)) {

    query_pars <- list('category' = category,
                       'latestCountsOnly' = latestCountsOnly,
                       'platform' = platform)
    # Drop every parameter that is set to 'all'.
    query_pars <- query_pars[query_pars != 'all']

    fetch_counts <- googleAuthR::gar_api_generator(
      "https://www.googleapis.com/webmasters/v3/",
      "GET",
      path_args = list(sites = "siteURL",
                       urlCrawlErrorsCounts = "query"),
      pars_args = query_pars,
      data_parse_function = parse_crawlerrors
    )

    fetch_counts(path_arguments = list(sites = siteURL),
                 pars_arguments = query_pars)
  }
}
#' Lists a site's sample URLs for crawl errors.
#'
#' @description Category is one of: authPermissions, manyToOneRedirect, notFollowed, notFound,
#' other, roboted, serverError, soft404.
#'
#' Platform is one of: mobile, smartphoneOnly or web.
#'
#' @param siteURL The URL of the website to list crawl error samples for. Must include protocol (http://).
#' @param category Crawl error category. Default 'notFound'.
#' @param platform User agent type. Default 'web'.
#'
#' @details
#' See here for details: \url{https://developers.google.com/webmaster-tools/v3/urlcrawlerrorssamples}
#'
#' @return A dataframe of $pageUrl, $last_crawled, $first_detected, $response
#'
#' @export
#' @family working with search console errors
list_crawl_error_samples <- function(siteURL,
                                     category="notFound",
                                     platform="web") {

  # Spell out TRUE: the shorthand T is an ordinary variable that can be
  # reassigned elsewhere in the session.
  siteURL <- check.Url(siteURL, reserved = TRUE)

  ## require pre-existing token, to avoid recursion
  if (is.valid.category.platform(category, platform)) {

    params <- list('category' = category,
                   'platform' = platform)

    # An empty urlCrawlErrorsSamples path value addresses the collection.
    lces <-
      googleAuthR::gar_api_generator("https://www.googleapis.com/webmasters/v3/",
                                     "GET",
                                     path_args = list(sites = "siteURL",
                                                      urlCrawlErrorsSamples = ""),
                                     pars_args = params,
                                     data_parse_function = parse_crawlerror_sample)

    lces(path_arguments = list(sites = siteURL), pars_arguments = params)
  }
}
#' Show details of errors for an individual sample URL
#'
#' See here for details: https://developers.google.com/webmaster-tools/v3/urlcrawlerrorssamples/get
#'
#' @param siteURL The URL of the website the sample URL belongs to. Must include protocol (http://).
#' @param pageURL A PageUrl taken from list_crawl_error_samples.
#' @param category Crawl error category. Default 'notFound'.
#' @param platform User agent type. Default 'web'.
#'
#' @return Dataframe of $linkedFrom, with the calling URLs $last_crawled, $first_detected and a $exampleURL
#' @family working with search console errors
#' @description
#' pageURL is the relative path (without the site) of the sample URL.
#' It must be one of the URLs returned by list_crawl_error_samples.
#' For example, for the URL https://www.example.com/pagename on the site https://www.example.com/,
#' the url value is pagename (string)
#'
#' Category is one of: authPermissions, manyToOneRedirect, notFollowed, notFound,
#' other, roboted, serverError, soft404.
#'
#' Platform is one of: mobile, smartphoneOnly or web.
#'
#' @export
error_sample_url <- function(siteURL,
                             pageURL,
                             category="notFound",
                             platform="web") {

  siteURL <- check.Url(siteURL, reserved = TRUE)
  # pageURL is a relative path, so the protocol check is switched off.
  pageURL <- check.Url(pageURL, checkProtocol = FALSE, reserved = TRUE, repeated = TRUE)

  ## require pre-existing token, to avoid recursion
  if (is.valid.category.platform(category, platform)) {

    query_pars <- list('category' = category,
                       'platform' = platform)

    request_paths <- list(sites = siteURL,
                          urlCrawlErrorsSamples = pageURL)

    fetch_sample <- googleAuthR::gar_api_generator(
      "https://www.googleapis.com/webmasters/v3/",
      "GET",
      path_args = list(sites = "siteURL",
                       urlCrawlErrorsSamples = "pageURL"),
      pars_args = query_pars,
      data_parse_function = parse_errorsample_url
    )

    fetch_sample(path_arguments = request_paths,
                 pars_arguments = query_pars)
  }
}
#' Mark an individual sample URL as fixed
#'
#' See here for details:
#' https://developers.google.com/webmaster-tools/v3/urlcrawlerrorssamples/markAsFixed
#'
#' @param siteURL The URL of the website the sample URL belongs to. Must include protocol (http://).
#' @param pageURL A PageUrl taken from list_crawl_error_samples.
#' @param category Crawl error category. Default 'notFound'.
#' @param platform User agent type. Default 'web'.
#'
#' @return TRUE if successful, raises an error if not. FALSE when the
#' category/platform check does not pass without raising.
#' @family working with search console errors
#'
#' @description
#' pageURL is the relative path (without the site) of the sample URL.
#' It must be one of the URLs returned by list_crawl_error_samples.
#' For example, for the URL https://www.example.com/pagename on the site https://www.example.com/,
#' the url value is pagename (string)
#'
#' Category is one of: authPermissions, manyToOneRedirect, notFollowed, notFound,
#' other, roboted, serverError, soft404.
#'
#' Platform is one of: mobile, smartphoneOnly or web.
#'
#' @export
fix_sample_url <- function(siteURL,
                           pageURL,
                           category = "notFound",
                           platform = "web") {

  siteURL <- check.Url(siteURL, reserved = TRUE)
  # pageURL is a relative path, so the protocol check is switched off.
  # NOTE(review): error_sample_url also passes repeated = TRUE here --
  # confirm whether the difference is intentional.
  pageURL <- check.Url(pageURL, checkProtocol = FALSE, reserved = TRUE)

  if (is.valid.category.platform(category, platform)) {

    query_pars <- list('category' = category,
                       'platform' = platform)

    # Marking as fixed is done with a DELETE on the sample URL.
    mark_fixed <- googleAuthR::gar_api_generator(
      "https://www.googleapis.com/webmasters/v3/",
      "DELETE",
      path_args = list(sites = "siteURL",
                       urlCrawlErrorsSamples = "pageURL"),
      pars_args = query_pars
    )

    mark_fixed(path_arguments = list(sites = siteURL,
                                     urlCrawlErrorsSamples = pageURL),
               pars_arguments = query_pars)

    TRUE
  } else {
    FALSE
  }
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.