#' ga v4 parse batching
#' @keywords internal
google_analytics_4_parse_batch <- function(response_list){
if(!is.null(response_list$reports)){
parsed <- lapply(response_list$reports, google_analytics_4_parse)
} else {
warning("No $reports found.")
}
parsed
}
#' ga v4 data parsing
#'
#' x is response_list$reports[[1]] from google_analytics_4_parse_batch
#' @importFrom stats setNames
#' @keywords internal
google_analytics_4_parse <- function(x){
# message("Parsing GA API v4")
#### x <- ga_data2$reports[[1]]
columnHeader <- x$columnHeader
data <- x$data$rows
hasDateComparison <- if(length(data[[1]]$metrics) == 2) TRUE else FALSE
if(!is.null(x$data$isDataGolden) && !x$data$isDataGolden){
warning("Data is not Golden - may change on subsequent API calls.")
}
if(!is.null(x$data$filteredForPrivacyReasons)){
warning("Some data has been filtered for privacy reasons.")
}
dim_names <- unlist(columnHeader$dimensions)
met_names <- unlist(lapply(columnHeader$metricHeader$metricHeaderEntries, function(x) x$name))
met_names1 <- gsub("ga:","",met_names)
# met_types <- unlist(lapply(columnHeader$metricHeader$metricHeaderEntries, function(x) x$type))
if(is.null(data)){
# message("No data found")
return(NULL)
}
if(!is.null(dim_names)){
dims <- matrix(unlist(lapply(data, function(x) x$dimensions)),
ncol = length(dim_names), byrow = TRUE)
} else {
dims <- NULL
}
mets <- matrix(unlist(lapply(data, function(x) x$metrics[[1]]$values)),
ncol = length(met_names), byrow = TRUE)
## comparison date metrics
if(hasDateComparison){
mets2 <- matrix(unlist(lapply(data, function(x) x$metrics[[2]]$values)),
ncol = length(met_names), byrow=TRUE)
mets <- cbind(mets, mets2)
met_names <- c(paste0(met_names, ".d1"), paste0(met_names, ".d2"))
}
## construct the dataframe
out <- data.frame(cbind(dims, mets),
stringsAsFactors = FALSE, row.names = 1:nrow(mets))
out_names <- c(dim_names, met_names)
out_names <- gsub("ga:","",out_names)
names(out) <- make.names(out_names, unique=TRUE)
## type conversion
met_names <- gsub("ga:","",met_names)
out[,met_names] <- as.numeric(as.character(unlist(out[,met_names])))
if('date' %in% colnames(out)) {
out[,'date'] <- as.Date(unlist(out[,'date']), format="%Y%m%d")
}
## add support for met_types == TIME
pivot_entries <- pivot_ga4_parse(x, hasDateComparison)
if(!is.null(pivot_entries)) out <- cbind(out, pivot_entries)
totals <- lapply(x$data$totals, function(x) setNames(x$values, met_names1))
minimums <- lapply(x$data$minimums, function(x) setNames(x$values, met_names1))
maximums <- lapply(x$data$maximums, function(x) setNames(x$values, met_names1))
attr(out, "totals") <- totals
attr(out, "minimums") <- minimums
attr(out, "maximums") <- maximums
attr(out, "isDataGolden") <- x$data$isDataGolden
attr(out, "rowCount") <- x$data$rowCount
attr(out, "samplesReadCounts") <- x$data$samplesReadCounts
attr(out, "samplingSpaceSizes") <- x$data$samplingSpaceSizes
attr(out, "nextPageToken") <- x$nextPageToken
testthat::expect_s3_class(out, "data.frame")
out
}
sampling_message <- function(samplesReadCounts, samplingSpaceSizes, hasDateComparison = FALSE){
samplePercent <- 100
if(!is.null(samplesReadCounts)){
samplePercent <- get_samplePercent(samplesReadCounts[[1]], samplingSpaceSizes[[1]])
message("Data is sampled, based on ", samplePercent, "% of sessions." )
if(hasDateComparison){
samplePercent <- get_samplePercent(samplesReadCounts[[2]], samplingSpaceSizes[[2]])
message("Data Comparison is sampled, based on ", samplePercent, "% of sessions." )
}
}
}
get_samplePercent <- function(sampleReadCounts, samplingSpaceSizes){
sampleReadCounts <- as.numeric(sampleReadCounts)
samplingSpaceSizes <- as.numeric(samplingSpaceSizes)
if(sampleReadCounts == 0 ||
samplingSpaceSizes == 0 ||
identical(sampleReadCounts, numeric(0)) ||
identical(samplingSpaceSizes, numeric(0))) return(numeric(0))
round(100 * (sampleReadCounts / samplingSpaceSizes), 1)
}
#' New parse GA account summary
#'
#' @param x The account summary items
#' @import tidyjson
#' @importFrom jsonlite toJSON
#' @importFrom dplyr select filter
#' @keywords internal
parse_ga_account_summary <- function(x){
json_accounts <- jsonlite::toJSON(x$items)
class(json_accounts) <- c(class(json_accounts), "character")
tidy_json <- json_accounts %>% as.tbl_json()
tidy_json <- tidy_json %>%
json_lengths() %>% filter(length != 0) %>% select(-length) %>%
gather_array() %>%
spread_values(accountId = jstring("id"),
accountName = jstring("name")) %>%
enter_object("webProperties") %>%
json_lengths() %>% filter(length != 0) %>% select(-length) %>%
gather_array() %>%
spread_values(webPropertyId = jstring("id"),
webPropertyName = jstring("name"),
internalWebPropertyId = jstring("internalWebPropertyId"),
level = jstring("level"),
websiteUrl = jstring("websiteUrl")) %>%
enter_object("profiles") %>%
json_lengths() %>% filter(length != 0) %>% select(-length) %>%
gather_array() %>%
spread_values(viewId = jstring("id"),
viewName = jstring("name"),
type = jstring("type"),
starred = jstring("starred"))
## remove tidyjson artifacts, drop for issue #41 and #52
out <- tidy_json[,setdiff(names(tidy_json), c("document.id","array.index")), drop = FALSE]
out
}
parse_google_analytics <- function(x){
message("Request to profileId: ", x$profileInfo$profileId,
# " accountId: ", x$profileInfo$accountId,
# " webPropertyId: ", x$profileInfo$webPropertyId,
" (", x$profileInfo$profileName, ")")
if(!is.null(x$error)){
stop(x$error$message)
}
samplePercent <- 100
if(!is.null(x$containsSampledData)){
if(x$containsSampledData) {
samplePercent <- round(100 * (as.numeric(x$sampleSize) / as.numeric(x$sampleSpace)), 2)
message("Data is sampled, based on ", samplePercent, "% of visits. Use samplingLevel='WALK' to mitigate it." )
}
}
if(x$kind == "analytics#gaData"){
gadata <- parse_google_analytics_ga(x)
} else if(x$kind == "analytics#mcfData"){
gadata <- parse_google_analytics_mcf(x)
}
message("Fetched: ",
paste(colnames(gadata), collapse = " "),
". [", NROW(gadata), "] total results out of a possible [", x$totalResults, "], Start-Index: ", x$query$`start-index`)
attr(gadata, "containsSampledData") <- x$containsSampledData
attr(gadata, "samplePercent") <- samplePercent
attr(gadata, "samplingLevel") <- x$query$samplingLevel
attr(gadata, "profileInfo") <- x$profileInfo
attr(gadata, "dateRange") <- list(startDate = x$query$`start-date`, endDate = x$query$`end-date`)
attr(gadata, "totalResults") <- x$totalResults
gadata
}
parse_google_analytics_ga <- function(x){
gadata <- data.frame(x$rows, stringsAsFactors = F)
if(nrow(gadata) == 0){
warning("No data found")
return(gadata)
}
colnames(gadata) <- gsub("ga:", "",x$columnHeaders$name)
## changes all metrics into numeric columns
## Metrics can be float, integer or currency,
## but we just turn them all into numerics.
mets <- x$columnHeaders[!x$columnHeaders$columnType %in% "DIMENSION",'name']
mets <- gsub("ga:", "",mets)
gadata[,mets] <- as.numeric(as.character(unlist(gadata[,mets])))
## Date objects.
if('date' %in% colnames(gadata)) {
gadata[,'date'] <- as.Date(unlist(gadata[,'date']), format="%Y%m%d")
}
gadata
}
## from https://bitbucket.org/unikum/rga/src/b5c0cf89607707a5e3e1ca528e92f1f8fa714e0a/R/convert.R?at=master
parse_google_analytics_mcf <- function(x){
# Build data.frame for mcf report
if (is.list(x$rows[[1L]]) && !is.data.frame(x$rows[[1L]]))
x$rows <- do.call(c, x$rows)
names <- gsub("^mcf:", "", x$columnHeaders$name)
types <- x$columnHeaders$dataType
if ("MCF_SEQUENCE" %in% types) {
## take out null object lists that the JSON sometimes strangely returns
type_check <- vapply(x$rows, function(x) inherits(x[1]$conversionPathValue[[1]], "list"), FUN.VALUE=TRUE)
x$rows <- x$rows[!type_check]
pv_idx <- grep("MCF_SEQUENCE", types, fixed = TRUE, invert = TRUE)
cv_idx <- grep("MCF_SEQUENCE", types, fixed = TRUE)
primitive <- lapply(x$rows,
function(i) .subset2(i, "primitiveValue")[pv_idx])
primitive <- do.call(rbind, primitive)
colnames(primitive) <- names[pv_idx]
conversion <- lapply(x$rows,
function(i) .subset2(i, "conversionPathValue")[cv_idx])
conversion <- lapply(conversion,
function(i) lapply(i, function(j) paste(apply(j, 1, paste, collapse = ":"), collapse = " > ")))
conversion <- do.call(rbind, lapply(conversion, unlist))
colnames(conversion) <- names[cv_idx]
data_df <- data.frame(primitive, conversion, stringsAsFactors = FALSE)[, names]
} else {
data_df <- as.data.frame(do.call(rbind, lapply(x$rows, unlist)), stringsAsFactors = FALSE)
colnames(data_df) <- names
}
return(data_df)
}
parse_google_analytics_meta <- function(x){
dim_mets <- x$items$id
dim_mets_attr <- x$items$attributes
data.frame(name=dim_mets, dim_mets_attr, stringsAsFactors = F)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.