# R/aggregate.R

# Defines functions: aggregateGAData, ga_aggregate, getColNameOfClass

# Documented in: ga_aggregate

#' Gets the names of a dataframe's columns of a certain class
#'
#' A column matches when `class_name` appears anywhere in its class vector,
#' so columns that carry several classes (e.g. `POSIXct`/`POSIXt` or ordered
#' factors) are handled instead of raising an error.
#'
#' @param df dataframe
#' @param class_name the R class to return columns of. Normally a single
#'   string; if several are supplied, a column matching any of them is kept.
#' @return character vector with the names of the columns of that class,
#'   in column order (`character(0)` when nothing matches)
#' @noRd
getColNameOfClass <- function(df, class_name){
  stopifnot(inherits(df, "data.frame"),
            inherits(class_name, "character"))

  # class() may return more than one value for a column, so test membership
  # per column rather than comparing against a single class string.
  has_class <- vapply(df,
                      function(column) any(class_name %in% class(column)),
                      logical(1))

  names(df)[has_class]
}




#' Aggregate a Google Analytics dataframe over inputted columns
#' 
#' A helper function to roll metrics up over a chosen set of dimensions.
#'
#' @param ga_data A dataframe of data to aggregate
#' @param agg_names Character vector of the columns to aggregate over 
#'   (the grouping columns), or `NULL` to aggregate over everything
#' @param mean_regex A regex; numeric columns whose names match it are 
#'   aggregated with mean() rather than sum()
#'
#' @details
#'   Metrics are selected automatically: every numeric class column is 
#'   treated as a metric.
#'   Metrics whose names match the `mean_regex` argument are averaged, all 
#'   other metrics are summed. Missing values are removed before aggregating.
#'   Columns of class Date are reduced to their minimum (earliest) value.
#'   If `agg_names` is NULL the whole dataframe is aggregated into one row.
#'
#' @return A dataframe with one row per combination of the `agg_names` 
#'   columns, holding the aggregated metric and date columns.
#'
#' @importFrom magrittr %>%
#' @importFrom rlang !!!
#' @export
#' @import assertthat
#' @examples 
#' 
#' \dontrun{
#' 
#' # use `ga_aggregate` to create summary data on the fly
#' ga_data <- google_analytics(81416156, 
#'                             date_range = c("10daysAgo", "yesterday"),
#'                             metrics = c("sessions", "bounceRate"), 
#'                             dimensions = c("hour","date"))
#'                             
#' # totals per hour across all the dates:
#' ga_aggregate(ga_data[,c("hour","sessions")], agg_names = "hour")
#' 
#' # rates are averaged rather than summed (name matches `mean_regex`):
#' ga_aggregate(ga_data[,c("hour","bounceRate")], agg_names = "hour")
#' 
#' 
#' }
ga_aggregate <- function(ga_data, 
                         agg_names = NULL,
                         mean_regex = "^avg|^percent|Rate$|^CPC$|^CTR$|^CPM$|^RPC$|^ROI$|^ROAS$|Per"){

  # public entry point; the work is done by the internal implementation
  aggregateGAData(
    ga_data,
    agg_names  = agg_names,
    mean_regex = mean_regex
  )
}


#' Internal implementation behind ga_aggregate()
#'
#' Groups `ga_data` by `agg_names` and aggregates the remaining columns:
#' numeric columns matching `mean_regex` are averaged, other numeric columns
#' are summed, and Date columns take their minimum. Columns of any other
#' class that are not in `agg_names` are not carried into the result.
#'
#' @param ga_data A dataframe of data to aggregate
#' @param agg_names Character vector of grouping columns, or NULL for none
#' @param mean_regex Regex for metric names to average instead of sum
#' @return The joined (or column-bound, when `agg_names` is NULL) aggregates
#' @noRd
aggregateGAData <- function(ga_data, 
                            agg_names = NULL,
                            mean_regex = "^avg|^percent|Rate$|^CPC$|^CTR$|^CPM$|^RPC$|^ROI$|^ROAS$|Per"){
  
  assert_that(is.data.frame(ga_data))
  assert_that_ifnn(agg_names, is.character)
  
  ## classify the columns: numeric ones are metrics, split by mean_regex
  numeric_cols <- getColNameOfClass(ga_data, "numeric")
  takes_mean   <- grepl(mean_regex, numeric_cols)
  mean_cols    <- numeric_cols[takes_mean]
  sum_cols     <- numeric_cols[!takes_mean]
  
  date_cols <- getColNameOfClass(ga_data, "Date")
  
  ## grouping columns as symbols, for splicing into dplyr verbs
  group_syms <- lapply(agg_names, as.symbol)
  
  ## keep the grouping columns plus `value_cols`, then collapse every 
  ## non-grouping column with the lambda formula `fn`
  summarise_cols <- function(value_cols, fn){
    keep_syms <- lapply(c(agg_names, value_cols), as.symbol)
    
    ga_data %>%
      dplyr::select(!!!keep_syms) %>%
      dplyr::group_by(!!!group_syms) %>%
      dplyr::summarise_all(list(fn)) %>%
      dplyr::ungroup()
  }
  
  ## metrics to take mean as per mean_regex
  mean_tbl <- summarise_cols(mean_cols, ~mean(., na.rm = TRUE))
  
  ## metrics to sum over
  sum_tbl <- summarise_cols(sum_cols, ~sum(., na.rm = TRUE))
  
  ## date dimensions take the earliest entry
  date_tbl <- summarise_cols(date_cols, ~min(., na.rm = TRUE))
  
  ## no grouping columns to join on: bind the results side by side, 
  ## leaving out any aggregation that produced no columns
  if(is.null(agg_names)){
    drop_if_empty <- function(tbl) if(ncol(tbl) == 0) NULL else tbl
    
    return(dplyr::bind_cols(drop_if_empty(sum_tbl),
                            drop_if_empty(mean_tbl),
                            drop_if_empty(date_tbl)))
  }
  
  ## otherwise join up all the aggregations on the grouping columns
  dplyr::left_join(sum_tbl, mean_tbl, by = agg_names) %>%
    dplyr::left_join(date_tbl, by = agg_names)
  
}
# MarkEdmondson1234/googleAnalyticsR documentation built on Oct. 13, 2023, 4:40 a.m.