R/QueueRankedBatch.R

Defines functions QueueRankedBatch

Documented in QueueRankedBatch

#' Batch RSiteCatalyst API Requests
#'
#' Splits one ranked report into several smaller ranked requests, enqueues
#' them with the Adobe API and binds the retrieved reports together. This is
#' an effort to parallelize the requests and utilize the 8 slots available
#' in the API. Note that batching by time period often outperforms running
#' the same QueueTrended report.
#'
#' @param reportsuite.id Report suite id
#' @param date.from Start date for the report (YYYY-MM-DD)
#' @param date.to End date for the report (YYYY-MM-DD)
#' @param metrics List of metrics to include in the report
#' @param elements List of elements to include in the report
#' @param top List of numbers to limit the number of rows to include (top X). eg. c(10,5)
#' @param segment.id Id(s) of Adobe Analytics segment to retrieve the report for
#' @param classification SAINT classification to use in place of first element. Need to specify element AND classification.
#' @param selected List of specific items (of the first element) to include in the report - e.g. c("www:home","www:search","www:about").
#' This only works for the first element (API limitation).
#' @param search List of keywords for the first specified element - e.g. c("contact","about","shop").
#' search overrides anything specified using selected
#' @param batchBy Accepts "day", "week", "rows", "search", and "selected".
#' Note when using "rows", top is overwritten by batchLength.
#' Note when using "search" or "selected", the matching argument must be
#' non-empty and is split into batches of batchLength items.
#' When using "week", the last batch ends on date.to.
#' @param rows Estimation of total number of rows. Required when batchBy is "rows".
#' @param batchLength The number of "search", "selected", or "rows" to use in each batch.
#' @param queueUse Set number of slots to use in queue before pausing.
#' Use NA to submit without checking the queue.
#' @param pauseTime Set the length of time (seconds) to pause before attempting
#' to submit another report. Should be the amount of time it takes to return 1 report.
#' @param verboseQueue TRUE or FALSE to print queue status at each request.
#'
#' @return A data frame with the rows of every batch that was retrieved
#' successfully, bound together in submission order. When batchBy is "day" or
#' "week" a leading "date" column labels the period of each batch. Batches
#' that fail are printed to the console and skipped.
#'
#' @importFrom RSiteCatalyst GetQueue QueueRanked GetReport
#'
#' @export
#'
QueueRankedBatch <- function(reportsuite.id, date.from, date.to,
                             metrics, elements, top = 5,
                             segment.id = "", classification = c(),
                             search = c(),
                             selected = c(),
                             batchBy = "search",
                             rows = NA,
                             batchLength = 5,
                             queueUse = 6,
                             pauseTime = 15,
                             verboseQueue = FALSE) {

  # Fail early on an unknown batching mode instead of silently queueing nothing.
  batchBy <- match.arg(batchBy, c("day", "week", "rows", "search", "selected"))

  if (batchBy %in% c("rows", "search", "selected") &&
      !(is.numeric(batchLength) && length(batchLength) == 1L &&
        !is.na(batchLength) && batchLength >= 1)) {
    stop("'batchLength' must be a single number of at least 1.", call. = FALSE)
  }

  # Wait until the queue holds fewer than `queueUse` reports (unless queueUse
  # is NA, which disables the check), then enqueue one report and return
  # whatever QueueRanked returns for it.
  submitWhenFree <- function(args) {
    if (!is.na(queueUse)) {
      queue <- GetQueue()
      # nrow() is NULL when GetQueue() does not return a table; that case is
      # treated as "queue not full" (presumably an empty queue).
      while (!is.null(nrow(queue)) && nrow(queue) >= queueUse) {
        if (verboseQueue) {
          print(queue)
        }
        print(paste("Queue Full, Pausing", pauseTime, "Seconds"))
        Sys.sleep(pauseTime)
        queue <- GetQueue()
      }
    }
    do.call(QueueRanked, args)
  }

  # Split `values` into consecutive pieces of at most `batchLength` items.
  # The last piece is truncated rather than padded with NA.
  chunkValues <- function(values, argName) {
    if (length(values) == 0L) {
      stop("batchBy = \"", argName, "\" requires a non-empty '", argName,
           "' argument.", call. = FALSE)
    }
    firsts <- seq(1, length(values), by = batchLength)
    lapply(firsts, function(first) {
      values[first:min(first + batchLength - 1, length(values))]
    })
  }

  startTime <- Sys.time()

  # Arguments shared by every batch; each batch overrides a few of them.
  reportArgs <- list(reportsuite.id = reportsuite.id,
                     date.from = date.from,
                     date.to = date.to,
                     segment.id = segment.id,
                     elements = elements,
                     classification = classification,
                     metrics = metrics,
                     search = search,
                     selected = selected,
                     top = top,
                     interval.seconds = 1,
                     enqueueOnly = TRUE)

  # Period label per batch; only set for the date-based modes.
  batchLabels <- NULL

  # One list of argument overrides per batch.
  batches <- switch(
    batchBy,
    selected = lapply(chunkValues(selected, "selected"),
                      function(items) list(selected = items)),
    search = lapply(chunkValues(search, "search"),
                    function(items) list(search = items)),
    day = {
      days <- as.character(seq(as.Date(date.from), as.Date(date.to),
                               by = "day"))
      batchLabels <- days
      lapply(days, function(d) list(date.from = d, date.to = d))
    },
    week = {
      lastDay <- as.Date(date.to)
      weekStarts <- seq(as.Date(date.from), lastDay, by = "week")
      weekEnds <- weekStarts + 6
      # Never request days beyond the caller's end date.
      weekEnds[weekEnds > lastDay] <- lastDay
      weekFrom <- as.character(weekStarts)
      weekTo <- as.character(weekEnds)
      batchLabels <- paste(weekFrom, "to", weekTo)
      unname(Map(function(from, to) list(date.from = from, date.to = to),
                 weekFrom, weekTo))
    },
    rows = {
      if (length(rows) != 1L || is.na(rows)) {
        stop("You must provide a row count to the 'rows' parameter when ",
             "using batch by rows, even if it is an estimate.")
      }
      lapply(seq(1, rows, by = batchLength),
             function(first) list(start = first, top = batchLength))
    }
  )

  # Enqueue every batch. Ids are kept in a list so that a missing id cannot
  # shift the alignment between report ids and batch labels.
  queuedReports <- lapply(batches, function(overrides) {
    args <- reportArgs
    args[names(overrides)] <- overrides
    submitWhenFree(args)
  })

  # Retrieve the reports. Each batch is isolated so one failure does not
  # discard the others; rbind stays inside the handler for the same reason.
  report <- data.frame()
  for (i in seq_along(queuedReports)) {
    tryCatch(
      # Warnings are printed and muffled via a calling handler so that they
      # do not abort the batch (an exiting handler would drop its data).
      withCallingHandlers(
        {
          batchReport <- GetReport(queuedReports[[i]],
                                   max.attempts = 100, print.attempts = FALSE)
          if (!is.null(batchLabels)) {
            batchReport <- cbind(date = batchLabels[i], batchReport)
          }
          report <- rbind(report, batchReport)
        },
        warning = function(w) {
          print(w)
          invokeRestart("muffleWarning")
        }
      ),
      error = function(e) {
        print(
          paste("Report Error on:", queuedReports[[i]],
                "Batch:", i,
                "Rows:", (i - 1) * batchLength + 1,
                "to", i * batchLength)
        )
        print(paste("Error:", conditionMessage(e)))
      }
    )
  }

  endTime <- Sys.time()
  print(endTime - startTime)
  report
}
blazickjoe/DataScienceLibrary documentation built on Nov. 5, 2019, 2:26 p.m.