R/gtrends.R

Defines functions plot.gtrends gtrends

Documented in gtrends plot.gtrends

#' Google Trends Query
#'
#' The \code{gtrends} default method performs a Google Trends query for the
#' \sQuote{query} argument and session \sQuote{session}. Optional arguments for
#' geolocation and category can also be supplied.
#'
#' @param keyword A character vector with the actual Google Trends query
#'   keywords. Multiple keywords are possible using \code{gtrends(c("NHL",
#'   "NBA", "MLB", "MLS"))}.
#'
#' @param geo A character vector denoting geographic regions for the query,
#'   default to \dQuote{all} for global queries. Multiple regions are possible
#'   using \code{gtrends("NHL", c("CA", "US"))}.
#'
#' @param time A string specifying the time span of the query. Possible values
#'   are:
#'
#'   \describe{ \item{"now 1-H"}{Last hour} \item{"now 4-H"}{Last four hours}
#'   \item{"now 1-d"}{Last day} \item{"now 7-d"}{Last seven days} \item{"today
#'   1-m"}{Past 30 days} \item{"today 3-m"}{Past 90 days} \item{"today
#'   12-m"}{Past 12 months} \item{"today+5-y"}{Last five years (default)}
#'   \item{"all"}{Since the beginning of Google Trends (2004)} \item{"Y-m-d
#'   Y-m-d"}{Time span between two dates (ex.: "2010-01-01 2010-04-03")} }
#'
#' @param category A character denoting the category, defaults to \dQuote{0}.
#'
#' @param gprop A character string defining the Google product for which the
#'   trend query if preformed. Valid options are:
#'
#'   \itemize{ \item "web" (default) \item "news" \item "images" \item "froogle"
#'   \item "youtube" }
#'
#' @param hl A string specifying the ISO language code (ex.: \dQuote{en-US} or
#'   \dQuote{fr}). Default is \dQuote{en-US}. Note that this is only influencing
#'   the data returned by related topics.
#'   
#' @param tz A number specifying the minutes the returned dates should be offset to UTC. 
#' Note the parameter 'time' above is specified in UTC. 
#' E.g. choosing "time=2018-01-01T01 2018-01-01T03" and "tz=-120" will yield data between 2018-01-01T03 and 2018-01-01T05, 
#' i.e. data specified to be in UTC+2.
#'
#' @param low_search_volume Logical. Should include low search volume regions?
#'
#' @param cookie_url A string specifying the URL from which to obtain cookies.
#'   Default should work in general; should only be changed by advanced users.
#'
#' @param onlyInterest If you only want the interest over time set it to TRUE.
#' 
#' @section Categories: The package includes a complete list of categories that
#'   can be used to narrow requests. These can be accessed using
#'   \code{data("categories")}.
#'
#' @section Related topics: Note that *related topics* are not retrieved when
#'   more than one keyword is provided due to Google restriction.
#'
#' @importFrom stats na.omit reshape setNames
#' @importFrom utils URLencode read.csv head
#'
#' @return An object of class \sQuote{gtrends} (basically a list of data
#'   frames).
#'
#' @examples
#'
#' \dontrun{
#'
#' head(gtrends("NHL")$interest_over_time)
#' head(gtrends("NHL")$related_topics)
#' head(gtrends("NHL")$related_queries)
#'
#' head(gtrends(c("NHL", "NFL"))$interest_over_time)
#'
#' head(gtrends(c("NHL", "NFL"), geo = c("CA", "US"))$interest_over_time)
#'
#' ## Interest by city
#'
#' gtrends(keyword="obama",geo="US-AL-630")
#'
#' ## Sport category (20)
#' data(categories)
#' categories[grepl("^Sport", categories$name), ]
#' gtrends(c("NHL", "NFL"), geo = c("CA", "US"), category = 20)
#' gtrends(geo = c("CA"), category = 20)
#'
#' ## Playing with time format
#'
#' gtrends(c("NHL", "NFL"), time = "now 1-H") # last hour
#' gtrends(c("NHL", "NFL"), time = "now 4-H") # last four hours
#' gtrends(c("NHL", "NFL"), time = "now 1-d") # last day
#' gtrends(c("NHL", "NFL"), time = "today 1-m") # last 30 days
#' gtrends(c("NHL", "NFL"), time = "today 3-m") # last 90 days
#' gtrends(c("NHL", "NFL"), time = "today 12-m") # last 12 months
#' gtrends(c("NHL", "NFL"), time = "today+5-y") # last five years (default)
#' gtrends(c("NHL", "NFL"), time = "all") # since 2004
#'
#'
#' ## Custom date format
#'
#' gtrends(c("NHL", "NFL"), time = "2010-01-01 2010-04-03")
#'
#' ## Search from various Google's services
#'
#' head(gtrends(c("NHL", "NFL"), gprop = "news")$interest_over_time)
#' head(gtrends(c("NHL", "NFL"), gprop = "youtube")$interest_over_time)
#'
#' ## Language settings
#'
#' head(gtrends("NHL", hl = "en")$related_topics)
#' head(gtrends("NHL", hl = "fr")$related_topics)
#' }
#' @export
gtrends <- function(
                    keyword = NA,
                    geo = "",
                    time = "today+5-y",
                    gprop = c("web", "news", "images", "froogle", "youtube"),
                    category = 0,
                    hl = "en-US",
                    low_search_volume = FALSE,
                    cookie_url = "http://trends.google.com/Cookies/NID",
                    tz=0, # This equals UTC
                    onlyInterest=FALSE
                    ) {
  stopifnot(
    # One  vector should be a multiple of the other
    (length(keyword) %% length(geo) == 0) || (length(geo) %% length(keyword) == 0) || (length(time) %% length(keyword) == 0),
    is.vector(keyword),
    length(keyword) <= 5,
    length(geo) <= 5,
    length(time) <= 5,
    length(hl) == 1,
    is.character(hl),
    hl %in% language_codes$code,
    length(cookie_url) == 1,
    is.character(cookie_url)
  )


  ## Check if valide geo
  if (geo != "" &&
    !all(geo %in% c(as.character(countries[, "country_code"]), as.character(countries[, "sub_code"])))) {
    stop(
      "Country code not valid. Please use 'data(countries)' to retreive valid codes.",
      call. = FALSE
    )
  }

  ## Check if valide category
  if (!all(category %in% categories[, "id"])) {
    stop(
      "Category code not valid. Please use 'data(categories)' to retreive valid codes.",
      call. = FALSE
    )
  }

  ## Check if time format is ok
  if (!check_time(time)) {
    stop("Cannot parse the supplied time format.", call. = FALSE)
  }
  
  if(!(is.numeric(tz))){
    if (tz %in% OlsonNames()){
      tz <- map_tz2min(tz)
    }else{
      stop("Given timezone not known. Check function OlsonNames().", call. = FALSE)
    }
  }

  # time <- "today+5-y"
  # time <- "2017-02-09 2017-02-18"
  # time <- "now 7-d"
  # time <- "all_2006"
  # time <- "all"
  # time <- "now 4-H"
  # geo <- c("CA", "FR", "US")
  # geo <- c("CA", "DK", "FR", "US", "CA")
  # geo <- "US"

  gprop <- match.arg(gprop, several.ok = FALSE)
  gprop <- ifelse(gprop == "web", "", gprop)

  # ****************************************************************************
  # Request a token from Google
  # ****************************************************************************
  keyword <- sapply(keyword,function(x){
    y <- gsub("[+]","%2B",x)
    z <- gsub(" ","+",y)
    return(z)
    })
  names(keyword) <- NULL
  comparison_item <- data.frame(keyword,geo,time, stringsAsFactors = FALSE)

  widget <- get_widget(comparison_item, category, gprop, hl, cookie_url,tz)

  # ****************************************************************************
  # Now that we have tokens, we can process the queries
  # ****************************************************************************

  interest_over_time <- interest_over_time(widget, comparison_item,tz)
  
  if(!onlyInterest){
    interest_by_region <- interest_by_region(widget, comparison_item, low_search_volume,tz)
    related_topics <- related_topics(widget, comparison_item, hl,tz)
    related_queries <- related_queries(widget, comparison_item,tz,hl)
    res <- list(
      interest_over_time = interest_over_time,
      interest_by_country = do.call(rbind, interest_by_region[names(interest_by_region) == "country"]),
      interest_by_region = do.call(rbind, interest_by_region[names(interest_by_region) == "region"]),
      interest_by_dma = do.call(rbind, interest_by_region[names(interest_by_region) == "dma"]),
      interest_by_city = do.call(rbind, interest_by_region[names(interest_by_region) == "city"]),
      related_topics = related_topics,
      related_queries = related_queries
    )
  }else{
    res <- list(interest_over_time = interest_over_time)
  }

  ## Remove row.names
  res <- lapply(res, function(x) {
    row.names(x) <- NULL
    x
  })

  class(res) <- c("gtrends", "list")

  return(res)
}

#' Plot Google Trends interest over time
#'
#' @param x A \code{\link{gtrends}} object.
#' @param ... Additional parameters passed on in method dispatch. Currently not
#'   used.
#'
#' @import ggplot2
#'
#' @return A ggplot2 object is returned silently.
#' @export
#'
#' @examples
#' \dontrun{
#' res <- gtrends("nhl", geo = c("CA", "US"))
#' plot(res)
#' }
plot.gtrends <- function(x, ...) {
  df <- x$interest_over_time
  df$hits <- if(typeof(df$hits) == 'character'){
    as.numeric(gsub('<','',df$hits))
    } else {
    df$hits
    }

  df$legend <- paste(df$keyword, " (", df$geo, ")", sep = "")

  p <- ggplot(df, aes_string(x = "date", y = "hits", color = "legend")) +
    geom_line() +
    xlab("Date") +
    ylab("Search hits") +
    ggtitle("Interest over time") +
    theme_bw() +
    theme(legend.title = element_blank())

  print(p)
  invisible(p)
}

Try the gtrendsR package in your browser

Any scripts or data that you put into this service are public.

gtrendsR documentation built on Feb. 23, 2021, 5:13 p.m.