R/initialize_request_trends.R

Defines functions initialize_request_trends

Documented in initialize_request_trends

#' Initialize Google Trends Request
#'
#' This function initializes a request to the Google Trends API using pytrends,
#' creates necessary directories, and prepares parameters for data collection.
#'
#' The initiation stage involves creating two folders automatically:
#' - The main folder chosen by the user (`folder_name`).
#' - A subfolder corresponding to the `data_format` (e.g., 'daily', 'weekly',
#'   'monthly') for storing data.
#'
#' The request parameters are also written as JSON to `params.txt` inside the
#' `data_format` subfolder.
#'
#' All arguments are validated, and the availability of the 'pytrends' Python
#' module is checked, before anything is written to disk, so a failed call
#' does not leave folders or parameter files behind.
#'
#' @param keyword The keyword to be used for collecting Google Trends data.
#' @param topic The topic associated with the keyword. For example,
#'   '/m/0ddwt' will give Google Trends data for Insomnia as topic of
#'   'Disorder'. If identical to the keyword, data will reflect Google Trends
#'   search term data. NOTE: URL's have certain codes for special characters.
#'   For example, \%20 = white space, \%2F = / (forward slash) etc.
#'   Defaults to `NULL`.
#' @param folder_name Name of the parent folder where all data will be stored.
#'   Missing parent directories are created as needed.
#' @param start_date The start date from which to collect Google Trends data.
#'   Anything accepted by [as.Date()], e.g. `"2024-05-01"`. For 'monthly' data
#'   the start date is moved to the first day of its month.
#' @param end_date The end date until which to collect Google Trends data.
#'   Must not be earlier than `start_date`.
#' @param data_format Time basis for the query. Can be one of 'daily',
#'   'weekly', or 'monthly'. 'weekly' requires at least 270 days between
#'   `start_date` and `end_date`; 'monthly' requires at least 1890 days.
#'
#' @return A list containing initialized values and objects for further interaction with the package:
#' \item{logger}{A logging object for recording messages.}
#' \item{keyword}{The keyword used for data collection.}
#' \item{topic}{The topic associated with the keyword.}
#' \item{folder_name}{Name of the parent folder for storing data.}
#' \item{start_date}{Start date for data collection (a `Date`).}
#' \item{end_date}{End date for data collection (a `Date`).}
#' \item{data_format}{Time basis for the data query ('daily', 'weekly', or 'monthly').}
#' \item{num_of_days}{Number of days between \code{start_date} and \code{end_date}, computed from the dates as supplied (before any 'monthly' adjustment of the start date).}
#' \item{pytrend}{Initialized pytrends request object.}
#' \item{time_window}{Time window passed to \code{determine_time_periods()}: 1889 for 'weekly', 269 for 'daily'. \code{NULL} for 'monthly'.}
#' \item{times}{Time periods determined for 'weekly' or 'daily' data formats. \code{NULL} for 'monthly'.}
#'
#' @examples
#' if (reticulate::py_module_available("pytrends")) {
#'   # Store the example output in a temporary folder
#'   example_folder <- file.path(tempdir(), "test_folder")
#'   params <- initialize_request_trends(
#'     keyword = "Coronavirus disease 2019",
#'     topic = "/g/11j2cc_qll",
#'     folder_name = example_folder,
#'     start_date = "2024-05-01",
#'     end_date = "2024-05-03",
#'     data_format = "daily"
#'   )
#'   # Clean up the temporary folder after the example
#'   unlink(example_folder, recursive = TRUE)
#' } else {
#'   message("The 'pytrends' module is not available.
#'   Please install it by running install_pytrendslongitudinalr()")
#' }
#'
#' @export
initialize_request_trends <- function(keyword, topic = NULL, folder_name, start_date, end_date, data_format) {
  # Convert date strings to R Date objects
  start_date <- as.Date(start_date)
  end_date <- as.Date(end_date)

  # Number of days between the dates as supplied. For 'monthly' data the start
  # date is later moved to the first of its month; num_of_days intentionally
  # keeps the value computed here.
  num_of_days <- as.integer(end_date - start_date)

  # Reject NA / vector dates up front instead of failing later with
  # "missing value where TRUE/FALSE needed".
  if (length(num_of_days) != 1L || is.na(num_of_days)) {
    stop("start_date and end_date must each be a single valid date")
  }
  if (num_of_days < 0L) {
    stop(sprintf(
      "end_date ('%s') must not be earlier than start_date ('%s')",
      format(end_date), format(start_date)
    ))
  }

  # Validate data_format; the length guard keeps NULL or vector input from
  # breaking the `if` condition itself.
  valid_formats <- c("daily", "weekly", "monthly")
  if (length(data_format) != 1L || !data_format %in% valid_formats) {
    stop("data_format should be 'daily'/'weekly'/'monthly'")
  }

  # Only 'weekly' and 'daily' requests are split into time periods; both stay
  # NULL for 'monthly'.
  time_window <- NULL
  times <- NULL

  if (data_format == "monthly") {
    if (num_of_days < 1890) {
      stop(sprintf("Difference Between Start and End date needs to be more than 1889 days to get monthly data. Given only '%d' days", num_of_days))
    }
    # Monthly data always starts on the first day of the month
    start_date <- as.Date(paste0(format(start_date, "%Y-%m"), "-01"))
  } else if (data_format == "weekly") {
    if (num_of_days < 270) {
      stop(sprintf("Difference Between Start and End date needs to be more than 269 days to get weekly data. Given '%d' days", num_of_days))
    }
    time_window <- 1889
    times <- determine_time_periods(start_date, end_date, num_of_days, time_window)

    # The final period (between the last two boundaries) must itself be long
    # enough to yield weekly data.
    n_times <- length(times)
    check_end_period <- as.integer(difftime(times[n_times], times[n_times - 1], units = "days"))
    if (check_end_period < 270) {
      stop(sprintf("Last Time Period is less than 270 days. Given %d", check_end_period))
    }
  } else {
    time_window <- 269
    times <- determine_time_periods(start_date, end_date, num_of_days, time_window)
  }

  # Check that the pytrends module is available before touching the file
  # system, so a failed initialization leaves nothing behind.
  if (!reticulate::py_module_available("pytrends")) {
    stop("pytrends module is not available. Please run install_pytrendslongitudinalr() to install the required Python packages.")
  }

  # Initialize logger (Python side) only once the inputs are known to be valid
  logger <- logging$getLogger("rich")

  # Create the main folder and the data_format subfolder in one step;
  # recursive = TRUE also creates any missing parent directories.
  data_dir <- file.path(folder_name, data_format)
  dir.create(data_dir, showWarnings = FALSE, recursive = TRUE)
  if (!dir.exists(data_dir)) {
    stop(sprintf("Could not create directory '%s'", data_dir))
  }

  # Save parameters into a JSON file
  params <- list(
    keyword = keyword,
    topic = topic,
    folder_name = folder_name,
    start_date = as.character(start_date),
    end_date = as.character(end_date),
    data_format = data_format
  )
  params_fl <- file.path(data_dir, "params.txt")
  jsonlite::write_json(params, params_fl, pretty = TRUE)

  # Initialize pytrends request
  pytrend <- pytrendsRequest$TrendReq() # tried retries=7, backoff_factor=0.3

  # Return a list of initialized values. list() keeps NULL-valued entries, so
  # time_window and times are always present (NULL for 'monthly').
  list(
    logger = logger,
    keyword = keyword,
    topic = topic,
    folder_name = folder_name,
    start_date = start_date,
    end_date = end_date,
    data_format = data_format,
    num_of_days = num_of_days,
    pytrend = pytrend,
    time_window = time_window,
    times = times
  )
}

Try the PytrendsLongitudinalR package in your browser

Any scripts or data that you put into this service are public.

PytrendsLongitudinalR documentation built on Sept. 17, 2024, 5:08 p.m.