#' Helper function to add ID column
#'
#' This function checks if the dataframe from Google
#' contains a column "id". If there is no said column,
#' this function adds one with the fitting ID from Google.
#'
#' @param data A tibble or dataframe containing a Google Trends
#' time series.
#' @param keyword The keyword or keywords that have been searched in
#' the prior function.
#' @param category The category ID from Google Trends.
#' @return A tibble with an added ID column, if there was no beforehand.
#' Otherwise, the original data will be returned.
#'
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom magrittr %>%
#' @importFrom tidyselect everything
#' @keywords internal
add_id_column <- function(data, keyword, category) {
if (!("id" %in% colnames(data))) {
# Case if a single keyword is used
if (length(keyword == 1) & category == 0) {
data <- mutate(
data,
id = keyword
) %>%
select(id, everything())
} else {
# Reformulate the category id into its name
data <- mutate(
data,
id = as.character(
gtrendsR::categories[gtrendsR::categories$id == category, 1]
)
) %>%
select(id, everything())
}
# If more than one keyword is used, the ID
# column will be automatically added by the
# search function before. Hence, this function
# doesn't need a thing to do.
}
return(data)
}
#' Preparation of Google Trends data
#'
#' @description \code{gtpreparation} downloads for
#' various search requests respective categories
#' data and applies a logarithmic transformation aswell as a
#' seasonal adjustment on downloaded data.
#' The function returns seasonal adjusted
#' first derivatives (lagged if desired).
#'
#' @param keyword A character vector with search requests.
#' @param category A numerical category ID from Google Trends. As of now,
#' only one category can be given. If you need to use more categories,
#' the use of a \code{for}-loop is recommended.
#' @param geo A geographical region to restrict the search queries to.
#' @param time Time period from where the relative values should be taken
#' (for more information, visit the
#' documentation of \code{\link[trendecon]{ts_gtrends}}). Attention:
#' As this function will only work on monthly data, you need to enter
#' for time a time frame that is longer than 5 years.
#' Otherwise, the time series from Google Trends will be based on
#' weekly or daily data and cannot be evaluated.
#' @param lags Number of delays in additional columns (max. value: 4).
#' Be careful, the first \code{i} months will not be returned
#' with \code{i} being the number of lags.
#'
#' @return Firstly, each row will be log transformed and
#' seasonal adjusted (via [seasonal::seas()]'s X-13 ARIMA methods).
#' Furthermore, the first derivatives of these adjusted time
#' series will be returned
#' (optionally with additional columns containing
#' lags).
#'
#' @import zoo
#' @importFrom dplyr across
#' @importFrom dplyr filter
#' @importFrom dplyr full_join
#' @importFrom dplyr lag
#' @importFrom dplyr mutate
#' @importFrom dplyr rename
#' @importFrom dplyr select
#' @importFrom gtrendsR gtrends
#' @importFrom magrittr %>%
#' @importFrom lubridate as_date
#' @importFrom lubridate years
#' @importFrom lubridate ymd
#' @importFrom stringr str_c
#' @importFrom tidyr pivot_longer
#' @importFrom tidyr pivot_wider
#' @importFrom tidyselect any_of
#' @importFrom trendecon ts_gtrends
#' @importFrom tsibble as_tsibble
#' @importFrom tsibble group_by_key
#' @examples
#' gtpreparation(keyword = "ikea", time = "2015-01-01 2021-01-01")
#' @export
gtpreparation <- function(keyword = NA,
category = 0,
geo = "DE",
time = str_c("2006-01-01 ", Sys.Date()),
lags = 0) {
# some date variables
start <- str_sub(time, 1, 10)
end <- str_sub(time, 12, 21)
dates <- seq.Date(from = as.Date(start), to = as.Date(end), by = "month")
# Only monthly time series can be used. Hence,
# anything shorter than 5 years cannot be analysed (as this
# are weekly/daily time series).
stopifnot("You need to use a time frame longer than 5 years (otherwise we wont have monthly data)!" = ymd(end) - ymd(start) > years(5))
# data containing a trend calculated on 250 GTrends time series'.
# comtrend is saved as internal data in
# R/sysdata.rda and is automatically
# loaded into namespace
fit <- comtrend %>%
select(time = date, trend) %>%
filter(time >= as.Date(start) & time <= as.Date(end))
# make search queries
google_data <- ts_gtrends(
keyword = keyword,
category = category,
geo = "DE",
time = time
) %>%
mutate(value = log(value)) %>%
mutate(value = replace(value, value == -Inf, NA_real_)) %>%
mutate(value = na.approx(value, rule = 2)) %>%
add_id_column(keyword, category)
# Trend adjust and seasonal adjust data
adjusted_data <- google_data %>%
full_join(fit, by = "time") %>%
mutate(time = as.Date(time), adj = value - trend) %>%
select(id, time, adj) %>%
seas_adj(method = "arima") %>%
rename(s_adj = value)
# group by category
grouped_data <- adjusted_data %>%
group_by_key() %>%
add_id_column(keyword, category)
# Add lagged columns
if (lags >= 1) grouped_data <- mutate(grouped_data, lag_1 = lag(s_adj))
if (lags >= 2) grouped_data <- mutate(grouped_data, lag_2 = lag(s_adj, 2))
if (lags >= 3) grouped_data <- mutate(grouped_data, lag_3 = lag(s_adj, 3))
if (lags == 4) grouped_data <- mutate(grouped_data, lag_4 = lag(s_adj, 4))
# Reorder some stuff
result <- grouped_data %>%
ungroup() %>%
rename(lag_0 = s_adj) %>%
filter(across(everything(), ~ !is.na(.))) %>%
pivot_longer(cols = -c(id, time), names_to = "lag", values_to = "value") %>%
pivot_wider(names_from = lag, values_from = value)
if (!("id" %in% names(result))) {
# Reformulate the category id into its name
result <- mutate(
result,
id = as.character(
gtrendsR::categories[gtrendsR::categories$id == category, 1]
)
)
}
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.