#' Plots tweets data as a time series-like data object.
#'
#' Creates a ggplot2 plot of the frequency of tweets over a specified
#' interval of time.
#'
#' @param data Data frame or grouped data frame.
#' @param by Desired interval of time expressed as numeral plus one of
#' "secs", "mins", "hours", "days", "weeks", "months", or
#' "years". If a numeric is provided, the value is assumed to be in
#' seconds.
#' @param trim The number of observations to drop off the beginning
#' and end of the time series.
#' @param tz Time zone to be used, defaults to "UTC" (Twitter default)
#' @param ... Other arguments passed to
#' \code{\link[graphics]{plot}}.
#' @return If
#' A basic time series plot
#' @examples
#'
#' \dontrun{
#'
#' ## search for tweets containing "rstats"
#' rt <- search_tweets("rstats", n = 10000)
#'
#' ## plot frequency in 1 min intervals
#' ts_plot(rt, "mins")
#'
#'
#' }
#' @family ts_data
#' @export
ts_plot <- function(data, by = "days", trim = 0L, tz ="UTC", ...) {
do.call("ts_plot_", list(data = data, by = by, trim = trim, tz = tz, ...))
}
#' @importFrom graphics plot
ts_plot_ <- function(data, by = "days", trim = 0L, tz ="UTC", ...) {
data <- ts_data(data, by, trim, tz)
Time <- data[["created_at"]]
Count <- data[["n"]]
graphics::plot(Time, Count)
}
#' Converts tweets data into time series-like data object.
#'
#' Returns data containing the frequency of tweets over a specified
#' interval of time.
#'
#' @param data Data frame or grouped data frame.
#' @param by Desired interval of time expressed as numeral plus one of
#' "secs", "mins", "hours", "days", "weeks", "months", or
#' "years". If a numeric is provided, the value is assumed to be in
#' seconds.
#' @param trim Number of observations to trim off the front and end of
#' each time series
#' @param tz Time zone to be used, defaults to "UTC" (Twitter default)
#' @return Data frame with time, n, and grouping column if applicable.
#' @examples
#'
#' \dontrun{
#'
#' ## handles of women senators
#' sens <- c("SenatorBaldwin", "SenGillibrand", "PattyMurray", "SenatorHeitkamp")
#'
#' ## get timelines for each
#' sens <- get_timeline(sens, n = 3200)
#'
#' ## get single time series for tweets
#' ts_data(sens)
#'
#' ## using weekly intervals
#' ts_data(sens, "weeks")
#'
#' }
#'
#' @export
ts_data <- function(data, by = "days", trim = 0L, tz ="UTC") {
args <- list(data = data, by = by, trim = trim, tz = tz)
do.call("ts_data_", args)
}
ts_data_ <- function(data, by = "days", trim = 0L, tz = "UTC") {
stopifnot(is.data.frame(data), is.atomic(by))
if (has_name_(data, "created_at")) {
dtvar <- "created_at"
} else {
dtvar <- vapply(data, inherits, "POSIXct", FUN.VALUE = logical(1))
if (sum(dtvar) == 0L) stop("no datetime (POSIXct) var found", call. = FALSE)
dtvar <- names(data)[which(dtvar)[1]]
}
## drop NAs and sort data
data <- data[!is.na(data[[dtvar]]), ]
data <- data[order(data[[dtvar]]), ]
## reformat time var
.unit <- parse_unit(by)
## adjust to desired tz
data[[dtvar]] <- as.POSIXct(format(data[[dtvar]], tz = "UTC"), tz = tz)
data[[dtvar]] <- round_time(data[[dtvar]], by, tz)
## get unique values of time in series
dtm <- unique(
seq(data[[dtvar]][1], data[[dtvar]][length(data[[dtvar]])], .unit)
)
## if grouped df (up to 2 groups)
if (inherits(data, "grouped_df") &&
("groups" %in% names(attributes(data)) ||
"labels" %in% names(attributes(data)))) {
if (!"groups" %in% names(attributes(data)) &&
"labels" %in% names(attributes(data))) {
groups <- names(attr(data, "labels"))
} else {
groups <- names(attr(data, "groups"))
groups <- groups[!groups %in% ".rows"]
}
if (length(groups) > 1L) {
group2 <- groups[2]
} else {
group2 <- NULL
}
group1 <- groups[1]
lv1 <- unique(data[[group1]])
df1 <- as.POSIXct(character(), tz = tz)
df2 <- integer()
df3 <- list()
if (!is.null(group2)) {
lv2 <- unique(data[[group2]])
df4 <- list()
## count expressions for each row for output time series-like data
for (i in seq_along(dtm)) {
for (j in seq_along(lv1)) {
for (k in seq_along(lv2)) {
df1[length(df1) + 1L] <- dtm[i]
df2[length(df2) + 1L] <- sum(
data[[dtvar]] == dtm[i] &
data[[group1]] == lv1[j] &
data[[group2]] == lv2[k],
na.rm = TRUE
)
df3[[length(df3) + 1L]] <- lv1[j]
df4[[length(df4) + 1L]] <- lv2[k]
}
}
}
df <- data.frame(
time = df1,
n = df2,
g1 = unlist(df3),
g2 = unlist(df4),
stringsAsFactors = FALSE
)
names(df)[3:4] <- groups[1:2]
} else {
## count expressions for each row for output time series-like data
for (i in seq_along(dtm)) {
for (j in seq_along(lv1)) {
df1[length(df1) + 1L] <- dtm[i]
df2[length(df2) + 1L] <- sum(
data[[dtvar]] == dtm[i] &
data[[group1]] == lv1[j],
na.rm = TRUE
)
df3[[length(df3) + 1L]] <- lv1[j]
}
}
df <- data.frame(
time = df1,
n = df2,
g1 = unlist(df3),
stringsAsFactors = FALSE
)
names(df)[3] <- group1
}
} else {
df <- data.frame(
time = dtm,
n = vapply(dtm, function(x) sum(data[[dtvar]] == x), FUN.VALUE = integer(1)),
stringsAsFactors = FALSE
)
}
df <- as_rtwibble(df)
if (trim > 0L) {
df <- trim_ts(df, trim)
}
df
}
parse_unit <- function(by) {
stopifnot(is.atomic(by))
if (is.numeric(by)) {
return(by)
} else if (grepl("year", by)) {
n <- 60 * 60 * 24 * 365
} else if (grepl("month", by)) {
n <- 60 * 60 * 24 * 30
} else if (grepl("week", by)) {
n <- 60 * 60 * 24 * 7
} else if (grepl("day", by)) {
n <- 60 * 60 * 24
} else if (grepl("hour", by)) {
n <- 60 * 60
} else if (grepl("min", by)) {
n <- 60
} else if (grepl("sec", by)) {
n <- 1
} else {
stop("must express time interval in secs, mins, hours, days, weeks, months, or years",
call. = FALSE)
}
x <- as.double(gsub("[^[:digit:]|\\.]", "", by))
if (any(is.na(x), identical(x, ""))) {
x <- 1
}
n * x
}
#' A generic function for rounding date and time values
#'
#' @param x A vector of class POSIX or Date.
#' @param n Unit to round to. Defaults to mins. Numeric values treated
#' as seconds. Otherwise this should be one of "mins", "hours", "days",
#' "weeks", "months", "years" (plural optional).
#' @param tz Time zone to be used, defaults to "UTC" (Twitter default)
#' @return If POSIXct then POSIX. If date then Date.
#' @examples
#'
#' ## class posixct
#' round_time(Sys.time(), "12 hours")
#'
#' ## class date
#' unique(round_time(seq(Sys.Date(), Sys.Date() + 100, "1 day"), "weeks"))
#'
#' @export
round_time <- function(x, n, tz) UseMethod("round_time")
#' @export
round_time.POSIXt <- function(x, n = "mins", tz = "UTC") {
n <- parse_to_secs(n)
#as.POSIXct(hms::hms(as.numeric(x) %/% n * n), tz = tz)
hms(as.numeric(x) %/% n * n, tz = tz)
}
hms <- function(secs = NULL, tz = "UTC") {
if (is.null(secs)) {
secs <- numeric()
}
structure(secs, tzone = tz,
class = c("POSIXct", "POSIXt"))
}
#' @export
round_time.Date <- function(x, n = "months", tz = "UTC") {
x <- as.POSIXct(format(x, tz = "UTC"), tz = tz)
as.Date(round_time(x, n, tz = tz))
}
round_time2 <- function(x, interval = 60, center = TRUE, tz = "UTC") {
stopifnot(inherits(x, "POSIXct"))
## parse interval
interval <- parse_unit(interval)
## round off to lowest value
rounded <- floor(as.numeric(x) / interval) * interval
if (center) {
## center so value is interval mid-point
rounded <- rounded + round(interval * .5, 0)
}
## return to date-time
as.POSIXct(rounded, tz = tz, origin = "1970-01-01")
}
trim_ts <- function(data, trim = 1L) {
if (ncol(data) > 2L) {
g <- unique(data[[3]])
g <- lapply(g, function(x) trim_ots(data[data[[3]] == x, ], trim, trim))
g <- do.call("rbind", g)
if (ncol(data) == 4L) {
g2 <- unique(data[[4]])
g2 <- lapply(g2, function(x) trim_ots(data[data[[4]] == x, ], trim, trim))
g2 <- do.call("rbind", g2)
g <- rbind(g, g2)
}
g
} else {
trim_ots(data, trim, trim)
}
}
trim_ots <- function(x, f = 1L, l = 1L) {
x <- x[order(x[[1]]), ]
f <- seq_len(f)
l <- nrow(x) - seq_len(l) + 1L
if ((length(l) + length(f)) >= nrow(x)) {
return(x)
}
x[-c(f, l), ]
}
parse_to_secs <- function(x) {
if (is.numeric(x)) {
n <- x
} else if (grepl("year", x)) {
n <- 60 * 60 * 24 * 365
} else if (grepl("month", x)) {
n <- 60 * 60 * 24 * 30
} else if (grepl("week", x)) {
n <- 60 * 60 * 24 * 7
} else if (grepl("day", x)) {
n <- 60 * 60 * 24
} else if (grepl("hour", x)) {
n <- 60 * 60
} else if (grepl("min", x)) {
n <- 60
} else if (grepl("sec", x)) {
n <- 1
} else {
stop("must express time interval in secs, mins, hours, days, weeks, months, or years",
call. = FALSE)
}
x <- as.double(gsub("[^[:digit:]|\\.]", "", x))
if (any(is.na(x), identical(x, ""))) {
x <- 1
}
n * x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.