# Functions for merging collected (Collect) data files and objects.
#' @title Merge collected data files
#'
#' @param path Directory path of Collect data to merge. Default is the working directory.
#' @param pattern Regular expression (regex) for matching file names to merge.
#' @param unique Logical. Remove duplicates based on observation id. Default is \code{TRUE}.
#' @param rev Logical. Reverses order of observations before removing duplicates. If collect data is provided
#' chronologically then this should ensure the most recent copy of a duplicate is kept. Default is \code{TRUE}.
#' @param writeToFile Logical. Save data to a file in the current working directory. Default is \code{FALSE}.
#' @param verbose Logical. Output additional information. Default is \code{FALSE}.
#'
#' @return A merged Collect object.
#'
#' @aliases MergeFiles
#' @name MergeFiles
#' @export
MergeFiles <-
  function(path = ".",
           pattern = "(?-i).+?\\.rds$",
           unique = TRUE,
           rev = TRUE,
           writeToFile = FALSE,
           verbose = FALSE) {
    msg <- f_verbose(verbose)
    msg("Merging collect files...\n")

    if (!dir.exists(path)) stop("Path does not exist.")

    files <- list.files(path = path, pattern = pattern, full.names = TRUE)
    if (length(files) < 1) stop("No matching files found.")
    msg(paste0("Matching files:\n", paste0(paste0("- ", files), collapse = "\n"), "\n"))

    # Read every matching file and merge them in a single variadic call so
    # that unique, rev and verbose are honoured. Previously the files were
    # reduced pairwise via `purrr::reduce(.f = Merge)`, which called Merge
    # with its default arguments and silently ignored these parameters.
    # writeToFile is handled here (not forwarded) so the saved file carries
    # the file-merge name rather than a per-type merge name.
    data <- do.call(
      Merge,
      c(lapply(files, readRDS),
        list(unique = unique, rev = rev, writeToFile = FALSE, verbose = verbose))
    )

    if (writeToFile) write_output_file(data, "rds", "DataMergeFile", verbose = verbose)

    msg("Done.\n")
    data
  }
#' @title Merge collected data
#'
#' @param ... Collect data to merge.
#' @param unique Logical. Remove duplicates based on observation id. Default is \code{TRUE}.
#' @param rev Logical. Reverses order of observations before removing duplicates. If collect data is provided
#' chronologically then this should ensure the most recent copy of a duplicate is kept. Default is \code{TRUE}.
#' @param writeToFile Logical. Save data to a file in the current working directory. Default is \code{FALSE}.
#' @param verbose Logical. Output additional information. Default is \code{FALSE}.
#'
#' @return A merged Collect object.
#'
#' @aliases Merge
#' @name Merge
#' @export
Merge <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  msg <- f_verbose(verbose)
  msg("Merging collect data...\n")

  # Collect all inputs and require them to share an identical class vector:
  # comparing the class list against itself shifted by one means every
  # adjacent pair (and therefore every element) must match.
  merge_dots <- list(...)
  merge_cls_list <- lapply(merge_dots, class)
  if (!identical(unname(merge_cls_list[-length(merge_cls_list)]), unname(merge_cls_list[-1]))) {
    stop("Collect data must be of the same class.", call. = FALSE)
  }

  # Tag the list of inputs with the shared class vector so S3 dispatch below
  # selects the matching method (e.g. Merge.twitter for class "twitter").
  merge_cls_data <- unlist(merge_cls_list[1])
  class(merge_dots) <- merge_cls_data

  # NOTE: the locals defined above (msg, merge_dots, merge_cls_data) stay
  # visible inside the dispatched method -- UseMethod evaluates the method in
  # an environment that inherits the generic's local variables. The Merge.*
  # methods in this file rely on this; do not move or rename these locals.
  UseMethod("Merge", merge_dots)
}
#' @noRd
#' @export
Merge.default <- function(...) {
  # Fallback method: reached only when the inputs' class has no specific
  # Merge implementation in this file. Always fails with an informative error.
  unsupported <- "Unknown data type passed to merge."
  stop(unsupported, call. = FALSE)
}
#' @noRd
#' @method Merge twitter
#' @export
Merge.twitter <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  # merge_dots, merge_cls_data and msg are locals of the Merge generic; they
  # are visible here because UseMethod evaluates methods in an environment
  # inheriting the generic's frame. The self-assignments make that dependency
  # explicit (presumably also quieting "no visible binding" checks -- confirm).
  merge_dots <- merge_dots
  merge_cls_data <- merge_cls_data

  # Pairwise-combine the tweets and users tables of all inputs, then restore
  # the original Collect class vector lost by the plain-list helper.
  merged <- purrr::reduce(merge_dots, .f = merge_twitter)
  class(merged) <- merge_cls_data

  # Reverse row order of both component tables.
  flip_rows <- function(obj) {
    obj$tweets <- rev_row_order(obj$tweets)
    obj$users <- rev_row_order(obj$users)
    obj
  }

  if (unique) {
    # Optionally reverse so the later (most recent) copy of a duplicate is
    # the one distinct() keeps, then restore the original ordering.
    if (rev) merged <- flip_rows(merged)
    merged$tweets <- dplyr::distinct(merged$tweets, .data$status_id, .keep_all = TRUE)
    merged$users <- dplyr::distinct(merged$users, .data$id_str, .keep_all = TRUE)
    if (rev) merged <- flip_rows(merged)
  }

  if (writeToFile) write_output_file(merged, "rds", "TwitterDataMerge", verbose = verbose)

  msg("Done.\n")
  merged
}
#' @noRd
#' @method Merge youtube
#' @export
Merge.youtube <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  # merge_cls_data and msg are locals of the Merge generic, visible here
  # because UseMethod evaluates methods in an environment inheriting the
  # generic's frame.
  merge_cls_data <- merge_cls_data

  # Stack all input data frames and restore the original class vector.
  merged <- dplyr::bind_rows(...)
  class(merged) <- merge_cls_data

  if (unique) {
    # Optionally reverse so the most recent copy of a duplicate is kept,
    # de-duplicate on the video/comment id pair, then restore the ordering.
    if (rev) merged <- rev_row_order(merged)
    merged <- dplyr::distinct(merged, .data$VideoID, .data$CommentID, .keep_all = TRUE)
    if (rev) merged <- rev_row_order(merged)
  }

  if (writeToFile) write_output_file(merged, "rds", "YoutubeDataMerge", verbose = verbose)

  msg("Done.\n")
  merged
}
#' @noRd
#' @method Merge reddit
#' @export
Merge.reddit <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  # merge_cls_data and msg are locals of the Merge generic, visible here
  # because UseMethod evaluates methods in an environment inheriting the
  # generic's frame.
  merge_cls_data <- merge_cls_data

  # Stack all input data frames and restore the original class vector.
  merged <- dplyr::bind_rows(...)
  class(merged) <- merge_cls_data

  if (unique) {
    # Optionally reverse so the most recent copy of a duplicate is kept,
    # de-duplicate on subreddit/thread/comment id, then restore the ordering.
    if (rev) merged <- rev_row_order(merged)
    merged <- dplyr::distinct(merged, .data$subreddit, .data$thread_id, .data$comm_id, .keep_all = TRUE)
    if (rev) merged <- rev_row_order(merged)
  }

  if (writeToFile) write_output_file(merged, "rds", "RedditDataMerge", verbose = verbose)

  msg("Done.\n")
  merged
}
# #' @noRd
# #' @method Merge web
# #' @export
# Merge.web <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
# merge_cls_data <- merge_cls_data
#
# data <- dplyr::bind_rows(...)
# class(data) <- merge_cls_data
#
# if (writeToFile) write_output_file(data, "rds", "WebDataMerge", verbose = verbose)
# msg("Done.\n")
#
# data
# }
# Combine two twitter Collect objects by row-binding their tweets and users
# tables. Returns a plain list; the caller (Merge.twitter) reapplies the
# Collect class to the result.
merge_twitter <- function(x, y) {
  tweets <- dplyr::bind_rows(x$tweets, y$tweets)
  users <- dplyr::bind_rows(x$users, y$users)
  list(tweets = tweets, users = users)
}
# Reverse the row order of a data frame.
#
# Uses rev(seq_len(nrow(x))) rather than nrow(x):1 because for a zero-row
# input nrow(x):1 evaluates to c(0, 1), indexing a phantom row and returning
# a bogus one-row NA frame. drop = FALSE keeps single-column base data frames
# from collapsing to a vector.
rev_row_order <- function(x) {
  x[rev(seq_len(nrow(x))), , drop = FALSE]
}