R/Merge.R
In vosonSML: Collecting Social Media Data and Generating Networks for Analysis

Documented in Merge MergeFiles

#' @title Merge collected data files
#'
#' @param path Directory path of Collect data to merge. Default is the working directory.
#' @param pattern Regular expression (regex) for matching file names to merge.
#' @param unique Logical. Remove duplicates based on observation id. Default is \code{TRUE}.
#' @param rev Logical. Reverses order of observations before removing duplicates. If collect data is provided
#'   chronologically then this should ensure the most recent copy of a duplicate is kept. Default is \code{TRUE}.
#' @param writeToFile Logical. Save data to a file in the current working directory. Default is \code{FALSE}.
#' @param verbose Logical. Output additional information. Default is \code{FALSE}.
#'
#' @return A merged Collect object.
#'
#' @aliases MergeFiles
#' @name MergeFiles
#' @export
MergeFiles <-
  function(path = ".",
           pattern = "(?-i).+?\\.rds$",
           unique = TRUE,
           rev = TRUE,
           writeToFile = FALSE,
           verbose = FALSE) {

    msg <- f_verbose(verbose)

    msg("Merging collect files...\n")

    if (!dir.exists(path)) stop("Path does not exist.")

    files <- list.files(path = path, pattern = pattern, full.names = TRUE)

    if (length(files) < 1) stop("No matching files found.")

    msg(paste0("Matching files:\n", paste0(paste0("- ", files), collapse = "\n"), "\n"))

    data <- purrr::reduce(lapply(files, readRDS), .f = Merge)

    if (writeToFile) write_output_file(data, "rds", "DataMergeFile", verbose = verbose)
    msg("Done.\n")

    data
  }

#' @title Merge collected data
#'
#' @param ... Collect data to merge.
#' @param unique Logical. Remove duplicates based on observation id. Default is \code{TRUE}.
#' @param rev Logical. Reverses order of observations before removing duplicates. If collect data is provided
#'   chronologically then this should ensure the most recent copy of a duplicate is kept. Default is \code{TRUE}.
#' @param writeToFile Logical. Save data to a file in the current working directory. Default is \code{FALSE}.
#' @param verbose Logical. Output additional information. Default is \code{FALSE}.
#'
#' @return A merged Collect object.
#'
#' @aliases Merge
#' @name Merge
#' @export
Merge <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  msg <- f_verbose(verbose)

  msg("Merging collect data...\n")

  merge_dots <- list(...)
  merge_cls_list <- lapply(merge_dots, class)

  if (!identical(unname(merge_cls_list[-length(merge_cls_list)]), unname(merge_cls_list[-1]))) {
    stop("Collect data must be of the same class.", call. = FALSE)
  }

  merge_cls_data <- unlist(merge_cls_list[1])
  class(merge_dots) <- merge_cls_data

  UseMethod("Merge", merge_dots)
}

#' @noRd
#' @export
Merge.default <- function(...) {
  stop("Unknown data type passed to merge.", call. = FALSE)
}

#' @noRd
#' @method Merge twitter
#' @export
Merge.twitter <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  merge_dots <- merge_dots
  merge_cls_data <- merge_cls_data

  data <- purrr::reduce(merge_dots, .f = merge_twitter)
  class(data) <- merge_cls_data

  rev_twitter_df <- function(x) {
    x$tweets <- rev_row_order(x$tweets)
    x$users <- rev_row_order(x$users)
    x
  }

  if (unique) {
    if (rev) data <- rev_twitter_df(data)
    data$tweets <- data$tweets |> dplyr::distinct(.data$status_id, .keep_all = TRUE)
    data$users <- data$users |> dplyr::distinct(.data$id_str, .keep_all = TRUE)
    if (rev) data <- rev_twitter_df(data)
  }

  if (writeToFile) write_output_file(data, "rds", "TwitterDataMerge", verbose = verbose)
  msg("Done.\n")

  data
}

#' @noRd
#' @method Merge youtube
#' @export
Merge.youtube <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  merge_cls_data <- merge_cls_data

  data <- dplyr::bind_rows(...)
  class(data) <- merge_cls_data

  if (unique) {
    if (rev) data <- rev_row_order(data)
    data <- data |> dplyr::distinct(.data$VideoID, .data$CommentID, .keep_all = TRUE)
    if (rev) data <- rev_row_order(data)
  }

  if (writeToFile) write_output_file(data, "rds", "YoutubeDataMerge", verbose = verbose)
  msg("Done.\n")

  data
}

#' @noRd
#' @method Merge reddit
#' @export
Merge.reddit <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
  merge_cls_data <- merge_cls_data

  data <- dplyr::bind_rows(...)
  class(data) <- merge_cls_data

  if (unique) {
    if (rev) data <- rev_row_order(data)
    data <- data |> dplyr::distinct(.data$subreddit, .data$thread_id, .data$comm_id, .keep_all = TRUE)
    if (rev) data <- rev_row_order(data)
  }

  if (writeToFile) write_output_file(data, "rds", "RedditDataMerge", verbose = verbose)
  msg("Done.\n")

  data
}

# #' @noRd
# #' @method Merge web
# #' @export
# Merge.web <- function(..., unique = TRUE, rev = TRUE, writeToFile = FALSE, verbose = FALSE) {
#   merge_cls_data <- merge_cls_data
#
#   data <- dplyr::bind_rows(...)
#   class(data) <- merge_cls_data
#
#   if (writeToFile) write_output_file(data, "rds", "WebDataMerge", verbose = verbose)
#   msg("Done.\n")
#
#   data
# }

merge_twitter <- function(x, y) {
  list(tweets = dplyr::bind_rows(x$tweets, y$tweets),
       users = dplyr::bind_rows(x$users, y$users))
}

rev_row_order <- function(x) {
  x[nrow(x):1, ]
}

Any scripts or data that you put into this service are public.

vosonSML documentation built on Aug. 16, 2022, 5:14 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

vosonSML
Collecting Social Media Data and Generating Networks for Analysis

R/Merge.R
In vosonSML: Collecting Social Media Data and Generating Networks for Analysis

Defines functions rev_row_order merge_twitter Merge.reddit Merge.youtube Merge.twitter Merge.default Merge MergeFiles

Documented in Merge MergeFiles

Try the vosonSML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

vosonSML Collecting Social Media Data and Generating Networks for Analysis

R/Merge.R In vosonSML: Collecting Social Media Data and Generating Networks for Analysis

Defines functions rev_row_order merge_twitter Merge.reddit Merge.youtube Merge.twitter Merge.default Merge MergeFiles

Documented in Merge MergeFiles

Try the vosonSML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

vosonSML
Collecting Social Media Data and Generating Networks for Analysis

R/Merge.R
In vosonSML: Collecting Social Media Data and Generating Networks for Analysis