R/AddText.R

Defines functions AddText.actor.reddit AddText.activity.reddit AddText.actor.youtube AddText.activity.youtube AddText.twomode.default AddText.twomode AddText.semantic.default AddText.semantic AddText.actor.default AddText.actor AddText.activity.default AddText.activity AddText.default AddText

Documented in AddText AddText.activity.reddit AddText.activity.youtube AddText.actor.reddit AddText.actor.youtube

#' @title Add columns containing text data to network dataframes
#'
#' @description Network is supplemented with additional social media text data applied as node or edge attributes.
#'
#' @note Supports social media \code{activity} and \code{actor} networks. Refer to \code{\link{AddText.activity.reddit}}
#'   and \code{\link{AddText.actor.reddit}} for additional reddit parameters. Refer to
#'   \code{\link{AddText.actor.youtube}} for additional YouTube actor network parameters.
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param ... Additional parameters passed to function.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#'   containing text data.
#'
#' @examples
#' \dontrun{
#' # add text to an activity network
#' net_activity <- collect_data |>
#'   Create("activity") |> AddText(collect_data)
#'
#' # network
#' net_activity$nodes
#' net_activity$edges
#' }
#'
#' @aliases AddText
#' @name AddText
#' @export
AddText <- function(net, data, ...) {
  msg <- f_verbose(check_dots("verbose", ...))
  msg("Adding text data to network...")

  if ("voson.text" %in% class(net)) {
    stop("Network already has text attribute.")
  }

  # searches the class list of net for matching method
  UseMethod("AddText", net)
}

#' @noRd
#' @export
AddText.default <- function(net, ...) {
  stop("Unknown network type passed to AddText.", call. = FALSE)
}

#' @noRd
#' @method AddText activity
#' @export
AddText.activity <- function(net, ...) {
  UseMethod("AddText.activity", net)
}

#' @noRd
#' @export
AddText.activity.default <- function(net, ...) {
  stop("Unknown social media type passed to AddText.", call. = FALSE)
}

#' @noRd
#' @method AddText actor
#' @export
AddText.actor <- function(net, ...) {
  UseMethod("AddText.actor", net)
}

#' @noRd
#' @export
AddText.actor.default <- function(net, ...) {
  stop("Unknown social media type passed to AddText.", call. = FALSE)
}

#' @noRd
#' @method AddText semantic
#' @export
AddText.semantic <- function(net, ...) {
  UseMethod("AddText.semantic", net)
}

#' @noRd
#' @export
AddText.semantic.default <- function(net, ...) {
  stop("Unknown social media type passed to AddText.", call. = FALSE)
}

#' @noRd
#' @method AddText twomode
#' @export
AddText.twomode <- function(net, ...) {
  UseMethod("AddText.twomode", net)
}

#' @noRd
#' @export
AddText.twomode.default <- function(net, ...) {
  stop("Unknown social media type passed to AddText.", call. = FALSE)
}

#' @title Add columns containing text data to YouTube activity network dataframes
#'
#' @description Text comments are added to the network as node attributes.
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#'   containing text data.
#'
#' @examples
#' \dontrun{
#' # add text to an activity network
#' net_activity <- collect_yt |>
#'   Create("activity") |> AddText(collect_yt)
#'
#' # network
#' net_activity$nodes
#' net_activity$edges
#' }
#'
#' @aliases AddText.actor.youtube
#' @name AddText.actor.youtube
#' @export
AddText.activity.youtube <- function(net, data, ...) {

  net$nodes <- dplyr::left_join(
    net$nodes,
    dplyr::select(data, .data$CommentID, .data$Comment) |>
      dplyr::rename(
        id = .data$CommentID,
        vosonTxt_comment = .data$Comment
      ),
    by = c("id")
  )

  class(net) <- union(class(net), c("voson.text"))
  msg("Done.\n")

  net
}

#' @title Add columns containing text data to YouTube actor network dataframes
#'
#' @description Text comments are added to the network as edge attributes. References to actors are detected at the
#'   beginning of comments and edges redirected to that actor instead if they differ from the top-level comment author.
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param repliesFromText Logical. If comment text for an edge begins with \code{screen_name} change the edge to be
#'   directed to \code{screen_name} - if different from the top level comment author that the reply comment was posted
#'   to. Default is \code{FALSE}.
#' @param atRepliesOnly Logical. Comment \code{screen_names} must begin with an '@' symbol to be redirected. Default is
#'   \code{TRUE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#'   containing text data.
#'
#' @examples
#' \dontrun{
#' # add text to an actor network ignoring references to actors at
#' # the beginning of comment text
#' net_actor <- collect_yt |>
#'   Create("actor") |>
#'   AddText(collect_yt, repliesFromText = FALSE)
#'
#' # network
#' net_actor$nodes
#' net_actor$edges
#' }
#'
#' @aliases AddText.actor.youtube
#' @name AddText.actor.youtube
#' @export
AddText.actor.youtube <-
  function(net,
           data,
           repliesFromText = FALSE,
           atRepliesOnly = TRUE,
           ...) {

    net$edges <- net$edges |> dplyr::left_join(
      dplyr::select(data, .data$CommentID, .data$Comment) |>
        dplyr::rename(
          comment_id = .data$CommentID,
          vosonTxt_comment = .data$Comment
        ),
      by = c("comment_id")
    )

    # in comment reply to
    if (repliesFromText) {
      net$edges <- net$edges |> dplyr::left_join(dplyr::select(net$nodes, -.data$node_type),
                                      by = c("from" = "id"))
      vid_comments <-
        dplyr::select(net$edges, .data$video_id, .data$vosonTxt_comment) |> purrr::transpose()

      net$edges$at_id <- sapply(vid_comments, function(x) {
        for (name_at in net$nodes$screen_name) {
          if (atRepliesOnly) {
            name_at_regex <- paste0("^", escape_regex(paste0("@", name_at)))
          } else {
            name_at_regex <- paste0("^[@]?", escape_regex(name_at))
          }

          if (grepl(name_at_regex, x$vosonTxt_comment)) {
            to_id <-
              dplyr::filter(net$edges,
                            .data$screen_name == name_at & .data$video_id == x$video_id) |>
              dplyr::select(.data$from) |> dplyr::distinct()
            if (nrow(to_id)) {
              return(as.character(tail(to_id, n = 1)))
            } # choose last match - best effort
          }
        }
        return(as.character(NA))
      })
      net$edges <- net$edges |> dplyr::mutate(
        to = ifelse(is.na(.data$at_id), .data$to, .data$at_id),
        edge_type = ifelse(
          is.na(.data$at_id),
          .data$edge_type,
          "reply-comment-text"
        )
      )
    }

    class(net) <- union(class(net), c("voson.text"))
    msg("Done.\n")

    net
  }

#' @title Add columns containing text data to reddit activity network dataframes
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param cleanText Logical. Simple removal of problematic characters for XML 1.0 standard. Implemented to prevent
#'   reddit specific XML control character errors when generating graphml files. Default is \code{FALSE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#'   containing text data.
#'
#' @examples
#' \dontrun{
#' # add text to an activity network
#' net_activity <- collect_rd |>
#'   Create("activity") |>
#'   AddText(collect_rd)
#'
#' # network
#' net_activity$nodes
#' net_activity$edges
#' }
#'
#' @aliases AddText.activity.reddit
#' @name AddText.activity.reddit
#' @export
AddText.activity.reddit <-
  function(net, data, cleanText = FALSE, ...) {

    net$nodes <- dplyr::left_join(
      net$nodes,
      dplyr::mutate(data, id = paste0(.data$thread_id, ".", .data$structure)) |>
        dplyr::select(.data$id, .data$subreddit, .data$comment),
      by = c("id", "subreddit")
    )

    threads <-
      dplyr::select(data,
                    .data$subreddit,
                    .data$thread_id,
                    .data$title,
                    .data$post_text) |>
      dplyr::distinct() |> dplyr::mutate(id = paste0(.data$thread_id, ".0"), thread_id = NULL)

    net$nodes <-
      dplyr::left_join(net$nodes, threads, by = c("id", "subreddit")) |>
      dplyr::mutate(comment = ifelse(.data$node_type == "thread", .data$post_text, .data$comment)) |>
      dplyr::select(-c(.data$post_text)) |> dplyr::rename(vosonTxt_comment = .data$comment)

    if (cleanText) {
      net$nodes$vosonTxt_comment <-
        xml_clean_reddit(net$nodes$vosonTxt_comment)
      net$nodes$title <- xml_clean_reddit(net$nodes$title)
    }

    class(net) <- union(class(net), c("voson.text"))
    msg("Done.\n")

    net
  }

#' @title Add columns containing text data to reddit actor network dataframes
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param data A dataframe generated by \code{Collect}.
#' @param cleanText Logical. Simple removal of problematic characters for XML 1.0 standard. Implemented to prevent
#'   reddit specific XML control character errors when generating graphml files. Default is \code{FALSE}.
#' @param ... Additional parameters passed to function. Not used in this method.
#'
#' @return Network as a named list of two dataframes containing \code{$nodes} and \code{$edges} including columns
#'   containing text data.
#'
#' @examples
#' \dontrun{
#' # add text to an actor network ignoring references to actors at the beginning of
#' # comment text
#' net_actor <- collect_rd |>
#'   Create("actor") |>
#'   AddText(collect_rd)
#'
#' # network
#' net_actor$nodes
#' net_actor$edges
#' }
#'
#' @aliases AddText.actor.reddit
#' @name AddText.actor.reddit
#' @export
AddText.actor.reddit <-
  function(net, data, cleanText = FALSE, ...) {

    # rename the edge attribute containing the thread comment
    net$edges <- dplyr::left_join(
      net$edges,
      dplyr::select(
        data,
        .data$subreddit,
        .data$thread_id,
        .data$id,
        .data$comment
      ),
      by = c("subreddit", "thread_id", "comment_id" = "id")
    ) |>
      dplyr::rename(vosonTxt_comment = .data$comment)

    authors <-
      dplyr::select(data,
                    .data$subreddit,
                    .data$thread_id,
                    .data$title,
                    .data$post_text) |>
      dplyr::distinct() |> dplyr::mutate(comment_id = 0)

    net$edges <-
      dplyr::left_join(net$edges,
                       authors,
                       by = c("subreddit", "thread_id", "comment_id")) |>
      dplyr::mutate(
        vosonTxt_comment = ifelse(
          .data$comment_id == 0,
          .data$post_text,
          .data$vosonTxt_comment
        ),
        post_text = NULL
      )

    net$edges$vosonTxt_comment <-
      ifelse(trimws(net$edges$vosonTxt_comment) == "",
             NA,
             net$edges$vosonTxt_comment)

    if (cleanText) {
      net$edges$vosonTxt_comment <-
        xml_clean_reddit(net$edges$vosonTxt_comment)
      net$edges$title <- xml_clean_reddit(net$edges$title)
    }

    class(net) <- union(class(net), c("voson.text"))
    msg("Done.\n")

    net
  }
vosonlab/vosonSML documentation built on Feb. 1, 2024, 7:58 p.m.