R/AddVideoData.R

Defines functions GetVideoData AddVideoData.actor.youtube AddVideoData.actor.default AddVideoData.actor AddVideoData.default AddVideoData

Documented in AddVideoData AddVideoData.actor.youtube

#' @title Add columns of video information to network dataframes
#'
#' @description Network is supplemented with additional downloaded video information.
#'
#' @note Only supports YouTube actor networks. Refer to \code{\link{AddVideoData.actor.youtube}}.
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param youtubeAuth YouTube Authenticate object.
#' @param ... Additional parameters passed to function.
#'
#' @return Network as a named list of three dataframes containing \code{$nodes}, \code{$edges} and \code{$videos} nodes
#'   and edges include columns for additional video data.
#'
#' @aliases AddVideoData
#' @name AddVideoData
#' @export
AddVideoData <- function(net, youtubeAuth = NULL, ...) {
  msg <- f_verbose(check_dots("verbose", ...))
  msg("Adding video data to network...")

  if ("voson.video" %in% class(net)) {
    stop("Network already has video data attribute.")
  }

  if (is.null(youtubeAuth)) {
    stop("AddVideoData requires Authenticate object.", call. = FALSE)
  }

  # searches the class list of net for matching method
  UseMethod("AddVideoData", net)
}

#' @noRd
#' @export
AddVideoData.default <- function(net, youtubeAuth = NULL, ...) {
  stop("Unknown network type passed to AddVideoData.", call. = FALSE)
}

#' @noRd
#' @method AddVideoData actor
#' @export
AddVideoData.actor <- function(net, youtubeAuth = NULL, ...) {
  UseMethod("AddVideoData.actor", net)
}

#' @noRd
#' @export
AddVideoData.actor.default <-
  function(net, youtubeAuth = NULL, ...) {
    stop("Unknown social media type passed to AddVideoData.", call. = FALSE)
  }

#' @title Add video information to youtube actor network dataframes
#'
#' @description YouTube actor network is supplemented with additional downloaded video information. Adds video id,
#'   title, description and publish time as edge attributes. Nodes or actor references to video id's in the network are
#'   substituted with the actor id (video channel id) retrieved from the video details.
#'
#' @param net A named list of dataframes \code{nodes} and \code{edges} generated by \code{Create}.
#' @param youtubeAuth YouTube Authenticate object.
#' @param videoIds List. Video id's for which to download video information.
#' @param actorSubOnly Logical. Only substitute video id's for their publishers channel id. Don't add additional video
#'   data to edge list.
#' @param ... Additional parameters passed to function.
#'
#' @return Network as a named list of three dataframes containing \code{$nodes}, \code{$edges} and \code{$videos} nodes
#'   and edges include columns for additional video data.
#'
#' @examples
#' \dontrun{
#' # replace video id references with actors and add video id, title, description and plublish time
#' # to an actor network
#' actorNetwork <- collectData |> Create("actor") |> AddVideoData(youtubeAuth)
#'
#' # only replace video id references with actors that published videos in network
#' actorNetwork <- collectData |> Create("actor") |> AddVideoData(youtubeAuth, actorSubOnly = TRUE)
#'
#' # network
#' # actorNetwork$nodes
#' # actorNetwork$edges
#'
#' # dataframe of downloaded video data
#' # actorNetwork$videos
#' }
#'
#' @aliases AddVideoData.actor.youtube
#' @name AddVideoData.actor.youtube
#' @export
AddVideoData.actor.youtube <-
  function(net,
           youtubeAuth = NULL,
           videoIds = NULL,
           actorSubOnly = FALSE,
           ...) {

    if (is.null(videoIds)) {
      # extract video ids from network nodes
      rm_videoid_str <- function(col) {
        sub("VIDEOID:", "", col)
      }

      videoIds <-
        net$nodes |> dplyr::filter(.data$node_type == "video") |> dplyr::select(.data$id) |>
        dplyr::mutate_at(.vars = "id", .funs = rm_videoid_str)
      videoIds <- unlist(unname(as.list(videoIds)))
    }

    videoIds <- unique(videoIds)
    df_video <- GetVideoData(youtubeAuth, videoIds)

    net$videos <- df_video

    if (nrow(df_video) == 0) {
      msg("No video data could be retrieved.\n")
      return(net)
    }

    # downloaded video data
    df_video <- df_video |> dplyr::mutate(id = paste0("VIDEOID:", .data$VideoID))
    ch_details <-
      df_video |> dplyr::select(.data$id, .data$ChannelTitle, .data$ChannelID) |>
      dplyr::rename(
        video_author_title = .data$ChannelTitle,
        video_author_id = .data$ChannelID
      )

    # replace video id values in nodes with the video publishers id (channel id) and screen name
    net$nodes <- net$nodes |> dplyr::left_join(ch_details, by = c("id")) |>
      dplyr::mutate(
        id = dplyr::if_else(
          !is.na(.data$video_author_id),
          .data$video_author_id,
          .data$id
        ),
        screen_name = dplyr::if_else(
          !is.na(.data$video_author_id),
          .data$video_author_title,
          .data$screen_name
        ),
        node_type = dplyr::if_else(!is.na(.data$video_author_id), "actor", .data$node_type),
        video_author_id = NULL,
        video_author_title = NULL
      ) |>
      dplyr::distinct(.data$id, .keep_all = TRUE)

    ch_details <- ch_details |> dplyr::select(-.data$video_author_title)

    # replace from and to video id values in edges with the video publishers id (channel id)
    net$edges <- net$edges |> dplyr::left_join(ch_details, by = c("from" = "id")) |>
      dplyr::mutate(
        from = dplyr::if_else(
          !is.na(.data$video_author_id),
          .data$video_author_id,
          .data$from
        ),
        video_author_id = NULL
      ) |>
      dplyr::left_join(ch_details, by = c("to" = "id")) |>
      dplyr::mutate(
        to = dplyr::if_else(
          !is.na(.data$video_author_id),
          .data$video_author_id,
          .data$to
        ),
        video_author_id = NULL
      )

    # change node type from actor to publisher if they published a video in network
    net$nodes <- net$nodes |> dplyr::left_join(
      dplyr::filter(net$edges, .data$edge_type == "self-loop") |>
        dplyr::select(.data$from, .data$edge_type) |> dplyr::distinct(.keep_all = TRUE),
      by = c("id" = "from")
    ) |>
      dplyr::mutate(
        node_type = dplyr::if_else(!is.na(.data$edge_type), "publisher", .data$node_type),
        edge_type = NULL
      )

    # add video title, description and publish time
    if (actorSubOnly == FALSE) {
      net$edges <- net$edges |> dplyr::left_join(dplyr::select(df_video, dplyr::starts_with("Video")),
                                      by = c("video_id" = "VideoID")) |>
        dplyr::rename(
          video_title = .data$VideoTitle,
          video_description = .data$VideoDescription,
          video_published_at = .data$VideoPublishedAt
        )
    }

    class(net) <- union(class(net), c("voson.video"))
    msg("Done.\n")

    net
  }

# make a single api request for video data for supplied video id's
# when id's are supplied the api request for videos does not support paging
GetVideoData <- function(youtubeAuth, videoIds) {
  video_data <- list()

  base_url_videos <- "https://www.googleapis.com/youtube/v3/videos"
  api_opts_videos <- list(
    part = "snippet",
    id = paste0(videoIds, collapse = ","),
    key = youtubeAuth$auth
  )

  req <- httr::GET(base_url_videos, query = api_opts_videos)
  res <- httr::content(req)

  if (req$status_code != 200) {
    msg(
      paste0(
        "\nStatus: ",
        req$status_code,
        "\nError: ",
        res$error$code,
        "\nDetail: ",
        res$error$message,
        "\n"
      )
    )
    return(tibble::tibble())
  } else {
    video_data <- c(video_data, res$items)
  }

  df_video <- lapply(video_data, function(x) {
    tibble::tibble(
      VideoID = x$id,
      VideoTitle = x$snippet$title,
      VideoDescription = x$snippet$description,
      VideoPublishedAt = x$snippet$publishedAt,
      ChannelID = x$snippet$channelId,
      ChannelTitle = x$snippet$channelTitle
    )
  })

  # df_video <- tibble::as_tibble(do.call("rbind", df_video))
  df_video <- dplyr::bind_rows(df_video)

  df_video
}
vosonlab/vosonSML documentation built on April 28, 2024, 6:26 a.m.