R/videoListDownload.R

Defines functions videoListDownload

Documented in videoListDownload

#' Downloading by an URL seed
#' @description Download YouTube videos/audios by providing an URL seed,
#' which is any URL in a video list.
#' @param urlSeed the URL seed, i.e. any URL in a video list (playlist).
#' @param path the path to save video/audio files. The default is "./".
#' @param saveFileList logical, forwarded to \code{videoListTable};
#' presumably whether the video/audio list table is saved as a csv file
#' (to be confirmed against \code{videoListTable}). The columns of this
#' table are 'rowNames', 'data.index', 'data.video.title', 'fileName',
#' and 'URL', in which 'URL' is the most important information.
#' The default is \code{TRUE}.
#' @param sleepTime numeric, the time to pause to prevent YouTube blocking.
#' The default is 10 seconds.
#' @param maxDownload numeric, the maximum number of files to download.
#' If the list contains more files, only the first \code{maxDownload}
#' entries are downloaded. The default is 1000.
#' @param priority the file format to download. The option can be
#' any one (or combination) of "mp4", "best", "audio only", "mp3",
#' "webm". The formats are tried in the order given; the default
#' is c("audio only", "best", "mp4"), which means the downloader will
#' look for an audio-only file first, if this file does not exist, then
#' the downloader will look for the best file marked by YouTube, then
#' look for an mp4 file.
#' @param bothVideoAudio logical, whether to download both audio and video,
#' it should be FALSE if only downloading audio or video. The default
#' is \code{TRUE}.
#' @param webmConvert either NULL (not convert), "mp4" or "mp3",
#' convert webm file format to "mp4" or "mp3". Any other value only
#' triggers a warning. The default is NULL.
#' @return the folder name reported by \code{videoListTable}; this is the
#' folder handed to the downloader and to the webm converters.
#' @import curl
#' @importFrom limma strsplit2
#' @import urltools
#' @import stringr
#' @import V8
#' @export
#' @examples {
#' \dontrun{
#' url0 = "https://www.youtube.com/watch?v=DejHQYAGb7Q&list=PLkDaE6sCZn6F6wUI9tvS_Gw1vaFAx6rd6"
#' # the folder to save downloaded files
#' folder = "/data/surfDrive/TutorialVideos/"
#'
#' # #### download video
#' newFolder = videoListDownload(urlSeed = url0, path = folder,
#'                               saveFileList = TRUE,
#'                               sleepTime = 5, maxDownload = 200,
#'                               bothVideoAudio = TRUE)
#'
#' # #### download audio
#' newFolder = videoListDownload(urlSeed = url0, path = folder,
#'                                saveFileList = TRUE,
#'                                sleepTime = 5, maxDownload = 200,
#'                                priority = c("audio only"),
#'                                bothVideoAudio = FALSE,
#'                                webmConvert = "mp3")
#' }
#' }

##### download a list of videos/audios if an URL seed provided (only for "Playlist" in YouTube)
videoListDownload = function(urlSeed,
                             path = "./",
                             saveFileList = TRUE,
                             sleepTime = 10,
                             maxDownload = 1000,
                             priority = c("audio only", "best", "mp4"),
                             bothVideoAudio = TRUE,
                             webmConvert = NULL) {
  # copyright: Weiyang Tao 2017-11-02
  # Build the file list table for the playlist behind the URL seed.
  tableList = videoListTable(
    urlSeed = urlSeed,
    path = path,
    saveFileList = saveFileList,
    sleepTime = sleepTime,
    maxDownload = maxDownload,
    priority = priority,
    bothVideoAudio = bothVideoAudio
  )
  orderTitle = tableList$fileTable
  folderName = tableList$folderName

  # Cap the number of files to download at maxDownload.
  num = nrow(orderTitle)
  if (num > maxDownload) {
    message(num,
            " files in total, only downloading ",
            maxDownload,
            " file(s).")
    num = maxDownload
  }

  if (num >= 1) {
    # seq_len() instead of 1:num, so the index can never become c(1, 0).
    videoListDownloadByTable(
      fileTable = orderTitle,
      path = folderName,
      bothVideoAudio = bothVideoAudio,
      priority = priority,
      id = seq_len(num),
      sleepTime = sleepTime
    )
  } else {
    # Empty list (or maxDownload < 1): there is no row to hand to the
    # downloader, so skip it instead of requesting non-existent rows.
    message("No files to download.")
  }

  # Optional conversion of the downloaded webm files.
  # identical() keeps the test scalar-safe: a vector or NA value falls
  # through to the warning instead of failing inside if().
  if (!is.null(webmConvert)) {
    if (identical(webmConvert, "mp3")) {
      audio2mp3(fileFormat = "webm",
                path = folderName,
                removeSource = FALSE)
    } else if (identical(webmConvert, "mp4")) {
      video2mp4(fileFormat = "webm",
                path = folderName,
                removeSource = FALSE)
    } else {
      warning("Unknown file format to convert.")
    }
  }
  return(folderName)
}
paodan/youtubeDownloader documentation built on Nov. 15, 2020, 9:48 p.m.