Nothing
#' get_caption
#' @title Obtaining YouTube video caption/subtitle in a tidy tibble form.
#' @aliases get_caption
#' @keywords get_caption
#' @description Use this function to download a YouTube video caption as a tidy tibble (data frame) and, optionally, save it as an Excel file in a directory of your choice (the current working directory by default).
#' @export get_caption
#' @param url A string value for a single YouTube video link URL. A typical form should start with "https://www.youtube.com/watch?v=" followed by a unique video ID.
#' @param language two-character language code for the video URL. Set to "en" (English) by default. You can change this to fit with your needs (e.g., "ko" for Korean, "de" for German, etc.).
#' @param savexl A logical value for determining whether or not to save the obtained tidy YouTube caption data as an Excel file. The default is FALSE which does not save it as a file. If set to TRUE, a file named "YT_caption_`videoID`.xlsx" is saved in the directory given by `path` (the default is your current working directory).
#' @param openxl A logical value for determining whether or not to open, if any, the saved YouTube_caption Excel file in your working directory. The default is FALSE. TRUE works only when the preceding argument (i.e., savexl) is set to TRUE.
#' @param path A character vector of full path names; the default corresponds to the working directory, \link[base]{getwd}. Tilde expansion (see \link[base]{path.expand}) is performed. Missing values will be ignored.
#' @details
#' See example below.
#' @return tibble (advanced data.frame) object for a YouTube video caption will be returned.
#' @examples
#' \donttest{
#' library(youtubecaption)
#' # Let's get the video caption out of Hadley Wickham's "You can't do data science in a GUI":
#' url <- "https://www.youtube.com/watch?v=cpbtcsGE0OA"
#' caption <- get_caption(url)
#' caption
#'
#' # Save the caption as an Excel file and open it right away
#' ## Changing path to temp for the demonstration purpose only:
#' get_caption(url = url, savexl = TRUE, openxl = TRUE, path = tempdir())
#' }
#'
#' @author JooYoung Seo, \email{jooyoung@psu.edu}
#' @author Soyoung Choi, \email{sxc940@psu.edu}
#' @references \url{https://pypi.org/project/youtube-transcript-api/}
get_caption <-
  function(url = NULL, language = "en", savexl = FALSE, openxl = FALSE, path = getwd()) {
    # Download the caption of a single YouTube video and return it as a tibble
    # with one row per caption segment, a running `segment_id` and the video
    # ID in `vid`. Optionally writes the result to "<path>/YT_caption_<vid>.xlsx"
    # (savexl) and opens that file (openxl).

    # ---- 1. Validate the input before any (slow) Python/conda work ----------
    if (is.null(url)) {
      stop("Please pass the first argument (YouTube Video URL).")
    }
    invalid_url_msg <- "Please make sure the provided URL is valid YouTube video link. Play/channel list is not acceptable."
    if (!is.character(url) || length(url) != 1L || is.na(url)) {
      stop(invalid_url_msg)
    }
    # Capture only the value of the `v` query parameter. Splitting on "?v="
    # would keep trailing parameters such as "&t=30s" or "&list=..." inside
    # the ID and make the transcript lookup fail.
    id_match <- regmatches(
      url,
      regexec("youtube[.]com/watch[?]([^#]*&)?v=([^&#]+)", url)
    )[[1]]
    if (length(id_match) == 0L) {
      stop(invalid_url_msg)
    }
    vid <- id_match[3] # element 1 is the full match, 2 the optional prefix

    # ---- 2. Make sure the dedicated conda environment exists ----------------
    envnm <- "R_youtube_caption"
    tryCatch(
      {
        if (!(envnm %in% reticulate::conda_list()$name)) {
          reticulate::conda_create(envnm, packages = c("python=3.7.3"), conda = "auto")
        }
      },
      error = function(e) {
        # Keep the original hint but surface the real cause as well, since
        # conda can fail for reasons other than not being installed.
        stop(
          "Need to install Anaconda from https://www.anaconda.com/download/.",
          " (Underlying error: ", conditionMessage(e), ")",
          call. = FALSE
        )
      }
    )
    # Deliberately outside the tryCatch (not in `finally`): these steps must
    # only run when the environment check/creation above succeeded, otherwise
    # their own errors would mask the message raised by the handler.
    reticulate::use_condaenv(envnm, required = TRUE)
    if (!reticulate::py_module_available("youtube_transcript_api")) {
      reticulate::conda_install(envnm, packages = c("youtube-transcript-api"), pip = TRUE)
    }

    # ---- 3. Fetch the transcript and tidy it --------------------------------
    transcript_api <- reticulate::import("youtube_transcript_api")$YouTubeTranscriptApi
    fetched <- transcript_api$get_transcripts(
      video_ids = list(vid),
      languages = list(language)
    )
    # The first element of the result holds the transcripts of the requested
    # videos; only one video was requested, so take its first entry.
    segments <- fetched[[1]][[1]]
    caption_df <- purrr::map_dfr(segments, tibble::as_tibble)
    caption_df <- tibble::rowid_to_column(caption_df, "segment_id")
    caption_df <- dplyr::mutate(caption_df, vid = vid)

    # ---- 4. Optional Excel export -------------------------------------------
    if (savexl) {
      file_name <- file.path(path.expand(path), paste0("YT_caption_", vid, ".xlsx"))
      writexl::write_xlsx(caption_df, file_name)
    }
    if (openxl) {
      if (!savexl) {
        # Nothing was written in this call, so there is nothing to open.
        warning("You have not saved the caption file yet. Use TRUE for 'savexl' (the third argument) in advance.")
      } else if (file.exists(file_name)) {
        # Opening the file is best effort: a failure here must not discard
        # the caption that was already downloaded.
        tryCatch(
          utils::browseURL(file_name),
          error = function(e) {
            warning(
              "Could not open '", file_name, "': ", conditionMessage(e),
              call. = FALSE
            )
          }
        )
      }
    }

    caption_df
  }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.