## Remove every non-alphabetic character from each element of `x`,
## leaving only letters.
keep_chars <- function(x) {
  gsub(pattern = "([^[:alpha:]])", replacement = "", x = x)
}
## Return the `id` element of a single subtitle entry.
srt_id <- function(x) {
  x$id
}
## Format the start time of a subtitle entry as "HH:MM:SS,mmm", built from
## the `start_hour`, `start_min`, `start_sec` and `start_msec` elements of `x`.
srt_start <- function(x) {
  stamp_format <- "%02.0f:%02.0f:%02.0f,%03.0f"
  sprintf(stamp_format, x$start_hour, x$start_min, x$start_sec, x$start_msec)
}
## Format the stop time of a subtitle entry as "HH:MM:SS,mmm", built from
## the `stop_hour`, `stop_min`, `stop_sec` and `stop_msec` elements of `x`.
srt_stop <- function(x) {
  stamp_format <- "%02.0f:%02.0f:%02.0f,%03.0f"
  sprintf(stamp_format, x$stop_hour, x$stop_min, x$stop_sec, x$stop_msec)
}
## Join the text lines of a subtitle entry into a single string, separated
## by the literal two-character sequence backslash + "N".
srt_text <- function(x) {
  line_break <- "\\N"
  paste(x$text, collapse = line_break)
}
## Number of text lines in a subtitle entry.
srt_nlines <- function(x) {
  text_lines <- x$text
  length(text_lines)
}
## Duration of a subtitle entry: `stop_secs` minus `start_secs`.
srt_length <- function(x) {
  begins <- x$start_secs
  ends <- x$stop_secs
  ends - begins
}
## Total number of letters in a subtitle entry: non-alphabetic characters
## are stripped from every line, the lines are glued together and the
## resulting string is measured.
srt_nchars <- function(x) {
  letters_only <- keep_chars(x$text)
  glued <- paste0(letters_only, collapse = "")
  nchar(glued)
}
## Number of letters in the longest line of a subtitle entry.
##
## Non-alphabetic characters are stripped from each line before measuring.
## An entry without any text lines yields 0.
srt_longestline <- function(x) {
  only_chars <- keep_chars(x$text)
  ## Including 0L keeps max() from returning -Inf (with a warning) when the
  ## entry has no lines; nchar() is never negative, so other results are
  ## unaffected.
  max(0L, nchar(only_chars))
}
## Build a YouTube link that starts playback shortly before a subtitle.
##
## x:     a single subtitle entry; its `start_secs` element is used.
## yt_id: YouTube video id inserted into the URL.
##
## Returns a "https://youtu.be/<id>?t=<seconds>" string, where <seconds> is
## the start time rounded down and moved two seconds earlier.
srt_ytlink <- function(x, yt_id) {
  ## Clamp at zero: subtitles beginning in the first two seconds would
  ## otherwise produce a negative time offset in the link.
  offset <- max(0, floor(x$start_secs) - 2)
  sprintf("https://youtu.be/%s?t=%d", yt_id, offset)
}
#' Calculate common statistics for subtitle readability
#'
#' Computes per-subtitle statistics (duration, number of lines, number of
#' letters, letters in the longest line, letters per second) and flags the
#' entries with more than 2 lines, more than 42 letters in the longest line,
#' or more than 30 letters per second. Only alphabetic characters are
#' counted.
#'
#' @param s srt object produced by read_srt
#' @param yt_id YouTube id for link creation
#' @param add_checks currently unused: the check columns are always
#'   returned. Kept so that existing calls passing it keep working.
#'
#' @return A data.frame with one row per subtitle and columns id, text,
#'   start, stop, secs, nchars, cps, nchars_longest_line, nlines, link,
#'   high_cps, long_line, too_many_lines and nfails. Rows are sorted in
#'   decreasing order of nfails, then cps, nchars_longest_line and nlines.
#'   The link column has class "hyperlink". An empty \code{s} gives a
#'   data.frame with the same columns and zero rows.
#'
#' @export
srt_stats <- function(s, yt_id = '', add_checks = TRUE) {
  ## Apply `f` to every subtitle and flatten the result. For an empty `s`
  ## return the typed zero-length vector `empty`, so that the result still
  ## has every column instead of failing further down.
  collect <- function(f, empty, ...) {
    if (length(s) == 0L) {
      return(empty)
    }
    unlist(lapply(s, f, ...))
  }

  ## id & stats
  ## NOTE(review): integer is assumed for the id of an empty input; adjust
  ## if read_srt stores ids with another type.
  id <- collect(srt_id, integer(0))
  text <- collect(srt_text, character(0))
  start_time <- collect(srt_start, character(0))
  stop_time <- collect(srt_stop, character(0))
  secs <- collect(srt_length, numeric(0))
  nlines <- collect(srt_nlines, integer(0))
  nchars <- collect(srt_nchars, integer(0))
  nchars_longest_line <- collect(srt_longestline, integer(0))
  cps <- nchars / secs
  link <- collect(srt_ytlink, character(0), yt_id = yt_id)

  ## checks
  too_many_lines <- nlines > 2
  long_line <- nchars_longest_line > 42
  high_cps <- cps > 30
  nfails <- too_many_lines + long_line + high_cps

  ## results
  ## stringsAsFactors is set explicitly so the character columns (and the
  ## "hyperlink" class set below) do not depend on the R version default.
  rval <- data.frame(id = id,
                     text = text,
                     start = start_time,
                     stop = stop_time,
                     secs = secs,
                     nchars = nchars,
                     cps = cps,
                     nchars_longest_line = nchars_longest_line,
                     nlines = nlines,
                     link = link,
                     high_cps = high_cps,
                     long_line = long_line,
                     too_many_lines = too_many_lines,
                     nfails = nfails,
                     stringsAsFactors = FALSE)

  ## worst offenders first
  ord <- with(rval, order(nfails, cps, nchars_longest_line, nlines,
                          decreasing = TRUE))
  rval <- rval[ord, ]
  rownames(rval) <- NULL
  class(rval$link) <- 'hyperlink'
  rval
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.