R/srt_stats.R

Defines functions srt_stats srt_ytlink srt_longestline srt_nchars srt_length srt_nlines srt_text srt_stop srt_start srt_id keep_chars

Documented in srt_stats

## Remove every non-alphabetic character (digits, punctuation,
## whitespace, ...) from each element of `x`.
keep_chars <- function(x) {
    non_alpha <- '([^[:alpha:]])'
    gsub(non_alpha, '', x)
}
## Return the `id` element of a single subtitle entry.
srt_id <- function(x) {
    x$id
}
## Format the start time of a subtitle entry as an "HH:MM:SS,mmm"
## timestamp string built from its hour/min/sec/msec fields.
srt_start <- function(x) {
    stamp_fmt <- "%02.0f:%02.0f:%02.0f,%03.0f"
    hh <- x$start_hour
    mm <- x$start_min
    ss <- x$start_sec
    ms <- x$start_msec
    sprintf(stamp_fmt, hh, mm, ss, ms)
}
## Format the stop time of a subtitle entry as an "HH:MM:SS,mmm"
## timestamp string built from its hour/min/sec/msec fields.
srt_stop <- function(x) {
    stamp_fmt <- "%02.0f:%02.0f:%02.0f,%03.0f"
    hh <- x$stop_hour
    mm <- x$stop_min
    ss <- x$stop_sec
    ms <- x$stop_msec
    sprintf(stamp_fmt, hh, mm, ss, ms)
}
## Join the text lines of a subtitle entry into a single string, using
## a literal backslash followed by "N" as the separator.
srt_text <- function(x) {
    text_lines <- x$text
    paste(text_lines, collapse = '\\N')
}
## Count the text lines of a subtitle entry.
srt_nlines <- function(x) {
    text_lines <- x$text
    length(text_lines)
}
## Duration of a subtitle entry: `stop_secs` minus `start_secs`.
srt_length <- function(x) {
    begins <- x$start_secs
    ends   <- x$stop_secs
    ends - begins
}
## Number of alphabetic characters in a subtitle entry, counted after
## concatenating all of its (letter-only) text lines into one string.
srt_nchars <- function(x) {
    letters_only <- keep_chars(x$text)
    joined <- paste0(letters_only, collapse = '')
    nchar(joined)
}
## Number of alphabetic characters in the longest text line of a
## subtitle entry.
srt_longestline <- function(x) {
    only_chars <- keep_chars(x$text)
    ## Seed max() with 0L so that an entry without any text yields 0
    ## instead of -Inf plus a "no non-missing arguments to max" warning.
    max(0L, nchar(only_chars))
}
## Build a youtu.be link whose `t=` offset is two seconds before the
## start of the subtitle entry.
##   x     : a subtitle entry providing `start_secs`
##   yt_id : video id inserted into the URL
## The offset is clamped at zero: an entry starting within the first
## two seconds would otherwise produce a negative `t=` value.
srt_ytlink <- function(x, yt_id) {
    lead_in <- 2
    seek_to <- pmax(0, floor(x$start_secs) - lead_in)
    sprintf("https://youtu.be/%s?t=%d", yt_id, seek_to)
}
    

#' Calculate common statistics for subtitle readability
#'
#' Computes, for every entry of a parsed subtitle file, its timing,
#' its length in alphabetic characters and its characters per second,
#' and flags the entries exceeding the readability thresholds. Rows
#' are sorted in decreasing order of failed checks, then characters
#' per second, then longest line, then number of lines.
#'
#' @param s srt object produced by read_srt
#' @param yt_id YouTube id for link creation
#' @param add_checks currently not used by the function; kept so that
#'     existing calls keep working
#' @param max_lines an entry with more text lines than this is flagged
#'     in \code{too_many_lines}
#' @param max_line_chars an entry whose longest line has more
#'     alphabetic characters than this is flagged in \code{long_line}
#' @param max_cps an entry with more alphabetic characters per second
#'     than this is flagged in \code{high_cps}
#' @return a data.frame with one row per entry and the columns
#'     \code{id}, \code{text}, \code{start}, \code{stop}, \code{secs},
#'     \code{nchars}, \code{cps}, \code{nchars_longest_line},
#'     \code{nlines}, \code{link}, \code{high_cps}, \code{long_line},
#'     \code{too_many_lines} and \code{nfails} (number of failed
#'     checks). The \code{link} column has class \code{'hyperlink'}.
#'     An empty \code{s} gives a zero-row data.frame with the same
#'     columns.
#'
#' @export
srt_stats <- function(s, yt_id = '', add_checks = TRUE,
                      max_lines = 2, max_line_chars = 42, max_cps = 30){
    ## Apply `f` to every entry and flatten the result. For an empty
    ## input unlist() would return NULL, which drops the column from
    ## the data.frame and makes the class assignment on `link` fail,
    ## so a typed zero-length vector is used instead.
    per_entry <- function(f, empty, ...) {
        if (length(s) == 0L) empty else unlist(lapply(s, f, ...))
    }
    ## id & stats
    id                  <- per_entry(srt_id, integer(0))
    text                <- per_entry(srt_text, character(0))
    start               <- per_entry(srt_start, character(0))
    stop                <- per_entry(srt_stop, character(0))
    secs                <- per_entry(srt_length, numeric(0))
    nlines              <- per_entry(srt_nlines, integer(0))
    nchars              <- per_entry(srt_nchars, integer(0))
    nchars_longest_line <- per_entry(srt_longestline, integer(0))
    ## a zero-length entry gives Inf here (NaN if it also has no letters)
    cps                 <- nchars / secs
    link                <- per_entry(srt_ytlink, character(0),
                                     yt_id = yt_id)
    ## checks
    too_many_lines <- nlines > max_lines
    long_line      <- nchars_longest_line > max_line_chars
    high_cps       <- cps > max_cps
    nfails         <- too_many_lines + long_line + high_cps
    ## results
    rval <- data.frame(id                  ,
                       text                ,
                       start               ,
                       stop                ,
                       secs                ,
                       nchars              ,
                       cps                 ,
                       nchars_longest_line ,
                       nlines              ,
                       link                ,
                       high_cps            ,
                       long_line           ,
                       too_many_lines      ,
                       nfails)

    ## worst offenders first
    ord <- with(rval, order(nfails, cps, nchars_longest_line, nlines,
                            decreasing = TRUE))
    rval <- rval[ord, ]
    rownames(rval) <- NULL
    class(rval$link) <- 'hyperlink'

    rval
}
lbraglia/lbav documentation built on March 26, 2021, 2:02 a.m.