R/read_dv.R

Defines functions inspect `plays<-` plays print.summary.datavolleylist dvlist_summary print.summary.datavolley summary.datavolley dv_read

Documented in dvlist_summary dv_read inspect plays print.summary.datavolley print.summary.datavolleylist summary.datavolley

## main reader function

#' Read a DataVolley or VolleyStation file
#'
#' The `do_transliterate` option may be helpful when trying to work with multiple files from the same competition, since different text encodings may be used on different files. This can lead to e.g. multiple versions of the same team name. Transliterating can help avoid this, at the cost of losing e.g. diacriticals. Transliteration is applied after converting from the specified text encoding to UTF-8. Common encodings used with DataVolley files include "windows-1252" (western Europe), "windows-1250" (central Europe), "iso-8859-1" (western Europe and Americas), "iso-8859-2" (central/eastern Europe), "iso-8859-13" (Baltic languages)
#'
#' @references \url{http://www.dataproject.com/IT/en/Volleyball}
#' @param filename string: file name to read
#' @param insert_technical_timeouts logical or list: should we insert technical timeouts? If TRUE, technical timeouts are inserted at points 8 and 16 of sets 1--4 (for indoor files) or when the team scores sum to 21 in sets 1--2 (beach). Otherwise a two-element list can be supplied, giving the scores at which technical timeouts will be inserted for sets 1--4, and  set 5.
#' @param do_warn logical: should we issue warnings about the contents of the file as we read it?
#' @param extra_validation numeric: should we run some extra validation checks on the file? 0=no extra validation, 1=check only for major errors, 2=somewhat more extensive, 3=the most extra checking
#' @param validation_options list: additional options to pass to the validation step. See [dv_validate()] for details
#' @param do_transliterate logical: should we transliterate all text to ASCII? See details
#' @param encoding character: text encoding to use. Text is converted from this encoding to UTF-8. A vector of multiple encodings can be provided, and this function will attempt to choose the best. If encoding is "guess", the encoding will be guessed
#' @param date_format string: the expected date format (one of "ymd", "mdy", or "dmy") or "guess". If `date_format` is something other than "guess", that date format will be preferred where dates are ambiguous
#' @param surname_case string or function: should we change the case of player surnames? If `surname_case` is a string, valid values are "upper","lower","title", or "asis"; otherwise `surname_case` may be a function that will be applied to the player surname strings
#' @param skill_evaluation_decode function or string: if `skill_evaluation_decode` is a string, it can be either "default" (use the default DataVolley conventions for dvw or vsm files), "volleymetrics" (to follow the scouting conventions used by VolleyMetrics), "german" (same as "default" but with B/ and B= swapped), or "guess" (use volleymetrics if it looks like a VolleyMetrics file, otherwise default). If `skill_evaluation_decode` is a function, it should convert skill evaluation codes into meaningful phrases. See [skill_evaluation_decoder()]
#' @param custom_code_parser function: function to process any custom codes that might be present in the datavolley file. This function takes one input (the `datavolley` object) and should return a list with two named components: `plays` and `messages`
#' @param metadata_only logical: don't process the plays component of the file, just the match and player metadata
#' @param verbose logical: if TRUE, show progress
#' @param edited_meta list: if supplied, will be used in place of the metadata present in the file itself. This makes it possible to, for example, read a file, edit the metadata, and re-parse the file but using the modified metadata
#'
#' @return A named list with several elements. `meta` provides match metadata, `plays` is the main play-by-play data in the form of a data.frame. `raw` is the line-by-line content of the datavolley file. `messages` is a data.frame describing any inconsistencies found in the file.
#'
#' @seealso [skill_evaluation_decoder()], [dv_validate()]
#' @examples
#' \dontrun{
#'   ## to read the example file bundled with the package
#'   myfile <- dv_example_file()
#'   x <- dv_read(myfile, insert_technical_timeouts=FALSE)
#'   summary(x)
#'
#'   ## or to read your own file:
#'   x <- dv_read("c:/some/path/myfile.dvw", insert_technical_timeouts=FALSE)
#'
#'   ## Insert a technical timeout at point 12 in sets 1 to 4:
#'   x <- dv_read(myfile, insert_technical_timeouts=list(c(12),NULL))
#'
#'   ## to read a VolleyMetrics file
#'   x <- dv_read(myfile, skill_evaluation_decode = "volleymetrics")
#' }
#' @export
dv_read <- function(filename, insert_technical_timeouts = TRUE, do_warn = FALSE, do_transliterate = FALSE, encoding = "guess", date_format = "guess", extra_validation = 2, validation_options = list(), surname_case = "asis", skill_evaluation_decode = "default", custom_code_parser, metadata_only = FALSE, verbose = FALSE, edited_meta) {
    assert_that(is.string(filename))
    if (nchar(filename) < 1) stop("filename was specified as an empty string (\"\")")
    if (!file.exists(filename)) stop("specified input file (", filename, ") does not exist")
    assert_that(is.flag(insert_technical_timeouts) || is.list(insert_technical_timeouts))
    assert_that(is.flag(do_warn), !is.na(do_warn))
    assert_that(is.flag(do_transliterate), !is.na(do_transliterate))
    assert_that(is.flag(metadata_only), !is.na(metadata_only))
    assert_that(is.flag(verbose), !is.na(verbose))
    assert_that(is.string(date_format))
    date_format <- match.arg(tolower(date_format), c("guess", "dmy", "ymd", "mdy"))
    date_format_suggested <- NULL
    assert_that(is.numeric(extra_validation),extra_validation %in% 0:3)
    assert_that(is.list(validation_options))
    assert_that(is.string(surname_case) || is.function(surname_case))


    ft <- dv_file_type(filename) ## dvw, vsm, psvb, hxml
    if (ft == "psvb") {
        stop("for perana/vbstats files use `pv_read` from the peranavolley package")
    } else if (ft == "unknown") {
        ## treat as dvw
        ft <- "dvw"
    } else if (!ft %in% c("dvw", "hxml", "vsm")) {
        stop("unknown or unsupported file type")
    }
    if (ft == "dvw") dvlines <- stringi::stri_trans_general(readLines(filename, warn = FALSE, n = 100L), "latin-ascii")
    ## figure out the scouting conventions to follow
    if (is.string(skill_evaluation_decode)) {
        skill_evaluation_decode <- match.arg(tolower(skill_evaluation_decode), c("default", "volleymetrics", "guess", "german"))
        if (skill_evaluation_decode == "guess") {
            ## if file type is vsm or hxml, pass the guessing responsibility to the specific handler
            if (ft == "dvw") {
                ## otherwise for dvw, guess here
                is_vm <- tryCatch(any(grepl("volleymetric", dvlines, ignore.case = TRUE)), error = function(e) FALSE)
                skill_evaluation_decode <- if (is_vm) "volleymetrics" else "default"
            } else if (ft == "psvb") {
                skill_evaluation_decode <- "default"
            }
            if (ft == "dvw" && skill_evaluation_decode %eq% "volleymetrics" && is.null(date_format_suggested)) date_format_suggested <- "mdy"
        }
        if (ft == "dvw") skill_evaluation_decode <- skill_evaluation_decoder(style = skill_evaluation_decode)
    }
    ## pass non-dvw readers off to their dedicated read functions
    if (ft == "vsm") {
        return(dv_read_vsm(filename, skill_evaluation_decode = skill_evaluation_decode, insert_technical_timeouts = insert_technical_timeouts, do_transliterate = do_transliterate, extra_validation = extra_validation, validation_options = validation_options, verbose = verbose))
    } else if (ft == "hxml") {
        return(dv_read_hxml(filename, skill_evaluation_decode = skill_evaluation_decode, insert_technical_timeouts = insert_technical_timeouts, do_transliterate = do_transliterate, extra_validation = extra_validation, validation_options = validation_options, verbose = verbose))
    }
    assert_that(is.function(skill_evaluation_decode))
    if (is.null(date_format_suggested)) {
        ## check if the decoder function is the volleymetrics one
        is_vm <- tryCatch(identical(get("dtbl", envir = environment(skill_evaluation_decode)), get("dtbl", envir = environment(skill_evaluation_decoder(style = "volleymetrics")))), error = function(e) FALSE)
        if (is_vm) date_format_suggested <- "mdy"
    }
    ## note that we use a preferred date format of mdy for volleymetrics files here, but if a user has opened and re-saved the file with different date format, this will be wrong ...
    if (!missing(edited_meta)) {
        assert_that(is.list(edited_meta))
        ## test for names? c("match", "more", "result", "teams", "players_h", "players_v", "attacks", "sets", "match_id", "filename")
    }
    if (!missing(custom_code_parser)) assert_that(is.function(custom_code_parser) || is.null(custom_code_parser))
    ## don't do this unless FIVB rules change?
    ##    if (missing(insert_technical_timeouts)) warning("the current default value insert_technical_timeouts=TRUE will change to FALSE in a forthcoming release")
    out <- list()
    file_text <- readLines(filename, warn = FALSE) ## read raw lines in
    ## some files have a wacky string of 0x00 bytes at the start, which means that the first line is empty (should be "[3DATAVOLLEYSCOUT]")
    if (grepl("^$", file_text[1], useBytes = TRUE) && length(grep("[3MATCH]", file_text, useBytes = TRUE, fixed = TRUE)) == 1 &&
        length(grep("[3TEAMS]", file_text, useBytes = TRUE, fixed = TRUE)) == 1 && !any(grepl("[3DATAVOLLEYSCOUT]", file_text, useBytes = TRUE, fixed = TRUE))) {
        file_text[1] <- "[3DATAVOLLEYSCOUT]"
    }
    ## also check to see if the [3SCOUT] entry is the last one, if it is then we can't read the file anyway, it has no play-by-play data
    if (length(file_text) > 0 && grepl("[3SCOUT]", file_text[length(file_text)], fixed = TRUE, useBytes = TRUE) && !isTRUE(metadata_only)) {
        stop("file cannot be read, the [3SCOUT] section is empty (no plays have been scouted)")
    }
    if (length(encoding) < 1) encoding <- "guess"
    assert_that(is.character(encoding))
    are_guessing_encoding <- identical(tolower(encoding), "guess")
    if (length(encoding) > 1 || are_guessing_encoding) {
        ## test from [3MATCH] section to end of [3SETTERCALL] or [3ATTACKCOMBINATION]
        idx1 <- suppressWarnings(grep("[3MATCH]", file_text, fixed = TRUE)) + 1L
        idx2 <- suppressWarnings(grep("[3WINNINGSYMBOLS]", file_text, fixed = TRUE)) - 1L ## click and scout doesn't have this, nor does it have attack combos or setter calls
        ## fallback
        if (length(idx1) < 1 || is.na(idx1)) idx1 <- 10
        if (length(idx2) < 1 || is.na(idx2)) idx2 <- 90
        if (!isTRUE(idx2 > idx1)) { idx1 <- 10; idx2 <- 90; }
        ok <- FALSE
        enclist <- character()
        if (!are_guessing_encoding) {
            ## user has provided multiple encodings to try
            encodings_to_test <- encoding
        } else {
            likely_encodings <- detect_encodings(file_text)
            ## add common ones, in order of preference if tied in scores
            encodings_to_test <- tolower(c(likely_encodings$likely, likely_encodings$embedded_resolved, "windows-1252", "iso-8859-2", "windows-1250", "US-ASCII", "UTF-8", "windows-1251", "SHIFT-JIS", "CP932", "iso-8859-9", "iso-8859-4", "iso-8859-5", "iso-8859-7", "iso-8859-13", "iso-8859-16"))
            ## windows-1252 should be used in preference to "iso-8859-1", see https://en.wikipedia.org/wiki/ISO/IEC_8859-1
            ## windows-1250 is similar to 8859-2 but not identical
            ## additional notes from wikipedia https://en.wikipedia.org/wiki/ISO/IEC_8859
            ## iso-8859-3 is Maltese and Esperanto
            ## iso-8859-4 Estonian, Latvian, Lithuanian, Greenlandic, and Sami.
            ## iso-8859-5 Cyrillic alphabet, including Belarusian, Bulgarian, Macedonian, Russian, Serbian, and Ukrainian (partial)
            ## iso-8859-6 Covers the most common Arabic language characters. Does not support other languages using the Arabic script. Needs to be BiDi and cursive joining processed for display.
            ## iso-8859-7 modern Greek
            ## iso-8859-8 modern Hebrew
            ## iso-8859-9 Turkish (NB largely the same as ISO/IEC 8859-1, replacing the rarely used Icelandic letters with Turkish ones)
            ## iso-8859-10 A rearrangement of Latin-4. Considered more useful for Nordic languages. Baltic languages use Latin-4 more.
            ## iso-8859-11 Thai
            ## iso-8859-12 Latin/Devanagari but abandoned, possibly being reserved for ISCII Indian
            ## iso-8859-13 Baltic Rim
            ## iso-8859-14 Celtic languages such as Gaelic and the Breton language. Welsh letters correspond to the earlier (1994) ISO-IR-182.
            ## iso-8859-15 A revision of 8859-1 that removes some little-used symbols, replacing them with the euro sign € and the letters Š, š, Ž, ž, Œ, œ, and Ÿ, which completes the coverage of French, Finnish and Estonian.
            ## iso-8859-16 Albanian, Croatian, Hungarian, Italian, Polish, Romanian and Slovene, but also Finnish, French, German and Irish Gaelic (new orthography). The focus lies more on letters than symbols. The generic currency sign is replaced with the euro sign
            encodings_to_test <- unique(intersect(encodings_to_test, tolower(iconvlist())))
        }
        ## if it's a DV4 file with embedded UTF8 strings, try basing our guess on those first
        ## there's some ambiguity about how much we care about text encodings if we have a DV4 file, because most of the text should be available in the utf8-encoded sections (which we should be able to decode regardless of what encoding was used to read the file) and so it doesn't matter if we read the file with the wrong encoding. BUT we won't get e.g. attack and set code descriptions, because these aren't provided as utf8-encoded copies. So we still want to get the encoding right. But (i) it's possible that the player names etc are in plain ASCII and so the embedded utf8 sections don't give us any hints about the broader file encoding, and (ii) it's presumably possible that the file has been read and re-saved (in some other non-DV4-software, maybe DV2007) with a different encoding to that used by DV4. The other software won't update the embedded utf8 sections, so if we guess our encoding based on the embedded utf8 it will be wrong. But it's difficult to see how we can cope with that
        if (any_dv_utf8(file_text)) {
            chk <- enc_from_embedded_utf8(file_text, encodings_to_test = encodings_to_test)
            if (length(chk$encodings) > 0 && chk$error_score < 1) {
                enclist <- chk$encodings
                ok <- TRUE
            }
        }
        ## if this doesn't give an unambiguous answer, it probably indicates that the file was text-edited or modified by other software, or that the embedded utf8 encodings are equivalent to plain ASCII so we don't get any info from them
        ## if there is no embedded UTF8 (file from earlier DV version), or if it does not give an unambiguous answer, try the embedded encoding BUT only if it appears in the list suggested by detect_encodings
        if (!ok && are_guessing_encoding && length(likely_encodings$embedded_resolved) == 1 && likely_encodings$embedded_resolved %in% likely_encodings$likely) {
            try({
                out <- dv_read(filename = filename, insert_technical_timeouts = insert_technical_timeouts, do_warn = do_warn, do_transliterate = do_transliterate, encoding = likely_encodings$embedded_resolved, date_format = date_format, extra_validation = extra_validation, validation_options = validation_options, surname_case = surname_case, skill_evaluation_decode = skill_evaluation_decode, custom_code_parser = custom_code_parser, metadata_only = metadata_only, verbose = verbose, edited_meta = edited_meta)
                ## TODO: perhaps check that this actually worked, because there are files with the wrong encoding specified in their metadata. But with luck this situation should be uncommon, because an incorrect embedded encoding hopefully won't appear in the list of suggested encodings
                ## some files also appear to use different encodings for the home/visiting player lists, but it is going to be very difficult to handle the possibility of different encodings in different parts of the file
                if (verbose) message("Using text encoding: ", likely_encodings$embedded_resolved)
                return(out)
            }, silent = TRUE)
        }
        ## and finally just test the encodings, either those provided by the user or the guessed list
        if (!ok) {
            expect_tildes <- tryCatch(!any(grepl("PRG: Essential Stats", dvlines)), error = function(e) FALSE)
            chk <- get_best_encodings(encodings_to_test, expect_tildes = expect_tildes, file_text = file_text, read_from = idx1, read_to = idx2) ## filename = filename
            if (!isTRUE(chk$error_score < 1) && are_guessing_encoding) {
                ## haven't found an encoding with zero error score, but we have relied on stri_enc_detect
                ## now just brute force it over all possible encodings (will be slow)
                encoding_brute <- get_best_encodings(iconvlist(), expect_tildes = expect_tildes, file_text = file_text, read_from = idx1, read_to = idx2) ## filename = filename
                if (length(encoding_brute$encodings) > 0 && encoding_brute$error_score < chk$error_score) chk <- encoding_brute
            }
            enclist <- chk$encodings
        }
        if (length(enclist) < 1) enclist <- "windows-1251" ## absolute fallback
        ## so now we have a list of possible encodings
        ## in order of preference: a windows encoding, then UTF-8, then US-ASCII, then CP, then just whatever was first
        other_enc <- enclist
        if (any(grepl("^windows",tolower(enclist)))) {
            encoding <- enclist[grepl("^windows",tolower(enclist))][1]
        } else if (any(tolower(enclist) %in% c("utf-8", "utf8"))) {
            encoding <- enclist[tolower(enclist) %in% c("utf-8", "utf8")][1]
        } else if (any(tolower(enclist) %in% c("us-ascii"))) {
            encoding <- enclist[tolower(enclist) %in% c("us-ascii")][1]
        } else if (any(grepl("^cp[[:digit:]\\-]",tolower(enclist)))) {
            encoding <- enclist[grepl("^cp[[:digit:]\\-]",tolower(enclist))][1]
        } else {
            encoding <- enclist[1]
        }
        other_enc <- setdiff(other_enc, encoding)
        if (verbose) {
            message(sprintf("Using text encoding: %s", encoding))
            if (length(other_enc) > 0)
                message(sprintf(" (Other possible options: %s)", paste(other_enc, collapse = ", ")))
        }
    }
    file_text <- txt2utf8(file_text, from = encoding) ## convert from our detected encoding to utf-8
    ## get rid of the "Secondo tocco di  la" with encoding on the trailing a, which is part of the default dv-generated file structure
    file_text <- sub("Secondo tocco di[[:space:]]+l.;", "Secondo tocco di la;", file_text)

    ## so we got to here, either by reading the file, or using the supplied file_text
    out$raw <- file_text

    if (do_transliterate) file_text <- stri_trans_general(file_text, "latin-ascii")

    ## file metadata
    if (!do_warn) {
        suppressWarnings(temp <- read_filemeta(file_text, date_format = if (date_format == "guess") date_format_suggested else date_format))
    } else {
        temp <- read_filemeta(file_text, date_format = if (date_format == "guess") date_format_suggested else date_format)
    }
    ## if the last-change date was unambiguous, the match date should follow this (it doesn't always, but it's often true)
    ## at this point: date_format is the date_format specified by the user (or default "guess")
    ## date_format_suggested is either NULL or "mdy" if we have suggested the volleymetrics format
    date_format <- c(setdiff(date_format, "guess"), temp$lastchange_date_format, date_format_suggested) ## in order of preference, some of those might be NULL
    if (length(date_format) < 1) date_format <- NULL
    ## insert preferred date_format so it can be used if this object gets passed to dv_write
    ##  this isn't the ideal solution, because they actual date format in the file might not actually follow the preferred date_format
    ##  but it's a start
    temp$file_meta$preferred_date_format <- if (length(date_format) > 0) date_format[1] else NULL
    out$file_meta <- temp$file_meta
    out$messages <- temp$messages
    if (is.null(out$messages)) out$messages <- data.frame(file_line_number=integer(), video_time=numeric(), message=character(), file_line=character(), stringsAsFactors=FALSE)
    ## match metadata
    if (missing(edited_meta)) {
        if (!do_warn) {
            suppressWarnings(temp <- read_meta(file_text, surname_case, date_format = date_format))
        } else {
            temp <- read_meta(file_text, surname_case, date_format = date_format)
        }
        out$meta <- temp$meta
        out$meta$filename <- filename
    } else {
        out$meta <- edited_meta
    }
    file_type <- "indoor"
    if ((nrow(out$meta$players_h) == 2 && nrow(out$meta$players_h) == 2) || (!is.null(out$meta$match$regulation) && grepl("beach", out$meta$match$regulation))) {
        ## assume is a beach volley file
        file_type <- "beach"
    }
    out$file_meta$file_type <- file_type
    if (!is.null(temp$messages) && nrow(temp$messages)>0) {
        temp$messages$file_line <- as.character(temp$messages$file_line)
        out$messages <- bind_rows(out$messages, temp$messages)
    }
    if (metadata_only) return(out)
    this_main <- NULL
    thismsg <- NULL
    tryCatch({
        ## try using the already-read file text first, because it has been read with the proper encoding
        try({
            idx <- grep("[3SCOUT]", file_text, fixed = TRUE)
            if (length(idx) == 1 && idx < length(file_text)) {
                tmp <- file_text[idx:length(file_text)]
                if (!do_warn) {
                    suppressWarnings(this_main <- read_main(file_text = tmp))
                } else {
                    this_main <- read_main(file_text = tmp)
                }
            }
        }, silent = TRUE)
        if (is.null(this_main)) {
            ## don't need to pass encoding here, because we are only reading the [3SCOUT] section
            if (!do_warn) {
                this_main <- suppressWarnings(read_main(filename))
            } else {
                this_main <- read_main(filename)
            }
        }
    }, error=function(e) {
        ## if file ends with 0x00, fread from the file will fail
        ## but already have the file contents as file_text, so use that
        if (grepl("Expected sep (';') but new line, EOF (or other non printing character) ends field 0", e$message, fixed = TRUE)) {
            thismsg <<- data.frame(file_line_number = length(file_text), video_time = NA_real_, message = "File ends with null line", file_line = "", severity = 3, stringsAsFactors = FALSE)
            idx <- grep("[3SCOUT]", file_text, fixed = TRUE)
            if (length(idx) == 1 && idx < length(file_text)) {
                tmp <- file_text[idx:length(file_text)]
                if (!do_warn) {
                    suppressWarnings(this_main <<- read_main(file_text = tmp))
                } else {
                    this_main <<- read_main(file_text = tmp)
                }
            } else {
                stop("could not read file (error message was: ", e$message, ")")
            }
        } else {
            stop("could not read file (error message was: ", e$message, ")")
        }
    })
    if (is.null(this_main)) stop("could not read dv file (unspecified error)")
    ##if (!is.null(thismsg)) mymsgs <- bind_rows(mymsgs,thismsg)
    ## don't actually issue this warning, for now at least

    ## count line numbers: where do codes start from?
    suppressWarnings(cln <- grep("[3SCOUT]",file_text,fixed=TRUE))
    if (length(cln)==1) {
        cln <- (cln+1):length(file_text)
        ## file_text may have empty lines at end, which won't show up in codes from read_main
        cln <- cln[1:min(length(cln),length(this_main$code))]
    } else {
        cln <- NULL
    }
    temp <- parse_code(code = this_main$code, meta = out$meta, evaluation_decoder = skill_evaluation_decode, code_line_num = cln, full_lines = if (is.null(cln)) NULL else file_text[cln], file_type = file_type)
    out$plays <- temp$plays
    if (!is.null(temp$messages) && nrow(temp$messages)>0) {
        temp$messages$file_line <- as.character(temp$messages$file_line)
        out$messages <- bind_rows(out$messages, temp$messages)
    }
    ## post-process plays data
    ##add the recognised columns from main to plays (note that we are discarding a few columns from main here)
    team_player_num <- if (grepl("beach", file_type)) 1:2 else 1:6
    out$plays <- cbind(this_main[, c("time", "video_file_number", "video_time")], out$plays, this_main[, c(paste0("home_p", team_player_num), paste0("visiting_p", team_player_num), "start_coordinate", "mid_coordinate", "end_coordinate", "point_phase", "attack_phase")])
    ## tidy up coordinates, and rescale to match zones and our ggcourt dimensions
    cxy <- dv_index2xy() ## grid of coords in our ggcourt space
    temp <- out$plays$start_coordinate
    ##temp[temp %in% c("-1-1", "")] <- NA_real_
    temp[grepl("-", temp, fixed = TRUE) | !nzchar(temp)] <- NA_real_ ## missing is usually "-1-1", but in some cases (DV bug?) can be e.g. "80-1", so turf out anything with "-" (or empty string)
    temp <- as.numeric(temp)
    temp[temp<1] <- NA_real_
    out$plays$start_coordinate <- temp
    out$plays$start_coordinate_x <- cxy[out$plays$start_coordinate, 1]
    out$plays$start_coordinate_y <- cxy[out$plays$start_coordinate, 2]
    temp <- out$plays$mid_coordinate
    temp[grepl("-", temp, fixed = TRUE) | !nzchar(temp)] <- NA_real_
    temp <- as.numeric(temp)
    temp[temp<1] <- NA_real_
    out$plays$mid_coordinate <- temp
    out$plays$mid_coordinate_x <- cxy[out$plays$mid_coordinate, 1]
    out$plays$mid_coordinate_y <- cxy[out$plays$mid_coordinate, 2]
    temp <- out$plays$end_coordinate
    temp[grepl("-", temp, fixed = TRUE) | !nzchar(temp)] <- NA_real_
    temp <- as.numeric(temp)
    temp[temp<1] <- NA_real_
    out$plays$end_coordinate <- temp
    out$plays$end_coordinate_x <- cxy[out$plays$end_coordinate, 1]
    out$plays$end_coordinate_y <- cxy[out$plays$end_coordinate, 2]
    ## add player_id values for home_p1 etc
    for (thisp in team_player_num)
        out$plays[,paste0("home_player_id",thisp)] <- get_player_id(rep("*",nrow(out$plays)),out$plays[,paste0("home_p",thisp)],out$meta)
    for (thisp in team_player_num)
        out$plays[,paste0("visiting_player_id",thisp)] <- get_player_id(rep("a",nrow(out$plays)),out$plays[,paste0("visiting_p",thisp)],out$meta)

    ## add set number
    if (!any(out$plays$end_of_set)) {
        out$messages <- bind_rows(out$messages, data.frame(file_line_number = NA_integer_, video_time = NA_real_, message = "Could not find any end-of-set markers (e.g. \"**1set\") in the input file", file_line = NA_character_, stringsAsFactors = FALSE))
        out$plays$set_number <- 1L
    } else {
        temp <- c(0L, which(out$plays$end_of_set))
        out$plays$set_number <- sapply(seq_len(nrow(out$plays)), function(z) sum(z >= temp))
        ## now fix the last **Xset entry in the file
        if (tail(temp, 1) == nrow(out$plays)) out$plays$set_number[nrow(out$plays)] <- out$plays$set_number[nrow(out$plays)] - 1L
        out$plays$home_team_score[which(out$plays$end_of_set)] <- NA_integer_
        out$plays$visiting_team_score[which(out$plays$end_of_set)] <- NA_integer_
    }

    ## technical timeouts
    if (is.flag(insert_technical_timeouts)) {
        if (insert_technical_timeouts) {
            insert_technical_timeouts <- if (grepl("beach", file_type)) list(function(s1, s2) (s1 + s2) == 21, NULL) else list(c(8,16),NULL)
        } else {
            insert_technical_timeouts <- list(NULL,NULL)
        }
    }
    set_sets <- if (grepl("beach", file_type)) list(1:2, 3) else list(1:4, 5)
    for (si in 1:2) {
        if (!is.null(insert_technical_timeouts[[si]])) {
            if (is.numeric(insert_technical_timeouts[[si]])) {
                ## find technical timeouts at e.g. points 8 and 16 in sets 1-4
                for (this_set in set_sets[[si]]) {
                    for (thisp in insert_technical_timeouts[[si]]) {
                        idx <- which((out$plays$home_team_score==thisp | out$plays$visiting_team_score==thisp) & out$plays$set_number==this_set)
                        if (length(idx) > 0) {
                            idx <- idx[1]
                            out$plays <- bind_rows(out$plays[seq_len(idx), ],
                                                   data.frame(skill = "Technical timeout", custom_code = "", timeout = TRUE, set_number = this_set, point = FALSE, end_of_set = FALSE, substitution = FALSE),
                                                   out$plays[(idx+1):nrow(out$plays), ])
                        }
                    }
                }
            } else if (is.function(insert_technical_timeouts[[si]])) {
                ## function that is TRUE when a TTO should occur
                ## note this only allows one such TTO per set
                for (this_set in set_sets[[si]]) {
                    idx <- which(insert_technical_timeouts[[si]](out$plays$home_team_score, out$plays$visiting_team_score) & out$plays$set_number==this_set)
                    if (length(idx) > 0) {
                        idx <- idx[1]
                        out$plays <- bind_rows(out$plays[seq_len(idx), ],
                                               data.frame(skill = "Technical timeout", custom_code = "", timeout = TRUE, set_number = this_set, point = FALSE, end_of_set = FALSE, substitution = FALSE),
                                               out$plays[(idx+1):nrow(out$plays), ])
                    }
                }
            }
        }
    }

    ## add match_id
    out$plays$match_id <- out$meta$match_id

    ## turn plays times (character) into POSIXct
    temp <- paste(format(as.Date(out$meta$match$date)),out$plays$time,sep=" ")
    temp[out$plays$time=="" | is.na(out$plays$time)] <- NA
    suppressWarnings(out$plays$time <- lubridate::ymd_hms(temp))
    ##if (all(is.na(out$plays$time))) {
    ##    ## maybe match date failed to parse? try
    ##    suppressWarnings(out$plays$time <- lubridate::hms(temp))
    ## except this gives objects of class period, not POSIXct - hold off on this for now
    ##}

    ## add point_id - an identifier of each point. One point may consist of multiple attacks or other actions. Timeouts get assigned to their own "point", but other non-play rows may get assigned as part of a point.
    pid <- 0
    temp_point_id <- rep(NA, nrow(out$plays))
    temp_point_id[1] <- pid
    temp_point <- out$plays$point
    temp_timeout <- out$plays$timeout
    temp_setterpos <- grepl("^[a\\*]z", out$plays$code)
    temp_end_of_set <- grepl("^\\*\\*[[:digit:]]set", out$plays$code, ignore.case = TRUE)
    sideout_scoring <- grepl("sideout", out$meta$match$regulation)
    for (k in seq_len(nrow(out$plays))[-1]) {
        if (isTRUE(temp_point[k-1] || temp_timeout[k] || temp_timeout[k-1])) { ## timeout[k-1] otherwise the following play does not start with a new point_id
            pid <- pid + 1
        } else if (isTRUE(sideout_scoring && temp_setterpos[k])) {
            ## in sideout scoring, we won't see points assigned unless the team is serving, but we still see the *z or az code
            pid <- pid + 1
        }
        temp_point_id[k] <- pid
    }
    out$plays$point_id <- temp_point_id
    ## fill in setter position
    temp_eos <- out$plays$end_of_set
    eos_idx <- which(temp_eos)
    out$plays$home_setter_position[eos_idx] <- NA_integer_
    out$plays$visiting_setter_position[eos_idx] <- NA_integer_
    temp_home_setter_position <- out$plays$home_setter_position
    temp_visiting_setter_position <- out$plays$visiting_setter_position
    hsp <- temp_home_setter_position[1]
    vsp <- temp_visiting_setter_position[1]
    for (k in 2:nrow(out$plays)) {
        if (isTRUE(temp_eos[k])) {
            ## start of new set, don't let current setter positions flow from last set
            hsp <- NA_integer_
            vsp <- NA_integer_
        }
        if (!is.na(temp_home_setter_position[k]))
            hsp <- temp_home_setter_position[k]
        temp_home_setter_position[k] <- hsp
        if (!is.na(temp_visiting_setter_position[k]))
            vsp <- temp_visiting_setter_position[k]
        temp_visiting_setter_position[k] <- vsp
    }
    out$plays$home_setter_position <- temp_home_setter_position
    out$plays$visiting_setter_position <- temp_visiting_setter_position

    ## add team_touch_id - an identifier of consecutive touches by same team in same point - e.g. a dig-set-attack sequence by one team is a "team touch"
    tid <- 0
    temp_ttid <- rep(NA, nrow(out$plays))
    temp_ttid[1] <- tid
    temp_team <- out$plays$team
    temp_ptid <- out$plays$point_id
    temp_skill <- out$plays$skill
    ## keep track of attacks - if a file has been scouted with only attacks, we need to account for that
    had_attack <- length(temp_skill) > 0 && temp_skill[1] %eq% "Attack" ## had_attack = had an attack already in this touch sequence
    for (k in seq_len(nrow(out$plays))[-1]) {
        if (!identical(temp_team[k], temp_team[k-1]) || !identical(temp_ptid[k], temp_ptid[k-1]) || (temp_skill[k] %eq% "Attack" && had_attack))  {
            tid <- tid+1
        }
        temp_ttid[k] <- tid
        had_attack <- temp_skill[k] %eq% "Attack"
    }
    out$plays$team_touch_id <- temp_ttid

    ## team name and ID
    idx <- out$meta$teams$home_away_team %eq% "*"
    home_team <- out$meta$teams$team[idx]
    home_team_id <- out$meta$teams$team_id[idx]
    idx <- out$meta$teams$home_away_team %eq% "a"
    visiting_team <- out$meta$teams$team[idx]
    visiting_team_id <- out$meta$teams$team_id[idx]
    ## replace * and a with team name
    temp <- temp_id <- rep(NA_character_, nrow(out$plays))
    idx <- out$plays$team %eq% "*"
    temp[idx] <- home_team
    temp_id[idx] <- home_team_id
    idx <- out$plays$team %eq% "a"
    temp[idx] <- visiting_team
    temp_id[idx] <- visiting_team_id
    out$plays$home_team <- home_team
    out$plays$visiting_team <- visiting_team
    out$plays$home_team_id <- home_team_id
    out$plays$visiting_team_id <- visiting_team_id
    out$plays$team <- temp
    out$plays$team_id <- temp_id

    ## keep track of who won each point
    temp <- as.data.frame(setNames(unique(out$plays[which(out$plays$point), c("point_id", "team")]), c("point_id", "point_won_by")))
    out$plays <- left_join(out$plays, temp, by = "point_id")
    if (grepl("sideout", out$meta$match$regulation)) {
        ## points aren't assigned when the winning team was receiving, so $point can't be used like it is with rally point scoring
        exactly_1 <- function(z) if (length(z) != 1) as(NA, class(z)) else z
        temp_ws <- out$meta$winning_symbols %>% mutate(skill = case_when(.data$skill == "S" ~ "Serve",
                                                                         .data$skill == "R" ~ "Reception",
                                                                         .data$skill == "A" ~ "Attack",
                                                                         .data$skill == "B" ~ "Block",
                                                                         .data$skill == "D" ~ "Dig",
                                                                         .data$skill == "E" ~ "Set",
                                                                         .data$skill == "F" ~ "Freeball"))
        out$plays <- out$plays %>% mutate(other_team = case_when(.data$team == .data$home_team ~ .data$visiting_team,
                                                            .data$team == .data$visiting_team ~ .data$home_team)) %>%
            left_join(temp_ws, by = c("skill", evaluation_code = "code")) %>%
            group_by(.data$point_id) %>% mutate(point_won_by = case_when(!is.na(.data$point_won_by[1]) ~ .data$point_won_by[1],
                                                                         any(.data$code %in% c("*$$&H#", "a$$&H=")) ~ .data$home_team[1],
                                                                         any(.data$code %in% c("a$$&H#", "*$$&H=")) ~ .data$visiting_team[1],
                                                                         sum(.data$win_lose == "W", na.rm = TRUE) == 1 ~ exactly_1(.data$team[which(.data$win_lose == "W")]),
                                                                         sum(.data$win_lose == "L", na.rm = TRUE) == 1 ~ exactly_1(.data$team[which(.data$win_lose == "L")])
                                                                         )) %>% ungroup %>%
            dplyr::select(-"win_lose", -"other_team") %>% as.data.frame
    }
    ## catch any that we missed
    ##dud_point_id <- unique(out$plays$point_id[is.na(out$plays$point_won_by) & !out$plays$skill %in% c(NA,"Timeout","Technical timeout")])
    ##for (dpi in dud_point_id) {
    ##    tail(na.omit(out$plays[out$plays$point_id<dpi,c("home_team_score","visiting_team_score","point_won_by")]),1)
    ##    head(na.omit(out$plays[out$plays$point_id>dpi,c("home_team_score","visiting_team_score","point_won_by")]),1)
    ##}
#### not sure how to deal with these!

    ## winning attacks
    ## A followed by D with "Error" evaluation, or A with "Winning attack" evaluation
    temp1 <- out$plays[-nrow(out$plays),]
    temp2 <- out$plays[-1,]
    out$plays$winning_attack <- FALSE
    out$plays$winning_attack[1:(nrow(out$plays)-1)] <- temp1$skill=="Attack" & (temp1$evaluation=="Winning attack" | ((temp2$skill=="Dig" | temp2$skill=="Block") & temp2$evaluation=="Error"))
    ## note the block error is not actually needed there, since such attacks are recorded as winning ones anyway
    out$plays$winning_attack[is.na(out$plays$winning_attack)] <- FALSE
    ## fill in scores, so that all lines have a score
    scores <- unique(na.omit(out$plays[, c("point_id", "home_team_score", "visiting_team_score")]))
    scores <- left_join(out$plays[, "point_id", drop = FALSE], scores, by = "point_id")
    ## double-check
    if (any(na.omit(out$plays$home_team_score-scores$home_team_score) != 0) | any(na.omit(out$plays$visiting_team_score-scores$visiting_team_score) != 0)) stop("error in scores")

    temp_home_team_score <- scores$home_team_score
    temp_visiting_team_score <- scores$visiting_team_score
    ## will still have NA scores for timeouts and technical timeouts, patch NAs where we can
    temp_pt <- out$plays$point
    for (k in seq_len(nrow(out$plays))[-1]) {
        if (grepl("^\\*\\*[[:digit:]]set", out$plays$code[k], ignore.case = TRUE)) {
            ## make end-of-set row have the same as the preceding scores
            if (is.na(temp_home_team_score[k])) temp_home_team_score[k] <- temp_home_team_score[k-1]
            if (is.na(temp_visiting_team_score[k])) temp_visiting_team_score[k] <- temp_visiting_team_score[k-1]
        } else {
            if (is.na(temp_home_team_score[k]) & !temp_pt[k]) {
                temp_home_team_score[k] <- temp_home_team_score[k-1]
            }
            if (is.na(temp_visiting_team_score[k]) & !temp_pt[k]) {
                temp_visiting_team_score[k] <- temp_visiting_team_score[k-1]
            }
        }
    }
    out$plays$home_team_score <- temp_home_team_score
    out$plays$visiting_team_score <- temp_visiting_team_score

    ##out$plays$home_team_score <- scores$home_team_score
    ##out$plays$visiting_team_score <- scores$visiting_team_score
    #### will still have NA scores for timeouts and technical timeouts, patch NAs where we can
    ##for (k in 2:nrow(out$plays)) {
    ##    if (is.na(out$plays$home_team_score[k]) & !out$plays$point[k]) {
    ##        out$plays$home_team_score[k] <- out$plays$home_team_score[k-1]
    ##    }
    ##    if (is.na(out$plays$visiting_team_score[k]) & !out$plays$point[k]) {
    ##        out$plays$visiting_team_score[k] <- out$plays$visiting_team_score[k-1]
    ##    }
    ##}

    ## enforce some columns to be integer
    ints <- intersect(names(out$plays), c("player_number", "start_zone", "end_zone", "end_cone", "home_team_score", "visiting_team_score", "home_setter_position", "visiting_setter_position", "set_number"))
    for (i in ints) out$plays[,i] <- as.integer(out$plays[,i])

    ## add serving_team info
    who_served <- unique(out$plays[out$plays$skill %eq% "Serve", c("match_id", "point_id", "team")])
    who_served <- setNames(as.data.frame(who_served[!duplicated(who_served$point_id), ]), c("match_id", "point_id", "serving_team"))

    out$plays <- left_join(out$plays, dplyr::distinct(who_served, .data$match_id, .data$point_id, .keep_all = TRUE), by = c("match_id", "point_id"))
    out$plays$serving_team <- as.character(out$plays$serving_team) ## to be sure is not factor

    class(out) <- c("datavolley",class(out))
    class(out$plays) <- c("datavolleyplays",class(out$plays))

    ## add play phase
    out$plays$phase <- play_phase(out$plays)

    ## add scores at START of point
    out$plays$home_score_start_of_point <- pmax(ifelse(out$plays$point_won_by %eq% out$plays$home_team, as.integer(out$plays$home_team_score - 1L), as.integer(out$plays$home_team_score)), 0L)
    out$plays$visiting_score_start_of_point <- pmax(ifelse(out$plays$point_won_by %eq% out$plays$visiting_team, as.integer(out$plays$visiting_team_score - 1L), as.integer(out$plays$visiting_team_score)), 0L)

    ## now call custom code parser, if it was provided
    if (!missing(custom_code_parser) && !is.null(custom_code_parser)) {
        cx <- custom_code_parser(out)
        out$plays <- cx$plays
        if (!is.null(cx$messages) && nrow(cx$messages)>0) {
            cx$messages$file_line <- as.character(cx$messages$file_line)
            out$messages <- bind_rows(out$messages,cx$messages)
        }
    }

    ## ensure column ordering
    cls <- c("match_id", "point_id", "time", "video_file_number", "video_time", "code", "team", "player_number", "player_name", "player_id", "skill", "skill_type", "evaluation_code", "evaluation", "attack_code", "attack_description", "set_code", "set_description", "set_type", "start_zone", "end_zone", "end_subzone", "end_cone", "skill_subtype", "num_players", "num_players_numeric", "special_code", "timeout", "end_of_set", "substitution", "point", "home_team_score", "visiting_team_score", "home_setter_position", "visiting_setter_position", "custom_code", "file_line_number", paste0("home_p", 1:6), paste0("visiting_p", 1:6), "start_coordinate", "mid_coordinate", "end_coordinate", "point_phase", "attack_phase", "start_coordinate_x", "start_coordinate_y", "mid_coordinate_x", "mid_coordinate_y", "end_coordinate_x", "end_coordinate_y", paste0("home_player_id", 1:6), paste0("visiting_player_id", 1:6), "set_number", "team_touch_id", "home_team", "visiting_team", "home_team_id", "visiting_team_id", "team_id", "point_won_by", "winning_attack", "serving_team", "phase", "home_score_start_of_point", "visiting_score_start_of_point")
    out$plays <- out$plays[, c(intersect(cls, names(out$plays)), setdiff(names(out$plays), cls))]

    out$messages <- out$messages[,setdiff(names(out$messages),"severity")]
    ## apply additional validation
    if (extra_validation > 0) {
        moreval <- dv_validate(out, validation_level = extra_validation, options = validation_options, file_type = file_type)
        if (!is.null(moreval) && nrow(moreval) > 0) {
            moreval$file_line <- as.character(moreval$file_line)
            out$messages <- bind_rows(out$messages, moreval)
        }
    }
    if (is.null(out$messages)) out$messages <- data.frame(file_line_number=integer(), video_time=numeric(), message=character(), file_line=character(), stringsAsFactors=FALSE) ## should not happen, but just to be sure
    if (!is.null(out$messages) && nrow(out$messages)>0) {
        out$messages$file_line_number <- as.integer(out$messages$file_line_number)
        out$messages <- out$messages[order(out$messages$file_line_number, na.last = FALSE),]
        row.names(out$messages) <- NULL
    }

    ## some final fixes
    ## VS-exported dvw's can have an incorrect end location on reception if serve does not have an end location, but the set has a location
    idx <- out$plays$skill == "Reception" & lag(out$plays$skill) == "Serve" & lead(out$plays$skill) == "Set" & !is.na(out$plays$end_zone) & is.na(lag(out$plays$end_zone)) & out$plays$end_zone == lead(out$plays$end_zone)
    if (any(idx, na.rm = TRUE)) {
        out$plays <- mutate(out$plays, end_zone = case_when(idx ~ NA_integer_, TRUE ~ .data$end_zone),
                            end_subzone = case_when(idx ~ NA_character_, TRUE ~ .data$end_subzone))
        ## and remove the end zone/subzone from the codes on those rows
        temp <- out$plays$code[which(idx)]
        try({
            substr(temp, 11, 12) <- "~~"
            out$plays$code[which(idx)] <- sub("~+$", "", temp)
        })
    }
    if (do_warn) {
        ## spit the messages out
        for (k in 1:nrow(out$messages)) {
            vt <- if (is.na(out$messages$video_time[k])) "" else paste0(" (video time ",out$messages$video_time[k],")")
            cat(paste0("line ",out$messages$file_line_number[k],vt,": ",out$messages$message[k]," (line in file is: \"",out$messages$file_line[k],"\")"),"\n")
        }
    }
    out
}

#' @rdname dv_read
#' @export
read_dv <- dv_read

#' A simple summary of a volleyball match
#'
#' @param object datavolley: datavolley object as returned by \code{dv_read}
#' @param ... : additional arguments (currently these have no effect)
#'
#' @return list of summary items
#'
#' @seealso \code{\link{dv_read}}
#' @examples
#' x <- dv_read(dv_example_file(), insert_technical_timeouts=FALSE)
#' summary(x)
#'
#' @method summary datavolley
#' @export
summary.datavolley <- function(object,...) {
    out <- list(date=object$meta$match$date,league=object$meta$match$league)
    out$teams <- object$meta$teams[,c("team","coach","assistant","sets_won")] ## data.frame(team=object$meta$teams$team,coach=object$meta$teams$coach,assistant=object$meta$teams$assistant,sets_won=object$meta$teams$sets_won,stringsAsFactors=FALSE)
    temp <- object$meta$result$score_home_team>object$meta$result$score_visiting_team
    out$set_scores <- object$meta$result[,c("score_home_team","score_visiting_team")]
    ## make extra sure that set_scores has home team assigned correctly
    if (object$meta$teams$home_away_team[1]!="*") { out$set_scores <- out$set_scores[,2:1] }
    out$set_scores <- na.omit(out$set_scores)
    out$duration <- sum(object$meta$result$duration, na.rm = FALSE)
    class(out) <- "summary.datavolley"
    out
}

#' Print method for summary.datavolley
#'
#' @param x summary.datavolley: a summary.datavolley object as returned by \code{summary.datavolley}
#' @param ... : additional arguments (currently these have no effect)
#' @seealso \code{\link{summary.datavolley}}
#' @method print summary.datavolley
#' @export
print.summary.datavolley <- function(x,...) {
    out <- paste0("Match summary:\nDate: ", x$date, "\nLeague: ", x$league, "\n")
    coaches1 <- paste(Filter(Negate(is.na), c(x$teams$coach[1], x$teams$assistant[1])), collapse = "/")
    coaches1 <- if (length(coaches1) > 0 && nzchar(coaches1)) paste0(" (", coaches1, ")") else ""
    coaches2 <- paste(Filter(Negate(is.na), c(x$teams$coach[2], x$teams$assistant[2])), collapse = "/")
    coaches2 <- if (length(coaches2) > 0 && nzchar(coaches2)) paste0(" (", coaches2, ")") else ""
    out <- paste0(out, "Teams: ", x$teams$team[1], coaches1, "\n       vs\n       ", x$teams$team[2], coaches2, "\n")
    out <- paste0(out, "Result: ", x$teams$sets_won[1], "-", x$teams$sets_won[2], " (", paste(x$set_scores$score_home_team, x$set_scores$score_visiting_team, sep = "-", collapse = ", "), ")\n")
    out <- if (is.na(x$duration)) paste0(out, "Duration: unknown\n") else paste0(out, "Duration: ", x$duration, " minutes\n")
    cat(out)
    invisible(out)
}


#' Summarize a list of volleyball matches
#'
#' @param z list: list of datavolley objects as returned by \code{dv_read}
#'
#' @return named list with various summary indicators, including a competition ladder
#'
#' @seealso \code{\link{dv_read}}
#'
#' @examples
#' \dontrun{
#'   x <- dv_read(dv_example_file(), insert_technical_timeouts=FALSE)
#'   dvlist_summary(list(x,x)) ## same match duplicated twice, just for illustration purposes
#' }
#'
#' @export
dvlist_summary <- function(z) {
    out <- list(number_of_matches = length(z), number_of_sets = sum(vapply(z, function(z) as.integer(sum(z$meta$teams$sets_won)), FUN.VALUE = 1L, USE.NAMES = FALSE)))
    out$date_range <- range(as.Date(bind_rows(lapply(z, function(q) list(date = q$meta$match$date)))$date), na.rm = TRUE)
    teams <- bind_rows(lapply(z, function(q) q$meta$teams[, c("team", "won_match")])) %>%
        group_by(.data$team) %>% dplyr::summarize(played = dplyr::n(), won = sum(.data$won_match), win_rate = .data$won / .data$played)

    temp <- bind_rows(lapply(z, function(q) {
        temp <- q$meta$teams[, c("team", "sets_won")]
        temp$sets_played <- sum(q$meta$teams$sets_won)
        temp
    })) %>% group_by(.data$team) %>%
        dplyr::summarize(sets_played = sum(.data$sets_played), sets_won = sum(.data$sets_won), set_win_rate = .data$sets_won / .data$sets_played)
    teams <- left_join(teams, temp, by = "team")

    temp <- bind_rows(lapply(z, function(q) {
        s <- summary(q)
        s$sc <- colSums(s$set_scores)
        data.frame(team = s$teams$team, points_for = s$sc, points_against = s$sc[2:1])
    }))
    teams <- left_join(teams,
                       temp %>% group_by(.data$team) %>% dplyr::summarize(points_for = as.integer(sum(.data$points_for)), points_against = as.integer(sum(.data$points_against))),
                       by = "team") %>%
        mutate(points_ratio = .data$points_for / .data$points_against) %>%
        dplyr::arrange(.data$team)

    out$ladder <- teams
    class(out) <- "summary.datavolleylist"
    out
}


#' Print method for summary.datavolleylist
#'
#' @param x summary.datavolleylist: a summary.datavolleylist object, as returned by \code{dvlist_summary}
#' @param ... : additional arguments (currently these have no effect)
#' @seealso \code{\link{dvlist_summary}}
#' @method print summary.datavolleylist
#' @export
print.summary.datavolleylist <- function(x,...) {
    cat(sprintf("Number of matches: %d\n",x$number_of_matches))
    cat(sprintf("Match dates: %s to %s\n",x$date_range[1],x$date_range[2]))
    cat(sprintf("Number of sets: %d\n",x$number_of_sets))
    print(x$ladder)
}


#' Extract the plays component from a datavolley object, or assign a new one
#'
#' @param x datavolley: a datavolley object as returned by \code{dv_read}
#' @param value datavolleyplays: new data
#'
#' @return The plays component of x (a data.frame), or a modified version of x with the new plays component inserted
#'
#' @seealso \code{\link{dv_read}}
#'
#' @examples
#' \dontrun{
#'   x <- dv_read(dv_example_file(), insert_technical_timeouts=FALSE)
#'   inspect(plays(x))
#'
#'   p2 <- plays(x)
#'   plays(x) <- p2
#' }
#' @export
plays <- function(x) {
    if ("plays" %in% names(x)) x$plays else stop("input has no plays component")
}

#' @rdname plays
#' @export
`plays<-` <- function(x, value) {
    x$plays <- value
    x
}

#' Convenience function for inspecting the plays component of a datavolley object
#'
#' @param x datavolleyplays: the plays component of a datavolley object as returned by \code{dv_read}
#' @param vars string: which variables to print? "minimal" set or "all"
#' @param maxrows numeric: maximum number of rows to print
#' @param extra character: names of any extra columns to include in the output
#'
#' @seealso \code{\link{dv_read}} \code{\link{plays}}
#'
#' @examples
#' \dontrun{
#'   x <- dv_read(dv_example_file(), insert_technical_timeouts=FALSE)
#'   inspect(plays(x))
#' }
#'
#' @export
inspect <- function(x,vars="minimal",maxrows=100,extra) {
    ##if (!(inherits(x,"datavolleyplays"))) stop("x must be a datavolleyplays object")
    vars <- match.arg(vars,c("minimal","all"))
    cols_to_show <- if (vars=="all") names(x) else c("time","code","team","player_number","player_name","skill","skill_type","evaluation")##,"match_id","set_number")
    if (!missing(extra)) {
        extra <- intersect(extra,names(x))
        cols_to_show <- c(cols_to_show,extra)
    }
    print(x[1:min(nrow(x),maxrows),cols_to_show])
}
openvolley/datavolley documentation built on Jan. 26, 2025, 4:48 p.m.