# R/scrum.R

# SUFC Reports
# Scrum statistics

# Collapse runs of spaces in one page of PDF text and split the page into
# whitespace-trimmed lines.
.scrum_page_lines <- function(page_text) {
  squeezed <- gsub("^ *|(?<= ) | *$", "", page_text, perl = TRUE)
  trimws(strsplit(squeezed, "\n", fixed = TRUE)[[1]], "both")
}

# Keep only the lines in which at least one scrum outcome label occurs
# exactly twice. Lines that are NA or contain no label are dropped.
.scrum_stat_lines <- function(lines) {
  outcomes <- c(
    "RETAINED", "RETAINED DISRUPTED",
    "TURNOVER", "TURNOVER DISRUPTED",
    "RESET", "PEN/FK FOR", "PEN/FK AGAINST"
  )
  appears_twice <- Reduce(
    `|`,
    lapply(outcomes, function(outcome) stringr::str_count(lines, outcome) == 2),
    logical(length(lines))
  )
  # which() also discards NA results produced by NA input lines.
  lines[which(appears_twice)]
}

# Split each statistics line into exactly six tokens: the first three and the
# last three space-separated tokens of the line.
.scrum_row_tokens <- function(stat_lines) {
  # Join multi-word labels with a hyphen so each label is a single token.
  for (label in c("RETAINED DISRUPTED", "TURNOVER DISRUPTED",
                  "PEN/FK FOR", "PEN/FK AGAINST")) {
    stat_lines <- gsub(
      label, gsub(" ", "-", label, fixed = TRUE), stat_lines,
      fixed = TRUE
    )
  }
  tokens <- strsplit(stat_lines, " ", fixed = TRUE)
  # A line with fewer than three tokens cannot supply a leading triple; it
  # would otherwise yield a short row and misalign the 6-column matrix.
  tokens <- Filter(function(tok) length(tok) >= 3, tokens)
  lapply(tokens, function(tok) {
    n <- length(tok)
    tok[c(1:3, (n - 2):n)]
  })
}

# Extract the scrum statistics table from a match report PDF.
#
# Reads page 3 of the PDF, discards everything up to and including the
# "SCRUM" keyword (greedy match), and keeps the lines in which a scrum
# outcome label (RETAINED, RETAINED DISRUPTED, TURNOVER, TURNOVER DISRUPTED,
# RESET, PEN/FK FOR, PEN/FK AGAINST) occurs exactly twice. Each kept line is
# reduced to six tokens, of which the fifth is dropped from the result.
#
# Arguments:
#   match_report  Passed unchanged to pdftools::pdf_text() (a path to the
#                 match report PDF).
#
# Returns:
#   A data.frame of character columns, one row per kept line. The first
#   column holds the outcome label and is named after the second text line
#   of page 1 (presumably the match/team heading -- confirm against a real
#   report). The remaining columns are home_attacking, home_defensive,
#   away_attacking and away_defensive. When no line qualifies, a zero-row
#   data.frame with the same columns is returned.
#
# Errors:
#   Stops with a descriptive message when the PDF has fewer than 3 pages.
scrum <- function(match_report) {
  # Namespace-qualified calls: avoid attaching packages to the caller's
  # search path as a side effect of running this function.
  pages <- pdftools::pdf_text(match_report)
  if (length(pages) < 3) {
    stop(
      "`match_report` must have at least 3 pages; found ", length(pages), ".",
      call. = FALSE
    )
  }

  keyword <- "SCRUM"
  # Default regex engine, exactly as before; do not switch this to perl.
  after_heading <- sub(paste0(".*", keyword), "", pages[3])
  stat_lines <- .scrum_stat_lines(.scrum_page_lines(after_heading))
  rows <- .scrum_row_tokens(stat_lines)

  # as.character() turns the NULL from unlist(list()) into character(0), so
  # an empty result becomes a 0 x 6 matrix instead of an error.
  cells <- matrix(as.character(unlist(rows)), ncol = 6, byrow = TRUE)
  stats <- data.frame(cells, stringsAsFactors = FALSE)

  heading <- .scrum_page_lines(pages[1])[2]
  names(stats) <- c(
    "home_attacking", heading, "home_defensive",
    "away_attacking", heading, "away_defensive"
  )

  # Label column first; column 5 (presumably the repeated label) is dropped.
  stats[, c(2, 1, 3, 4, 6)]
}
# AndrewFerris/sufc1863 documentation built on May 5, 2019, 5:59 a.m.