# R/ball_runs.R

#SUFC Reports
#Ball Runs Numbers Statistics

# Extract the "BALL RUNS" statistics from a match report PDF.
#
# Arguments:
#   match_report  Passed unchanged to pdftools::pdf_text(); the PDF to read.
#
# Returns:
#   A 3-row data.frame of character columns. Rows are, in order,
#   INTO-CONTACT, WITHOUT-CONTACT and LINEBREAKS. Columns are, in order:
#     <second line of page 1>  the statistic label
#     home_ball_runs           the token printed just before the label
#     away_ball_runs           the token printed just after the label
#   A neighbouring token that does not exist on the line is returned as NA.
#
# Errors:
#   Stops with a descriptive message when the PDF has fewer than two pages
#   or when one of the three statistic rows cannot be located on page 2.
ball_runs <- function(match_report){
  # Namespace-qualified call: avoids attaching packages to the caller's
  # search path as a side effect of calling this function.
  pages <- pdftools::pdf_text(match_report)
  if (length(pages) < 2L) {
    stop(
      "match report must contain at least two pages; found ",
      length(pages),
      call. = FALSE
    )
  }

  # Strip leading/trailing spaces, squeeze runs of spaces down to one,
  # then split into lines and trim each line.
  tidy_lines <- function(page_text) {
    squeezed <- gsub("^ *|(?<= ) | *$", "", page_text, perl = TRUE)
    trimws(strsplit(squeezed, "\n", fixed = TRUE)[[1]], "both")
  }

  # Only the text after the (last) "BALL RUNS" heading on page 2 is scanned;
  # when the heading is absent the whole page is scanned.
  keyword <- "BALL RUNS"
  stat_lines <- tidy_lines(sub(paste0(".*", keyword), "", pages[2]))

  # Return c(value before, label, value after) for one statistic row.
  extract_row <- function(label) {
    hits <- grep(label, stat_lines, fixed = TRUE)
    if (length(hits) == 0L) {
      stop("could not find a '", label, "' row on page 2", call. = FALSE)
    }

    # Hyphenate the label so it survives the split on spaces as one token.
    token <- gsub(" ", "-", label, fixed = TRUE)
    line <- gsub(label, token, stat_lines[hits[1L]], fixed = TRUE)
    words <- strsplit(line, " ", fixed = TRUE)[[1]]

    pos <- which(words == token)
    if (length(pos) == 0L) {
      stop(
        "'", label, "' is not a standalone field in line: ", line,
        call. = FALSE
      )
    }
    pos <- pos[length(pos)]  # last occurrence on the line

    # words[0] would silently drop the element and misalign the table,
    # so a missing left-hand neighbour is made an explicit NA.
    before <- if (pos > 1L) words[pos - 1L] else NA_character_
    c(before, token, words[pos + 1L])
  }

  labels <- c("INTO CONTACT", "WITHOUT CONTACT", "LINEBREAKS")
  rows <- lapply(labels, extract_row)
  result <- data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)

  # The label column is named after the second line of page 1.
  label_column <- tidy_lines(pages[1])[2]
  names(result) <- c("home_ball_runs", label_column, "away_ball_runs")

  # Put the label column first, followed by the home and away values.
  result[, c(2, 1, 3)]
}
# AndrewFerris/sufc1863 documentation built on May 5, 2019, 5:59 a.m.