# data-raw/goalkeeper_season_stats.R

#loading libraries
library(tidyverse)
library(readr)
library(readxl)
library(fs)
library(snakecase)
library(stringr)

## Goalkeeper season tables ----

# Every file under data-raw/ whose path matches 'goalkeepers_season'.
keeperfiles <- fs::dir_ls("data-raw", regexp = 'goalkeepers_season')

# Read each workbook, stack the results into one data frame, and reduce the
# per-file identifier in `season` to the last underscore-free token that sits
# directly before the ".xlsx" extension.
goalkeeper_season_stats <- keeperfiles %>%
  map(read_xlsx) %>%
  bind_rows(.id = "season") %>%
  mutate(season = str_extract(season, "[^_]+(?=\\.xlsx$)"))

# Clean-up of the stacked goalkeeper table.
#
# * Nation: keep the text from the first whitespace character onward. The
#   leading whitespace itself is part of the match and is therefore retained
#   at this point.
# * Rk and Matches are unnecessary columns and are dropped.
# * Only rows whose position is exactly "GK" are kept.
goalkeeper_season_stats <- goalkeeper_season_stats %>%
  mutate(Nation = str_extract(Nation, "\\s(.*)")) %>%
  select(-Rk, -Matches) %>%
  filter(Pos == "GK")

# Player ids: one id per distinct (Player, Nation) pair, offset by 10000.
# The data is grouped first and group_indices() is called on the grouped frame;
# passing the grouping columns straight to group_indices() is deprecated in
# dplyr >= 1.0.0. The resulting ids are the same either way.
goalkeeper_season_stats <- goalkeeper_season_stats %>%
  mutate(person_id = 10000 + group_indices(group_by(., Player, Nation)))

# The percentage columns get explicit names before the blanket conversion of
# every column name with snakecase::to_any_case().
goalkeeper_season_stats <- goalkeeper_season_stats %>%
  rename(save_pct = `Save%`, cs_pct = `CS%`) %>%
  rename_all(.funs = to_any_case)

# Replace squad names with team codes. Each code lists the spellings it
# accepts; any squad value not listed below (including a missing one) ends up
# as NA.
goalkeeper_season_stats <- goalkeeper_season_stats %>%
  mutate(
    squad = case_when(
      squad %in% c("Boston", "Boston Breakers") ~ "BOS",
      squad %in% c("Chicago", "Chicago Red Stars") ~ "CHI",
      squad %in% c("Kansas City", "FC Kansas City") ~ "KC",
      squad %in% c("Houston", "Houston Dash") ~ "HOU",
      squad %in% c("North Carolina") ~ "NC",
      squad %in% c("Orlando", "Orlando Pride") ~ "ORL",
      squad %in% c("Portland", "Portland Thorns FC") ~ "POR",
      squad %in% c("Reign", "Seattle") ~ "SEA",
      squad %in% c("Sky Blue", "Sky Blue FC") ~ "NJ",
      squad %in% c("Utah") ~ "UTA",
      squad %in% c("Washington", "Washington Spirit") ~ "WAS",
      squad %in% c("Western New York Flash") ~ "WNY",
      TRUE ~ NA_character_
    )
  )


## Field-player season tables (source of red and yellow card counts) ----

# Every file under data-raw/ whose path matches 'fieldplayers_overall_season'.
fieldfiles <- fs::dir_ls("data-raw", regexp = 'fieldplayers_overall_season')

# Read each workbook and stack them; `season` holds the per-file identifier.
fp_overall_season_stats <- fieldfiles %>%
  map(read_xlsx) %>%
  bind_rows(.id = "season")

# Same tidying as for the goalkeeper table: shorten `season` to the token
# before ".xlsx", keep Nation from its first whitespace onward, and drop the
# unnecessary Rk column.
fieldplayer_overall_season_stats <- fp_overall_season_stats %>%
  mutate(
    season = str_extract(season, "[^_]+(?=\\.xlsx$)"),
    Nation = str_extract(Nation, "\\s(.*)")
  ) %>%
  select(-Rk)

# Card counts for goalkeepers only: one row per goalkeeper row in the
# field-player table, carrying season, player name and the two card columns.
gk_card_stats <- fieldplayer_overall_season_stats %>%
  filter(Pos == "GK") %>%
  select(season, Player, CrdY, CrdR)

# Final assembly of the goalkeeper table.
#
# 1. Attach the yellow/red card counts by player name and season.
#    NOTE(review): the key is name + season only, so a keeper with more than
#    one row in the same season on either side would fan out into duplicated
#    rows -- confirm against the raw data.
# 2. Drop the player name; person_id identifies the player from here on.
# 3. Snake-case the column names again so the newly joined card columns follow
#    the same convention as the rest of the table.
# 4. Rename squad to team_id.
# 5. Move person_id to the front and keep every other column in its existing
#    order. This is done by name rather than by hard-coded column positions so
#    that a change in the workbook layout cannot silently drop or misplace a
#    column.
# 6. Strip all spaces from nation (removes the leading whitespace kept when
#    the column was first extracted).
goalkeeper_season_stats <- goalkeeper_season_stats %>%
  left_join(gk_card_stats, by = c("player" = "Player", "season")) %>%
  dplyr::select(-player) %>%
  rename_all(.funs = to_any_case) %>%
  rename(team_id = squad) %>%
  dplyr::select(person_id, dplyr::everything()) %>%
  mutate(nation = str_replace_all(nation, " ", ""))

# Export the final goalkeeper data as package data.
usethis::use_data(goalkeeper_season_stats, overwrite = TRUE)
# adror1/nwslR documentation built on Oct. 4, 2022, 3:06 a.m.