# R/scrapers.R

#' Scrape MLB.com prospect lists and per-player write-ups
#'
#' For every season from `year_start` to `year_end` (inclusive) this downloads
#' that season's `playerProspects.json` from m.mlb.com, converts each entry
#' under `prospect_players$prospects` into a data frame, and then downloads
#' the player's own JSON page to attach its `prospect_player$content$default`
#' value as a `text` column.
#'
#' Requires the jsonlite and dplyr packages and network access. Download or
#' parse failures are not caught; they propagate to the caller as errors.
#'
#' @param year_start First season to fetch (single number).
#' @param year_end Last season to fetch (single number, `>= year_start`).
#'
#' @return Invisibly, a list with one element per season, in ascending
#'   season order. Each element is itself a list of data frames, one per
#'   prospect, holding the fields from the season listing plus
#'   `player_json` (URL of the player's page), `j` (position of the player
#'   in the listing) and `text` (`NA` when the player page has no
#'   `content$default` entry).
scrape.prospects.mlb <- function(year_start = 2012,
                                 year_end = 2018) {
  stopifnot(
    is.numeric(year_start), length(year_start) == 1L,
    is.numeric(year_end), length(year_end) == 1L,
    year_start <= year_end
  )

  # The caller's year_end is honoured; it used to be overwritten with 2018.
  prospect_lists <- paste0(
    "http://m.mlb.com/gen/players/prospects/",
    seq(year_start, year_end),
    "/playerProspects.json"
  )

  big_list <- vector("list", length(prospect_lists))


  # Pull Data ---------------------------------------------------------------

  for (i in seq_along(prospect_lists)) {
    read_page <- jsonlite::read_json(prospect_lists[i])
    prospects <- read_page$prospect_players$prospects

    # seq_along() keeps an empty listing from looping over c(1, 0).
    season_players <- vector("list", length(prospects))

    for (j in seq_along(prospects)) {
      # prospect_year and player_id are columns of the player data frame;
      # mutate() errors if the listing does not provide them.
      player <- dplyr::mutate(
        as.data.frame(prospects[[j]]),
        player_json = paste0(
          "http://mlb.mlb.com/gen/players/prospects/",
          prospect_year, "/", player_id, ".json"
        )
      )
      player$j <- j

      player_page <- jsonlite::read_json(player$player_json)
      write_up <- player_page$prospect_player$content$default
      # Assigning NULL would silently leave this player without a `text`
      # column, so fall back to NA to keep every data frame the same shape.
      player$text <- if (is.null(write_up)) NA_character_ else write_up

      season_players[[j]] <- player
    }

    big_list[[i]] <- season_players
  }

  # Each season can be collapsed into a single data frame with
  # do.call(rbind, big_list[[i]]) once the steps below are implemented.


  # Mutate data -------------------------------------------------------------

  ## TO DO

  # Save data ---------------------------------------------------------------

  ## TO DO

  # Hand the scraped data back so it is not lost; invisible() keeps the
  # console quiet when the result is not assigned.
  invisible(big_list)
}
# forrestdiamond/foRest documentation built on June 1, 2019, 3:56 a.m.