This is my work in progress for parsing MTG Goldfish pages to extract card prices.
suppressMessages(library(tidyverse)) library(xml2) library(reticulate) library(devtools) library(roxygen2) library(here) options(tibble.print_max = Inf) use_python("/usr/bin/python3") #mtgsdk requires python3 basic_lands <- c("Plains", "Island", "Swamp", "Mountain", "Forest") error_log_file_path <- paste0(here(),"/get_price_error_",format(Sys.Date(),"%Y_%m_%d"),".log") if(!file.exists(error_log_file_path)) file.create(error_log_file_path)
Load the package.
load_all() data_dir <- "/home/john/data/mtg/"
Snapcaster Mage from Innistrad will be our sample card.
card_name <- "Snapcaster Mage" set <- "Innistrad"
Get and then print Snapcaster Mage's price history. Note that the assignment is just illustrative. plot_card_price_history
will get the history on its own.
snapcaster_mage_price_history <- get_card_price_history(card_name, set) plot_card_price_history(card_name, set) %>% plot
Now write down the history to disk.
write_card_price_history(card_name = card_name, set = set, directory = data_dir)
The extract_card_info
function will get card information from the mtgsdk object.
extract_card_info <- function(card) { if (!identical(class(card),c("mtgsdk.card.Card", "python.builtin.object"))) stop("Input is not of the correct class, c(\"mtgsdk.card.Card\", \"python.builtin.object\").") list(name = card$name, set_code = card$set, set_name = card$set_name, layout = card$layout, number = card$number, has_alt_names = !is.null(card$names) & length(card$names) > 1) }
mtgsdk <- import("mtgsdk") card_dat <- mtgsdk$Card$all()
#Already saved, no need to rerun. card_dat_parsed <- card_dat %>% map_df(~list(name = .x$name, type = .x$type, set_name = .x$set_name, number = ifelse(is.null(.x$number), NA_character_, .x$number), layout = .x$layout, has_alt_names = !is.null(.x$names) & length(.x$names) > 1 , has_variations = !is.null(.x$variations) & length(.x$variations) > 0)) %>% mutate(split_num = str_extract_all(number,"[0-9]+",simplify=TRUE)) %>% group_by(set_name, split_num) %>% #mutate(split_names = case_when(all(layout == "split", (set_name %in% expansion_sets) | (set_name %in% core_sets)) ~ paste0(name, collapse = " "), TRUE ~ "")) mutate(split_name = case_when(layout == "split" ~ paste0(name, collapse = " "), TRUE ~ NA_character_)) %>% mutate(lookup_name = coalesce(split_name, name)) %>% ungroup saveRDS(card_dat_parsed, file = "card_dat.RDS")
There are some data on particular sets to watch out for included. I will eventually convert these to RDS to properly package them up.
no_go_sets <- read_tsv(file = "no_go_sets.tsv", col_names = "set", col_types = list(col_character())) %>% unlist core_sets <- read_tsv(file = "core_sets.tsv", col_names = "set", col_types = list(col_character())) %>% unlist expansion_sets <- read_tsv(file = "expansion_sets.tsv", col_names = "set", col_types = list(col_character())) %>% unlist
For now bring in the data I've already retrieved and saved.
card_dat_parsed <- readRDS("card_dat.RDS")
Here's an example command to pull all the sets.
all_sets <- card_dat_parsed %>% distinct(set_name) %>% arrange(set_name) %>% unlist(use.names = FALSE) head(all_sets, 10)
And some simple checks:
all(no_go_sets %in% all_sets) all(core_sets %in% all_sets) all(expansion_sets %in% all_sets)
Sample some trial data that you can then attempt to save later.
trial_dat <- card_dat_parsed %>% #filter(!(set_name %in% no_go_sets)) %>% #filter(!(set_name %in% c("Unglued", "Unhinged", "Unstable"))) %>% #Exclude to start with for simplicity. filter((set_name %in% expansion_sets) | (set_name %in% core_sets)) %>% filter(!has_alt_names | str_detect(number, "^[0-9]+$|^[0-9]+a$")) %>% filter(!(name %in% basic_lands) | (set_name %in% c("Unglued", "Unhinged", "Unstable"))) %>% filter((type != "Conspiracy") | (set_name != "Magic: The Gathering—Conspiracy")) %>% #Goldfish doesn't include conspiracies from CNS either. filter(!has_variations) %>% #Figure this out later. This is for cards like Fallen Empires Hymn to Tourach or Alliances Arcane Denial. filter(name != "Kongming, \"Sleeping Dragon\"") %>% #For some reason many of their Kongming links are broken. filter((name != "Phage the Untouchable Avatar") | (set_name != "Vanguard")) %>% #No price history available. filter(!(type %in% c("Scheme", "Ongoing Scheme"))) %>% #No prices. sample_n(100)
safely_write_card_price_history <- safely(write_card_price_history) error_behavior <- function(message) {write(toString(message), error_log_file_path, append=TRUE)} tryCatch({ trial_dat %>% select(name, set_name) %>% pwalk(~ write_card_price_history(card_name = ..1, set = ..2, directory = data_dir)) }, error = error_behavior )
Some cards with multiple names have exceptions to note:
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.