misc/drwho_episodes.R

# Extract data for complete seasons from Dr Who revived era

# Packages ----

library(rvest)
library(tidyverse)


# Extract tables from url ----

# set url
url <- "https://en.wikipedia.org/wiki/List_of_Doctor_Who_episodes_(2005-present)"

# read the HTML code from the website
webpage <- rvest::read_html(url)

# use CSS selectors to scrape the table and convert to data frames
tables <- rvest::html_nodes(webpage, "table.wikitable") %>%
  rvest::html_table(header = TRUE, na.strings = c(NA, ""), convert = TRUE)

# rm(url, webpage)

# Season 01 ----

s01 <- tables[[3]]

names(s01) <- c("story_number", "episode_number", "episode_title",
                 "director", "writer", "first_aired", "production_code",
                 "uk_viewers", "rating")

s01_episodes <- s01 %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 1,
         serial_title = NA,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = "episode",
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(45, 44, 44, 45, 42, 45, 44, 45, 45, 45, 45, 45, 45)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s01_directors <- s01 %>%
  select(story_number,director)

s01_writers <- s01 %>%
  select(story_number, writer)

rm(s01, url, webpage)

# Season 02 ----

s02 <- tables[[4]]

names(s02) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired", "production_code",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4")

s02_episodes <- s02 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 2,
         serial_title = NA,
         episode_number = c(NA, 1:13),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 13)),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(60, rep(45, 13))) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s02_directors <- s02 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s02_writers <- s02 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer)

rm(s02)

episodes <- rbind(s01_episodes, s02_episodes)
directors <- rbind(s01_directors, s02_directors)
writers <- rbind(s01_writers, s02_writers)

rm(s01_directors, s01_episodes, s01_writers,
   s02_directors, s02_episodes, s02_writers)

# Season 03 ----

s03 <- tables[[5]]

names(s03) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired", "production_code",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4")

s03_episodes <- s03 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 3,
         serial_title = NA,
         episode_number = c(NA, 1:13),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 13)),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(60, rep(45, 12), 52)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s03_directors <- s03 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s03_writers <- s03 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer)

rm(s03)

episodes <- rbind(episodes, s03_episodes)
directors <- rbind(directors, s03_directors)
writers <- rbind(writers, s03_writers)

rm(s03_directors, s03_episodes, s03_writers)


# Season 04 ----

s04 <- tables[[6]]

names(s04) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired", "production_code",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4")

s04_episodes <- s04 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 4,
         serial_title = NA,
         episode_number = c(NA, 1:13),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 13)),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(72, 50, 50, rep(45, 8), 50, 45, 63)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s04_directors <- s04 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s04_writers <- s04 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer)

rm(s04)

episodes <- rbind(episodes, s04_episodes)
directors <- rbind(directors, s04_directors)
writers <- rbind(writers, s04_writers)

rm(s04_directors, s04_episodes, s04_writers)

# Specials ----

sp01 <- tables[[7]]

names(sp01) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired", "production_code",
                "uk_viewers", "rating")

sp01_episodes <- sp01 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = NA,
         serial_title = NA,
         episode_number = 1:5,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special"),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(rep(60, 4), 75)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


sp01_directors <- sp01 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

sp01_writers <- sp01 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " & ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(sp01)

episodes <- rbind(episodes, sp01_episodes)
directors <- rbind(directors, sp01_directors)
writers <- rbind(writers, sp01_writers)

rm(sp01_directors, sp01_episodes, sp01_writers)

# Season 05 ----

s05 <- tables[[8]]

names(s05) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired", "production_code",
                "uk_viewers", "rating")

s05_episodes <- s05 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 5,
         serial_title = NA,
         episode_number = 1:13,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = "episode",
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(65, rep(45, 4), 50, rep(45, 5), 50, 55)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s05_directors <- s05 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s05_writers <- s05 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer)

rm(s05)

episodes <- rbind(episodes, s05_episodes)
directors <- rbind(directors, s05_directors)
writers <- rbind(writers, s05_writers)

rm(s05_directors, s05_episodes, s05_writers)

# Season 06 ----

s06 <- tables[[9]]

names(s06) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired", "production_code",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4")

s06_episodes <- s06 %>%
  filter(!story_number %in% c("Special", "Series", "Part 1", "Part 2")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 6,
         serial_title = NA,
         episode_number = c(NA, 1:13),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 13)),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = as.character(production_code),
         duration = c(60, rep(45, 6), 50, 50, 45, 45, 50, 45, 45)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s06_directors <- s06 %>%
  filter(!story_number %in% c("Special", "Series", "Part 1", "Part 2")) %>%
  select(story_number,director)

s06_writers <- s06 %>%
  filter(!story_number %in% c("Special", "Series", "Part 1", "Part 2")) %>%
  select(story_number, writer)

rm(s06)

episodes <- rbind(episodes, s06_episodes)
directors <- rbind(directors, s06_directors)
writers <- rbind(writers, s06_writers)

rm(s06_directors, s06_episodes, s06_writers)

# Season 07 ----

s07 <- tables[[10]]

names(s07) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4", "rm5")

s07_episodes <- s07 %>%
  filter(!story_number %in% c("Special (2011)", "Special (2012)", "Series", "Part 1", "Part 2")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 7,
         serial_title = NA,
         episode_number = c(NA, 1:5, NA, 6:13),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 5), "special", rep("episode", 8)),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(60, 50, 45, 45, 41, 45, 60, 45, 44, rep(45, 6))) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s07_directors <- s07 %>%
  filter(!story_number %in% c("Special (2011)", "Special (2012)", "Series", "Part 1", "Part 2")) %>%
  select(story_number,director)

s07_writers <- s07 %>%
  filter(!story_number %in% c("Special (2011)", "Special (2012)", "Series", "Part 1", "Part 2")) %>%
  select(story_number, writer)

rm(s07)

episodes <- rbind(episodes, s07_episodes)
directors <- rbind(directors, s07_directors)
writers <- rbind(writers, s07_writers)

rm(s07_directors, s07_episodes, s07_writers)

# Specials ----

sp02 <- tables[[11]]

names(sp02) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating")

sp02_episodes <- sp02 %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = NA,
         serial_title = NA,
         episode_number = 1:2,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = rep("special", 2),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(77, 60)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


sp02_directors <- sp02 %>%
  select(story_number,director)

sp02_writers <- sp02 %>%
  select(story_number, writer)

rm(sp02)

episodes <- rbind(episodes, sp02_episodes)
directors <- rbind(directors, sp02_directors)
writers <- rbind(writers, sp02_writers)

rm(sp02_directors, sp02_episodes, sp02_writers)

# Season 08 ----

s08 <- tables[[12]]

names(s08) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating")

s08_episodes <- s08 %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 8,
         serial_title = NA,
         episode_number = 1:12,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = "episode",
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(76, 45, 46, 48, 45, 45, 45, 47, 43, 45, 46, 57)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s08_directors <- s08 %>%
  select(story_number,director)

s08_writers <- s08 %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " & ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s08)

episodes <- rbind(episodes, s08_episodes)
directors <- rbind(directors, s08_directors)
writers <- rbind(writers, s08_writers)

rm(s08_directors, s08_episodes, s08_writers)

# Season 09 ----

s09 <- tables[[13]]

names(s09) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4", "rm5")

s09_episodes <- s09 %>%
  filter(!story_number %in% c("Special (2014)", "Special (2015)", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 9,
         serial_title = NA,
         episode_number = c(NA, 1:12, NA),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 12), "special"),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(60, 46, 48, 43, 42, 46, 46, 46, 46, 45, 47, 54, 61, 56)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s09_directors <- s09 %>%
  filter(!story_number %in% c("Special (2014)", "Special (2015)", "Series")) %>%
  select(story_number,director)

s09_writers <- s09 %>%
  filter(!story_number %in% c("Special (2014)", "Special (2015)", "Series")) %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s09)

episodes <- rbind(episodes, s09_episodes)
directors <- rbind(directors, s09_directors)
writers <- rbind(writers, s09_writers)

rm(s09_directors, s09_episodes, s09_writers)

# Season 10 ----

s10 <- tables[[14]]

names(s10) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4", "rm5")

s10_episodes <- s10 %>%
  filter(!story_number %in% c("Special (2016)", "Special (2017)", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 10,
         serial_title = NA,
         episode_number = c(NA, 1:12, NA),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 12), "special"),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(60, 50, 46, 44, 44, 45, 49, 46, 45, 44, 42, 46, 60, 60)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s10_directors <- s10 %>%
  filter(!story_number %in% c("Special (2016)", "Special (2017)", "Series")) %>%
  select(story_number,director)

s10_writers <- s10 %>%
  filter(!story_number %in% c("Special (2016)", "Special (2017)", "Series")) %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s10)

episodes <- rbind(episodes, s10_episodes)
directors <- rbind(directors, s10_directors)
writers <- rbind(writers, s10_writers)

rm(s10_directors, s10_episodes, s10_writers)

# Season 11 ----

s11 <- tables[[15]]

names(s11) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4", "rm5")

s11_episodes <- s11 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 11,
         serial_title = NA,
         episode_number = c(1:10, NA),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c(rep("episode", 10), "special"),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(64, 49, 50, 49, 51, 50, 49, 47, 49, 50, 60)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s11_directors <- s11 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s11_writers <- s11 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s11)

episodes <- rbind(episodes, s11_episodes)
directors <- rbind(directors, s11_directors)
writers <- rbind(writers, s11_writers)

rm(s11_directors, s11_episodes, s11_writers)

# Season 12 ----

s12 <- tables[[16]]

names(s12) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4", "rm5")

s12_episodes <- s12 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 12,
         serial_title = NA,
         episode_number = c(1:10, NA),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c(rep("episode", 10), "special"),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(59, 60, 46, 50, 50, 49, 49, 49, 49, 65, 71)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s12_directors <- s12 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s12_writers <- s12 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s12)

episodes <- rbind(episodes, s12_episodes)
directors <- rbind(directors, s12_directors)
writers <- rbind(writers, s12_writers)

rm(s12_directors, s12_episodes, s12_writers)

# Season 13 ----

s13 <- tables[[17]]

names(s13) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating")

s13_episodes <- s13 %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 13,
         serial_title = "Flux",
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = "episode",
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         # uk_viewers = case_when(uk_viewers == "TBD" ~ NA_character_,
         #                        TRUE ~ uk_viewers),
         # rating = case_when(rating == "TBA" ~ NA_character_,
         #                    TRUE ~ rating),
         production_code = NA,
         duration = c(50, 59, 49, 56, 50, 59)) %>%
  mutate(episode_title = case_when(episode_title == "TBA" ~ NA_character_,
                                   TRUE ~ episode_title)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s13_directors <- s13 %>%
  select(story_number,director)

s13_writers <- s13 %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s13)

episodes <- rbind(episodes, s13_episodes)
directors <- rbind(directors, s13_directors)
writers <- rbind(writers, s13_writers)

rm(s13_directors, s13_episodes, s13_writers)

# 2022 Specials ----

sp03 <- tables[[18]]

names(sp03) <- c("story_number", "episode_number", "episode_title",
                 "director", "writer", "first_aired",
                 "uk_viewers", "rating")

sp03_episodes <- sp03 %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = NA,
         serial_title = NA,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = "special",
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         # uk_viewers = case_when(uk_viewers == "TBD" ~ NA_character_,
         #                        TRUE ~ uk_viewers),
         # rating = case_when(rating == "TBA" ~ NA_character_,
         #                    TRUE ~ rating),
         production_code = NA,
         duration = c(58, 48, 87)) %>%
  mutate(episode_title = case_when(episode_title == "TBA" ~ NA_character_,
                                   TRUE ~ episode_title)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


sp03_directors <- sp03 %>%
  select(story_number,director)

sp03_writers <- sp03 %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(sp03)

episodes <- rbind(episodes, sp03_episodes)
directors <- rbind(directors, sp03_directors)
writers <- rbind(writers, sp03_writers)

rm(sp03_directors, sp03_episodes, sp03_writers)

# 2023 Specials ----

sp04 <- tables[[19]]

names(sp04) <- c("story_number", "episode_number", "episode_title",
                 "director", "writer", "first_aired",
                 "uk_viewers", "rating")

sp04_episodes <- sp04 %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = NA,
         serial_title = NA,
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = "special",
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         # uk_viewers = case_when(uk_viewers == "TBD" ~ NA_character_,
         #                        TRUE ~ uk_viewers),
         # rating = case_when(rating == "TBA" ~ NA_character_,
         #                    TRUE ~ rating),
         production_code = NA,
         duration = c(57, 54, 62)) %>%
  mutate(episode_title = case_when(episode_title == "TBA" ~ NA_character_,
                                   TRUE ~ episode_title)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


sp04_directors <- sp04 %>%
  select(story_number,director)

sp04_writers <- sp04 %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value) |>
  mutate(writer = case_when(story_number == 301 ~ "Russell T Davies",
                            .default = writer)) |>
  distinct(story_number, writer)

rm(sp04)

episodes <- rbind(episodes, sp04_episodes)
directors <- rbind(directors, sp04_directors)
writers <- rbind(writers, sp04_writers)

rm(sp04_directors, sp04_episodes, sp04_writers)


# Set variable types ----

drwho_episodes <- drwho_episodes %>%
  mutate(uk_viewers = as.numeric(uk_viewers),
         rating = as.numeric(rating))

# Season 14 ----

s14 <- tables[[20]]

names(s14) <- c("story_number", "episode_number", "episode_title",
                "director", "writer", "first_aired",
                "uk_viewers", "rating", "rm1", "rm2", "rm3", "rm4", "rm5")

s14_episodes <- s14 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number:episode_title, first_aired:rating) %>%
  mutate(era = "revived",
         season_number = 14,
         serial_title = NA,
         episode_number = c(NA, 1:8),
         episode_title = gsub('.*"(.*)".*', "\\1", episode_title),
         type = c("special", rep("episode", 8)),
         first_aired = as.Date(gsub(".*\\((.*)\\).*", "\\1", first_aired)),
         production_code = NA,
         duration = c(55, 46, 49, 44, 47, 43, 44, 44, 54)) %>%
  select(era, season_number, serial_title, story_number, episode_number,
         episode_title, type, everything())


s14_directors <- s14 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number,director)

s14_writers <- s14 %>%
  filter(!story_number %in% c("Special", "Series")) %>%
  select(story_number, writer) %>%
  separate(writer, c("writer1", "writer2"), " and ") %>%
  pivot_longer(!story_number, names_to = "writer_name", values_drop_na = TRUE) %>%
  select(story_number, writer = value)

rm(s14)

drwho_episodes <- rbind(episodes, s14_episodes)
drwho_directors <- rbind(directors, s14_directors)
drwho_writers <- rbind(writers, s14_writers)

rm(s14_directors, s14_episodes, s14_writers)
rm(directors, episodes, writers)
KittJonathan/datardis documentation built on Aug. 23, 2024, 5:22 p.m.