readme.md

forbesListR - An easy way to access the data contained lists maintained by the fine folks at Forbes in R

Why I Built forbesListR?

Forbes is the preiminant maintainer of covering a wide range of business related topics including sports, entertainment, individual wealth, and locations. The lists are chocked full of phenomonal data that can be analyzed, visualized and merged with other data. Upon discovering that Forbes went to the pains of building an API, even though it is undocumented, I decided to build this package to wrap that API and make it as easy as possible for my fellow #rstats brethren to access this data with a few simple functions.

As of now this package has 3 primary functions that I hope become widely used.

If and when Forbes adds new lists I will update this package to include them.

Accessible Lists

Installation

devtools::install_github("abresler/forbesListR")

Usage

Get 2016 NBA Team Valuations

library(forbesListR)
nba_2016_values <- 
  get_year_forbes_list_data(list = "NBA Valuations", year = 2016)

top_10_2016 <- 
  nba_2016_values %>% 
  dplyr::select(year, team, valuation.millions) %>% 
  head(10)

| year| team | valuation.millions| |-----:|:----------------------|-------------------:| | 2016| New York Knicks | 3000| | 2016| Los Angeles Lakers | 2700| | 2016| Chicago Bulls | 2300| | 2016| Boston Celtics | 2100| | 2016| Los Angeles Clippers | 2000| | 2016| Golden State Warriors | 1900| | 2016| Brooklyn Nets | 1700| | 2016| Houston Rockets | 1500| | 2016| Dallas Mavericks | 1400| | 2016| Miami Heat | 1300|

Get Top Venture Capitalists from 2012 to 2016

vcs_2012_2016 <- 
  get_years_forbes_list_data(years = 2012:2016, list_name = "Top VCs")
vcs_2012_2016 %>% 
  dplyr::glimpse()
# Observations: 475
# Variables: 15
# $ year           (int) 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 20...
# $ list           (chr) "Top VCs", "Top VCs", "Top VCs", "Top VCs", "Top VCs", "Top VCs", "Top VCs", "Top VCs", "Top...
# $ name           (chr) "Jim Goetz", "Steve Anderson", "Chris Sacca", "Peter Fenton", "Mary Meeker", "Josh Kopelman"...
# $ last_name      (chr) "Goetz", "Anderson", "Sacca", "Fenton", "Meeker", "Kopelman", "Shen", "Gurley", "Leone", "Th...
# $ company        (chr) "Sequoia Capital", "Baseline Ventures", "Lowercase Capital", "Benchmark", "Kleiner Perkins C...
# $ is.government  (lgl) FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
# $ position       (int) NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
# $ rank           (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 2...
# $ age            (int) 50, 47, 40, 43, 56, 44, 46, 49, 58, 48, 54, 59, 50, 51, 44, 52, 54, 48, 48, 51, 45, 64, 42, ...
# $ url.bio.forbes (chr) "http://www.forbes.com/profile/jim-goetz", "http://www.forbes.com/profile/steve-anderson", "...
# $ url.image      (chr) "http://i.forbesimg.com/media/lists/people/jim-goetz_200x200.jpg", "http://i.forbesimg.com/m...
# $ gender         (chr) "M", "M", "M", "M", "F", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M...
# $ country        (chr) "United States", "United States", "United States", "United States", "United States", "United...
# $ description    (chr) "A year-and-a-half after the sale of WhatsApp to Facebook for near $22 billion, Jim Goetz co...
# $ notableDeal    (chr) "WhatsApp", "Instagram", "Twitter", "Twitter", "Facebook", "LinkedIn", "Alibaba", "Uber", "F...

Get Detailed Biography Data for 2015 Top Athletes - Explore Earnings by Maritial Status

library(dplyr)
athletes_2015 <- 
  get_year_list_forbes_bio_data(year = 2015, list_name = 'Athletes')
# Warning: closing unused connection 5 (http://www.forbes.com/profile/russell-westbrook)

athletes_2015 <- 
  athletes_2015 %>% 
  group_by(martial_status) %>% 
  summarise(earnings.millions = sum(salary_earnings.millions, na.rm = T)) %>% 
  arrange(desc(earnings.millions)) %>% 
  ungroup

| martial_status | earnings.millions| |:----------------|------------------:| | Married | 1257.3| | Single | 939.1| | Divorced | 659.0| | Engaged | 58.5| | NA | 33.1|

Sample Data Visualizations

Waffle Chart of Aggregate Professional Sport Valuations by Year

nba_valuations <-
  2012:2016 %>%
  get_years_forbes_list_data(list_name = "NBA Valuations")

nhl_valuations <-
  2012:2016 %>%
  get_years_forbes_list_data(list_name = "NHL Valuations")

mlb_valuations <-
  2012:2016 %>%
  get_years_forbes_list_data(list_name = "MLB Valuations")

nfl_valuations <-
  2012:2016 %>%
  get_years_forbes_list_data(list_name = "NFL Valuations")

soccer_valuations <-
  2012:2016 %>%
  get_years_forbes_list_data(list_name = "Soccer Valuations")

all_valuation_data <-
  nba_valuations %>%
  bind_rows(nhl_valuations) %>%
  bind_rows(mlb_valuations) %>%
  bind_rows(soccer_valuations) %>%
  bind_rows(nfl_valuations) %>%
  mutate(list = list %>% str_replace_all(' Valuations', '')) %>%
  dplyr::filter(year %in% c(2012:2016))
library(waffle) # devtools::install_github("hrbrmstr/waffle")
# Loading required package: ggplot2
library(ggplot2) # devtools::install_github("hadley/ggplot2")

total_by_year <- 
  all_valuation_data %>% 
  dplyr::filter(year %in% c(2012:2015)) %>% 
  group_by(year) %>% 
  summarise(aggregate_valuation.billions = sum(valuation.millions, na.rm = T) / 1000)

totals <- 
  total_by_year$aggregate_valuation.billions / 10

names(totals) <- 
  total_by_year$year

sports_waffle_valuations <- 
  totals %>% 
  waffle(flip = F, size = 2, rows = 8, xlab = '1 square = ~$10B',  pad = 1,
         colors = c("#FF5A5F","#FFB400", "#007A87", "#FFAA91", "#7B0051")) +
  labs(title = "Aggregate Estimated Professional Sports Valuations",
       subtitle = "MLB, NHL, NBA, NFL, & Soccer -- 2012-2015") +
  theme(plot.subtitle = element_text(
    size = 8,
    hjust = 0
  ),
  plot.title = element_text(size = 10, face = "bold.italic")
  )
sports_waffle_valuations

Streamgraph of Valuations by Sport, 2012 - 2016

  summary_cols <-
    c('valuation.millions', 'revenue.millions', 'operating_income.millions', 'debt.millions')

  summary_data <-
    all_valuation_data %>%
    dplyr::rename(sport = list) %>%
    group_by(year, sport) %>%
    summarise_each_(funs(sum(., na.rm = T)), summary_cols) %>%
    mutate(revenue.multiple = valuation.millions / revenue.millions,
           ebitda.multiple = valuation.millions / operating_income.millions) %>%
    ungroup
library(streamgraph) # devtools::install_github('hrbrmstr/streamgraph')

nba_streamgraph <- 
  summary_data %>%
  streamgraph("sport",
              "valuation.millions",
              "year",
              offset = "silhouette",
              interpolate = "step") %>%
  sg_axis_x(1, "year", "%Y") %>%
  sg_fill_brewer(palette = "Spectral") %>%
  sg_legend(show = TRUE, label = "Sport: ")

Choroplething Forbes 2015 Best Cities for Small Business by State

Example from Aaron Miles


library(choroplethr) #install.packages('choroplethr')
library(RColorBrewer)
library(ggplot2)

# Read in Forbes Data
dat <-
  get_year_forbes_list_data(list = "Best Cities for Business", year = 2015)

#Summarize Number of Cities by State
bus <-
  dat %>%
  group_by(state) %>%
  summarize(cities = n_distinct(city)) %>%
  mutate(state = state %>% tolower())


#Read in Choroplethr Data
data("df_state_demographics")

# Merge with cities Data
df_state_demographics <-
  df_state_demographics %>%
  left_join(bus, by = c("region" = "state"))

#Fill NAs as 0 (States with no cities)
df_state_demographics[is.na(df_state_demographics)] <-
  0

#Set value to be city
df_state_demographics <-
  df_state_demographics %>%
  mutate(value = cities)

#Create Plot
choro1 <- StateChoropleth$new(df_state_demographics)
choro1$title = "Forbes' Best Cities for Business by State -- 2015"
choro1$set_num_colors(1)
choro1$ggplot_polygon = geom_polygon(aes(fill = value), color = NA)
choro1$ggplot_scale = scale_fill_gradientn(name = "# Cities",
                                           colours = brewer.pal(8, "Purples"))
choro1$render()



abresler/forbesListR documentation built on May 10, 2019, 4:15 a.m.