# Default knitr chunk options applied to the chunks in this document.
knitr::opts_chunk$set(
  # merge each chunk's source and its output into a single block
  collapse = TRUE,
  # prefix printed output lines with "#>"
  comment = "#>",
  # path prefix for figure files generated by the chunks
  fig.path = "man/figures/README-",
  # display width of figures in the rendered output
  out.width = "100%"
)
library(dplyr)

wikifacts

Lifecycle: experimental CRAN status Total Downloads R build status Travis build status Codecov test coverage

An R package which gets facts and data from Wikipedia and Wikidata.

Installation

You can install the released version of wikifacts from CRAN with:

```{r eval = FALSE}
install.packages("wikifacts")
```

And the development version from [GitHub](https://github.com/) with:

```{r eval = FALSE}
# install.packages("devtools")
devtools::install_github("keithmcnulty/wikifacts")
```

Functionality

Examples - Query Wikidata

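You can send SPARQL queries to Wikidata using `wiki_query()` and retrieve the results in a dataframe. If you have never queried Wikidata before, the [Wikidata SPARQL tutorial](https://www.wikidata.org/wiki/Wikidata:SPARQL_tutorial) is a good starting point for constructing SPARQL queries, and you can find lots of examples on the [Wikidata query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples).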

In this example, a bar chart is created to show the top ten countries ranked by the number of cities with female mayors, based on data in Wikidata:

library(wikifacts)
library(ggplot2)

# SPARQL query: for each country, count the cities whose head-of-government
# statement has no end date and names a female office holder, then keep the
# ten countries with the highest counts.
mayor_query <- 'SELECT ?countryLabel (count(*) AS ?count)
WHERE
{
    ?city wdt:P31/wdt:P279* wd:Q515 . # find instances of subclasses of city
    ?city p:P6 ?statement .           # with a P6 (head of government) statement
    ?statement ps:P6 ?mayor .         # ... that has the value ?mayor
    ?mayor wdt:P21 wd:Q6581072 .      # ... where the ?mayor has P21 (sex or gender) female
    FILTER NOT EXISTS { ?statement pq:P582 ?x }  # ... but the statement has no P582 (end date) qualifier
    ?city wdt:P17 ?country .          # Also find the country of the city

    # Resolve ?countryLabel using the "en" label of the country:
    SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en" .
    }
}
GROUP BY ?countryLabel
ORDER BY DESC(?count)
LIMIT 10'

# Send the query to Wikidata; the plot below reads the `countryLabel` and
# `count` columns of the result.
mayors <- wiki_query(mayor_query)

# Horizontal bar chart, countries ordered by their count.
# geom_col() draws bars whose length is the value itself, which is what
# geom_bar(stat = "identity") was being used for.
ggplot(mayors, aes(x = count, y = reorder(countryLabel, count))) +
  geom_col(fill = "lightblue") +
  labs(
    x = "Cities with female mayors",
    y = "",
    title = "Top Ten Countries Based on Number of Female Mayors",
    caption = paste(
      "Based on Wikidata as of",
      format(Sys.Date(), "%d %B %Y")
    )
  )

Or, on a darker topic, the top twenty countries by number of serial killers born there:

# SPARQL query: count people with occupation "serial killer" per country of
# their place of birth, keeping the twenty countries with the highest counts.
serial_killers <- 'SELECT ?countryLabel (COUNT(?human) AS ?count) WHERE { 
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  ?human wdt:P106 wd:Q484188. # occupation: serial killer
  ?human wdt:P19 ?place_of_birth. # get place of birth
  ?place_of_birth wdt:P17 ?country . # map to country
}
GROUP BY ?countryLabel
ORDER BY DESC(?count)
LIMIT 20'

# Send the query to Wikidata; the plot below reads the `countryLabel` and
# `count` columns of the result.
serialkillers <- wiki_query(serial_killers)

# Horizontal bar chart, countries ordered by their count.
# geom_col() draws bars whose length is the value itself, which is what
# geom_bar(stat = "identity") was being used for.
ggplot(serialkillers, aes(x = count, y = reorder(countryLabel, count))) +
  geom_col(fill = "darkred") +
  labs(
    x = "Number of Serial Killers",
    y = "",
    title = "Top 20 Countries Based on Serial Killers Born There",
    caption = paste(
      "Based on Wikidata as of",
      format(Sys.Date(), "%d %B %Y")
    )
  )

Examples - Getting definitions of terms from Wikipedia

# Create a one-column data frame of animal names to look up.
animals <- data.frame(
  name = c("kangaroo", "kookaburra", "wombat", "tasmanian devil", "quokka")
)

# Add a `definition` column from Wikipedia and render the result as a table.
# The argument is spelled out in full (`sentences`) rather than relying on
# R's partial argument matching of `sentence`.
animals %>%
  dplyr::mutate(definition = wiki_define(name, sentences = 1)) %>%
  knitr::kable()

Examples - Retrieving facts from Wikipedia Main Pages

# Print the text returned by each fact helper to the console.
cat(wiki_didyouknow())
cat(wiki_randomfact())

Use with cowsay:

# Pass the result of wiki_randomfact() to cowsay::say() for display.
cowsay::say(wiki_randomfact())

Generate multiple random facts:

# Request ten facts in one call.
# NOTE(review): bare_fact = TRUE presumably returns the fact text without any
# surrounding header/footer -- confirm against the function documentation.
wiki_randomfact(n_facts = 10, bare_fact = TRUE)

Search Wikipedia (launches browser with results):

# Look up the given term; per the surrounding text this opens the results in
# a browser rather than returning them to the session.
wiki_search("R (programming language)")


keithmcnulty/wikifacts documentation built on May 2, 2021, 7:34 a.m.