This library is in early development. If you run into any problems, please open an issue.
An R client for the data portal API of the King Abdullah Petroleum Studies and Research Center (KAPSARC).
The API documentation can be found here.
devtools::install_github("naif-alsader/KapsarcR")
library(KapsarcR)
library(tidyverse)
#> ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──
#> ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
#> ✓ tibble 3.0.3 ✓ dplyr 1.0.0
#> ✓ tidyr 1.1.0 ✓ stringr 1.4.0
#> ✓ readr 1.3.1 ✓ forcats 0.4.0
#> ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
datasets<-list_datasets()
The dataset returned by `list_datasets()` is not tidy. You can use `clean_dataset()` to get a tidier dataset:
datasets %>%
clean_dataset() %>%
head()
#> # A tibble: 6 x 15
#> dataset_id country title description themes keywords number_of_obser…
#> <chr> <list> <chr> <chr> <list> <list> <int>
#> 1 daily-cha… <NULL> Dail… "Sources:J… <NULL> <chr [1… 895
#> 2 balance-o… <chr [… Bala… "This data… <chr … <chr [8… 7456
#> 3 growth-ra… <chr [… Grow… "<table><t… <chr … <chr [1… 105
#> 4 the-econo… <chr [… The … "About the… <chr … <chr [4… 0
#> 5 electrici… <chr [… Elec… "This data… <chr … <chr [1… 18
#> 6 weekly-us… <chr [… Week… "This data… <chr … <chr [5… 52
#> # … with 8 more variables: count_of_attchments <int>, date_created <chr>,
#> # modified <chr>, reference <chr>, temporal <chr>, fields_label <list>,
#> # fields_type <list>, fields_name <list>
and the column names are
#> [1] "dataset_id" "country" "title"
#> [4] "description" "themes" "keywords"
#> [7] "number_of_observations" "count_of_attchments" "date_created"
#> [10] "modified" "reference" "temporal"
#> [13] "fields_label" "fields_type" "fields_name"
# by Country
datasets<-list_datasets(country = "Saudi Arabia")
# by Theme
datasets<-list_datasets(theme = "Water")
# by Keyword
datasets<-list_datasets(keyword = "Water")
There are two ways to combine two filter arguments:
- return datasets that match both arguments (use `operation = "and"`, the default value)
- return datasets that match either argument (use `operation = "or"`)
datasets<-list_datasets(keyword = "Water", theme = "Water", operation = "or")
datasets<-list_datasets(q = "default.theme = 'Water' or default.theme = 'Transportation'")
datasets<-list_datasets(q = "default.modified > '2019'")
datasets<-list_datasets(q = "default.records_count > 1000000")
The fields that can be used in the query can be found in the `metas` column of the dataset (it's actually 4 datasets in one column), for example
datasets$metas$default
or datasets$metas$custom
Here is the complete list:
`default.records_count`, `default.modified`, `default.source_domain_address`, `default.references`, `default.keyword`, `default.source_domain_title`, `default.geographic_reference`, `default.timezone`, `default.title`, `default.parent_domain`, `default.theme`, `default.modified_updates_on_data_change`, `default.metadata_processed`, `default.data_processed`, `default.territory`, `default.description`, `default.modified_updates_on_metadata_change`, `default.shared_catalog`, `default.source_domain`, `default.attributions`, `default.geographic_reference_auto`, `default.publisher`, `default.language`, `default.license`, `default.source_dataset`, `default.metadata_languages`, `default.oauth_scope`, `default.federated` and `default.license_url`
`custom.predecessor`, `custom.restricted-comment`, `custom.expected-next-release`, `custom.country`, `custom.publisher-periodicity`, `custom.iso-region`, `custom.last-checked-date`, `custom.related-datasets`, `custom.contact`, `custom.data-classification`, `custom.source-copyrights`, `custom.unit-of-measure` and `custom.discontinued-data`
`dcat.created`, `dcat.issued`, `dcat.temporal`, `dcat.granularity`, `dcat.contributor`, `dcat.publisher_type`, `dcat.contact_email`, `dcat.accrualperiodicity`, `dcat.spatial`, `dcat.dataquality`, `dcat.contact_name` and `dcat.creator`
df <- get_dataset(datasets$dataset_id[1])
library(purrr)
df_list <- map(datasets$dataset_id, get_dataset)
attachments <- get_attachments(datasets$dataset_id[1])
attachments_list <- map(datasets$dataset_id, get_attachments)
download_attachments(attachments)
map(attachments_list, download_attachments)
Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.