Description Usage Arguments Value Examples
A group of functions that use rvest::html_nodes
to extract information
from the Eurostat Comext bulk download repository.
scraplinks, the main function, extracts links from a web page.
1 2 3 4 5 6 7 8 9 10 11 12 | scraplinks(url)
scrapcomextfoldername(pattern, urlparameter = "dir")
extractfilepath(url, urlparameter = "downfile")
scraplistoffilesincomext(folderurl, urlparameter = "downfile")
scraplistoffilesincomextfolder(
comextfolderpath = getOption("comext")["datafolder"],
extension = ".7z"
)
|
url |
character, a URL |
pattern |
character string containing a regular expression, see |
folderurl |
character url of the comext folder of interest |
comextfolderpath |
path on the comext site (subfolder of the "comext" folder) |
extension |
character file extension of interest |
urlparameter |
character the url parameter where the file path is located |
a data frame of link text and urls
a character vector containing the name of the folder
A comext folder name
a character vector
a data frame containing folder paths and file names
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | ## Not run:
scraplinks("http://localhost/")
glinks <- scraplinks("http://google.com/")
## End(Not run)
## Not run: # Scrap the name of Comext recent and archive folders
# Name of the most recent monthly folder
scrapcomextfoldername(format(Sys.Date(),"\\[%Y"))
# Character escape needed, because "[" and "]" have a special meaning in a regular expression
# Name of the monthly data archive folder.
scrapcomextfoldername("S1\\]")
# Name of the yearly data archive folder
scrapcomextfoldername("S2\\]")
## End(Not run)
# Extract the file path from a Eurostat URL
eurostat_url_1 <- "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&downfile=comext%2F201706%2Fdata%2Fnc201702.7z"
extractfilepath(eurostat_url_1, "downfile")
eurostat_url_2 <- "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=comext%2F201706%2Fdata%2Fnc201702.7z"
extractfilepath(eurostat_url_2, "file")
extractfilepath(eurostat_url_2, "nonesense") # returns NA
## Not run: # List files in the given comext folder
# Most recent data folder (url will change through time, this example will break)
recentfiles <- scraplistoffilesincomext("http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&dir=comext%2F201706%2Fdata")
str(recentfiles)
# Archive folder
archive <- scraplistoffilesincomext("http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&dir=comext%2F2016S1%2Fdata")
## End(Not run)
## Not run:
# List files available on the comext metadata page
comextmetadata <- scraplistoffilesincomextfolder(comextfolderpath = getOption("comext")["metadatafolder"],
extension = ".txt")
# List files available on the comext COMEXT_DATA/PRODUCTS page
comextcontent <- scraplistoffilesincomextfolder(comextfolderpath = getOption("comext")["datafolder"]) %>%
# Extract year and month information from the file name
mutate(year = as.numeric(substr(file,5,8)),
month = as.numeric(substr(file,9,10)))
comextcontent$file
# keep only monthly data
comextmonthly <- comextcontent %>%
filter(month < 20)
# filter yearly data
comextyearly <- comextcontent %>%
filter(month > 20)
# keep only monthly data from the past 4 years
comextmonthlyrecent <- comextcontent %>%
filter(year > as.numeric(format(Sys.time(), "%Y")) - 5 &
month < 20)
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.