round_url <- function(rounds, format = NULL) { # nocov start
  # Get unique rounds to avoid repeating rounds
  rounds <- sort(unique(rounds))
  # Check rounds
  check_rounds(rounds)
  round_codes <- paste0("ESS", rounds)
  # Extract download urls from the selected rounds
  round_links <- sort(grep(pattern = paste0(round_codes, collapse = "|"),
                           x = get_rounds_link(.global_vars$ess_website),
                           value = TRUE))
  full_url <- get_download_url(round_links, format)
  full_url
} # nocov end
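# Illustrative usage (a sketch, not run: it needs an internet connection and
# the internal .global_vars environment to be initialized):
#   round_url(rounds = c(1, 2), format = "spss")
# should return one direct download url per requested round.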
country_url <- function(country, rounds, format = NULL) { # nocov start
  # Check the country and the rounds available for that country
  check_country_rounds(country, rounds)
  # Only select the rounds that match this:
  # /download.html\\?file= for the download section
  # ESS[0-9] for any of the rounds
  # [A-Z]{1,2} for the country code (one or two capital letters), as in DK
  # (.*) anything in between
  # [0-9]{4, } for the year of the round
  country_regex <- "^/download.html\\?file=ESS[0-9][A-Z]{1,2}(.*)[0-9]{4, }$"
  full_urls <- grab_url(country, rounds, country_regex, format)
  full_urls
} # nocov end
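# Illustrative usage (a sketch, not run; the country name below is only an
# example and must match the spelling used on the ESS website):
#   country_url(country = "Denmark", rounds = 1:2, format = "stata")
# returns the direct download urls for that country's requested rounds.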
country_url_sddf <- function(country, rounds, format = NULL) { # nocov start
  # Check the country and the SDDF rounds available for that country
  check_country_sddf_rounds(country, rounds)
  # Only select the rounds that match this:
  # /download.html\\?file= for the download section
  # ESS[0-9]{1,} for any of the rounds
  # [A-Z]{1,2} for the country code (one or two capital letters), as in DK
  # (.*) anything in between
  # [0-9]{4, } for the year of the round
  sddf_regex <-
    "^/download.html\\?file=ESS[0-9]{1,}_[A-Z]{1,2}_SDDF(.*)[0-9]{4, }$"
  full_urls <- grab_url(country, rounds, sddf_regex, format)
  full_urls
} # nocov end
country_url_sddf_late_rounds <- function(country, rounds, format = NULL) { # nocov start
  # Check the country and the SDDF rounds available for that country
  check_country_sddf_rounds(country, rounds)
  # No need to add a regex as in country_url because grabbing the rounds
  # returns the direct paths
  full_urls <- grab_url_sddf_late_rounds(rounds, format)
  full_urls
} # nocov end
grab_url <- function(country, rounds, regex, format) { # nocov start
  # Get unique rounds to avoid repeating rounds
  rounds <- sort(unique(rounds))
  # Get unique country to avoid repetitions
  country <- sort(unique(country))
  # Returns the chosen country's html, which contains
  # the links to all round websites.
  country_round_html <- extract_country_html(country,
                                             .global_vars$country_index)
  # Go deeper in the node to grab that country's urls to the rounds
  country_node <- xml2::xml_find_all(country_round_html, "//ul //li //a")
  # Here we have all hrefs from the website
  country_href <- xml2::xml_attrs(country_node, "href")
  incomplete_links <-
    sort(grep(regex,
              country_href,
              value = TRUE))
  # Extract round numbers
  round_numbers <- as.numeric(string_extract(incomplete_links,
                                             "[0-9]{1,2}"))
  # Build final ESS round links
  round_links <- incomplete_links[which(round_numbers %in% rounds)]
  # Using each separate round html, extract the stata or spss direct link
  # to download the data. Return the full direct path.
  full_urls <- get_download_url(round_links, format)
  full_urls
} # nocov end
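# Illustrative usage (a sketch, not run): country_url calls this helper with
# its country regex, e.g.
#   grab_url("Denmark", 1:2,
#            "^/download.html\\?file=ESS[0-9][A-Z]{1,2}(.*)[0-9]{4, }$",
#            format = "spss")
# where "Denmark" is only an example country name.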
grab_url_sddf_late_rounds <- function(rounds, format) { # nocov start
  # Get unique rounds to avoid repeating rounds
  rounds <- sort(unique(rounds))
  # Returns the main page listing all late SDDF rounds
  late_sddf_mainpage <-
    get_href(
      .global_vars$ess_website,
      .global_vars$sddf_index,
      "//ul [@class='docliste']//a"
    )
  # Here we have all late SDDF main websites
  late_sddf_href <- xml2::xml_attrs(late_sddf_mainpage, "href")
  # Extract round numbers
  round_numbers <-
    as.numeric(
      string_extract(late_sddf_href, "[0-9]{1,}")
    )
  # Build final ESS round links
  round_links <- late_sddf_href[which(round_numbers %in% rounds)]
  # Using each separate round html, extract the stata or spss direct link
  # to download the data. Return the full direct path.
  full_urls <- get_download_url(round_links, format)
  full_urls
} # nocov end
# Given a country/round website such as
# https://www.europeansocialsurvey.org/download.html?file=ESS1ES&c=ES&y=2002,
# extracts the Stata path (or the SPSS/SAS path, in that order, if the
# preferred format is not available) and returns the complete url to
# download the data
get_download_url <- function(round_links, format) { # nocov start
  # Create an empty character vector with the same length as round_links
  # to populate in the loop
  format.files <- character(length(round_links))
  # Build paths for each round
  for (index in seq_along(round_links)) {
    download_page <- safe_GET(paste0(.global_vars$ess_website,
                                     round_links[index]))
    html_ess <- xml2::read_html(download_page)
    z <- xml2::xml_text(xml2::xml_find_all(html_ess, "//a/@href"))
    format.files[index] <- format_preference(z, format)
  }
  full_urls <- sort(paste0(.global_vars$ess_website, format.files))
  full_urls
} # nocov end
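# Illustrative usage (a sketch, not run; the relative link below is only an
# example of the "/download.html?file=..." links produced upstream):
#   get_download_url("/download.html?file=ESS1ES&c=ES&y=2002", format = NULL)
# with format = NULL the first available format among stata/spss/sas is
# chosen by format_preference().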
# Function accepts the chosen country and the list
# of all available countries and returns the html doc
# for the chosen country, which contains the whole list of
# links to download that country's rounds.
extract_country_html <- function(chosen_module, module_index) { # nocov start
  country_refs <- get_href(.global_vars$ess_website, module_index)
  # Returns "/data/country.html?c=ukraine" for all countries
  all_module_links <-
    xml2::xml_attr(
      country_refs,
      "href"
    )
  all_module_countries <- xml2::xml_text(country_refs)
  # Build the full url for the chosen country; indexing works because
  # all_module_links and all_module_countries are in the same order
  chosen_module_link <-
    paste0(
      .global_vars$ess_website,
      all_module_links[which(chosen_module == all_module_countries)]
    )
  # Extract the html from the country link to download the rounds
  module_rounds <- safe_GET(chosen_module_link)
  module_round_html <- xml2::read_html(module_rounds)
  module_round_html
} # nocov end
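# Illustrative usage (a sketch, not run; "Denmark" is only an example and
# .global_vars$country_index must be initialized):
#   extract_country_html("Denmark", .global_vars$country_index)
# returns the parsed html of that country's data page.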
# Function to grab <a href="/data/country.html?c=latvia">Latvia</a>
# for each country. The default href xpath takes all td elements of
# class 'label' (most ESS HTML lists), but for SDDF this changes
# to all ul lists of class 'docliste'
get_href <- function(ess_website,
                     module_index,
                     href_xpath = "//td [@class='label']//a") { # nocov start
  download_page <- safe_GET(paste0(ess_website, module_index))
  country_node <-
    xml2::xml_find_all(
      xml2::read_html(download_page),
      href_xpath
    )
  country_node
} # nocov end
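# Illustrative usage (a sketch, not run). With the default xpath this grabs
# the country links; with the 'docliste' xpath it grabs the late SDDF links,
# as in grab_url_sddf_late_rounds():
#   get_href(.global_vars$ess_website, .global_vars$sddf_index,
#            "//ul [@class='docliste']//a")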
# This will return a link like "/download.html?file=ESS8e01&y=2016"
# for every round available on the ESS website.
get_rounds_link <- function(ess_website) { # nocov start
  download_page <- safe_GET(paste0(ess_website, "/data/download.html?r="))
  html_ess <- xml2::read_html(download_page)
  z <- xml2::xml_text(xml2::xml_find_all(html_ess, "//a/@href"))
  downloads <- unique(grep("^/download.html(.*)[0-9]{4, }$", z, value = TRUE))
  downloads
} # nocov end
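# Illustrative usage (a sketch, not run):
#   get_rounds_link(.global_vars$ess_website)
# returns relative links such as "/download.html?file=ESS8e01&y=2016",
# one per available round.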
# Function to automatically clean attributes such as
# <a> something </a>. This will clean to: something,
# if "a" and "/a" are specified as arguments.
clean_attr <- function(x, ...) { # nocov start
  other_attrs <- paste0((list(...)), collapse = "|")
  gsub(paste0(">|", "<|", other_attrs), "", x)
} # nocov end
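# Illustrative usage (this one has no external dependencies):
#   clean_attr("<a>something</a>", "a", "/a")
# builds the pattern ">|<|a|/a" and strips those pieces, leaving "something".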
# Function to find a regex in x with preference given by `format`
format_preference <- function(x, format) { # nocov start
  format <- if (is.null(format)) c("stata", "spss", "sas") else format
  # If the user didn't supply a format (a user can supply only one format,
  # so a length greater than one means the default vector above is in use)
  if (length(format) > 1) {
    # Try each format in order and return the first one that has a match
    for (i in format) {
      format_link <- grep(i, x, value = TRUE)
      if (length(format_link) > 0) return(format_link)
    }
  } else {
    # If the user supplied the format, try reading that format and
    # return the link if found or raise an error otherwise
    format_link <- grep(format, x, value = TRUE)
    if (length(format_link) > 0) {
      return(format_link)
    } else {
      stop(paste0("Format '", format, "' not available."))
    }
  }
} # nocov end
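# Illustrative usage (the file names below are made up for the example):
#   format_preference(c("ESS1_data.spss.zip", "ESS1_data.stata.zip"), NULL)
# returns "ESS1_data.stata.zip" because with format = NULL the preference
# order is stata, then spss, then sas.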