#' Log in to the Gesis website
#'
#' @param username Your Gesis username
#' @param password Your Gesis password
#'
#' @details
#' The username and password can also be stored as environment variables
#' "GESIS_USER" and "GESIS_PASS" so as not to store these in plaintext in a
#' script.
#'
#' @return A session object
#' @export
#' @import rvest
#' @import xml2
#'
#' @examples
#' \dontrun{s <- login("my_gesis_username", "my_gesis_password")}
login <- function(username = "", password = "") {
if(username == "") username <- Sys.getenv("GESIS_USER")
if(password == "") password <- Sys.getenv("GESIS_PASS")
if(any(username == "", password == "")) {
stop("Please provide username and/or password.", call. = FALSE)
}
url <- "https://dbk.gesis.org/dbksearch/login.asp"
s <- html_session(url)
form <- html_form(s)[[1]]
form <- set_values(form, user = username, pass = password)
form$url <- ""
suppressMessages(submit_form(s, form))
}
#' Download a Gesis data set
#'
#' @param s A session object created with login()
#' @param doi The unique identifier(s) for the data set(s), which might be its
#' Digital Object Identifier (DOI), as in '10.4232/1.12959', or its GESIS/ZACAT
#' identifier, as in '6925'.
#' @param path Directory to which to download the file
#' @param filetype The filetype to download (usually available: .dta/.por/.sav)
#' @param purpose The purpose for downloading the data. See details.
#' @param quiet Whether to output download message.
#'
#' @details Datasets reposited with GESIS are uniquely identified with a
#' numberic identifier called a "DOI". This identifier appears both in the URL
#' for a dataset's website, and on the website itself.
#'
#' In addition to accepting the terms of use, you need to input a purpose for
#' downloading a data set. The options are as follows:
#'
#' 1. for scientific research (incl. PhD)
#' 2. for reserach with commercial mandate
#' 3. for teaching as lecturer
#' 4. for my academic studies
#' 5. for my final exam (e.g. bachelor or master)
#' 6. for professional training and qualification
#'
#' @return Nothing
#' @export
#' @import rvest
#' @import xml2
#' @import httr
#'
#' @examples
#' \dontrun{s <- login("my_gesis_username", "my_gesis_password")
#' download_dataset(s, doi = "0078")}
download_dataset <- function(s, doi, path = ".", filetype = ".dta",
purpose = 1, quiet = FALSE) {
for(d in doi) {
d <- get_gesis_id(d, quiet)
url <- paste0("https://dbk.gesis.org/dbksearch/SDesc2.asp?db=E&no=", d)
s <- jump_to(s, url)
stop_for_status(s)
s <- suppressMessages(
follow_link(s, xpath = sprintf("//a[contains(text(), '%s')]", filetype))
)
stop_for_status(s)
form <- html_form(s)[[1]]
form <- set_values(form, zweck = 1, projectok = 1)
form$url <- ""
s <- suppressMessages(submit_form(s, form))
stop_for_status(s)
if(!quiet) message("Downloading DOI: ", d)
filename <- gsub("^.*?\"|\"", "", s$response$headers$`content-disposition`)
filename <- file.path(path, filename)
writeBin(content(s$response, "raw"), filename)
}
}
#' Download the codebook for a Gesis data set
#'
#' @param doi The unique identifier(s) for the data set(s), which might be its
#' Digital Object Identifier (DOI), as in '10.4232/1.12959', or its GESIS/ZACAT
#' identifier, as in '6925'.
#' @param path Directory to which to download the file
#' @param quiet Whether to output download message.
#'
#' @return Nothing
#' @export
#' @import rvest
#' @import xml2
#' @import httr
#'
#' @examples
#' download_codebook(doi = "0078")
download_codebook <- function(doi, path = ".", quiet = FALSE) {
for(d in doi) {
d <- get_gesis_id(d, quiet)
url <- paste0("https://dbk.gesis.org/dbksearch/SDesc2.asp?db=E&no=", d)
nodename <- paste0("ZA", d, "_cdb.pdf")
page <- read_html(url)
node <- html_nodes(page, xpath = "//a[contains(text(), '_cdb')]")
node <- subset(node, html_text(node) == nodename)
node <- paste0("https://dbk.gesis.org/dbksearch/", html_attr(node, "href"))
resp <- GET(node)
if(!quiet) message("Downloading codebook for DOI: ", d)
filename <- gsub("^.*?\"|\"", "", resp$headers$`content-disposition`)
filename <- file.path(path, filename)
writeBin(content(resp, "raw"), filename)
}
}
#' Get a dataframe with all available groups of studies
#'
#' @return A dataframe
#' @export
#'
#' @import rvest
#' @import xml2
#'
#' @examples
#' \dontrun{
#' groups <- get_study_groups()
#' head(groups)
#' }
get_study_groups <- function() {
url <- "https://dbk.gesis.org/dbksearch/gdesc.asp?db=e"
page <- httr::GET(url)
httr::stop_for_status(page)
page <- httr::content(page)
node <- html_nodes(page, xpath = "//*[@id='ppagingtab']/div[1]/table")
df <- html_table(node)[[1]]
class(df) <- c("tbl_df", "tbl", "data.frame")
df[, 2:3]
}
#' Get a dataframe of all individual data sets within a group of studies
#'
#' @param group_no The group number (usually obtained from get_study_groups())
#'
#' @return A dataframe
#' @export
#'
#' @import rvest
#' @import xml2
#'
#' @examples
#' # Get DOIs and titles for all Eurobarometer studies
#' eurobars <- get_datasets("0008")
#' head(eurobars)
get_datasets <- function(group_no) {
url <- paste0("https://dbk.gesis.org/dbksearch/GDESC2.asp?db=e&no=", group_no)
page <- read_html(url)
nodes <- html_nodes(page, xpath = "//li//a[contains(@href, 'no=')]")
text <- html_nodes(page, xpath = "//li//a[contains(@href, 'no=')]//parent::li")
text <- html_text(text, TRUE)
doi <- substr(text, 3, 6)
title <- substr(text, 8, stop = 10000L)
df <- data.frame(doi, title, stringsAsFactors = FALSE)
class(df) <- c("tbl_df", "tbl", "data.frame")
df[-1, ]
}
#' @param x A DOI or GESIS identifier.
#' @return Hopefully, a string made of a valid 4-digit GESIS identifier.
#' @keywords internal
get_gesis_id <- function(x, quiet = FALSE) {
# if user submitted a string of the form '10.4232/1.13048',
# convert DOI to GESIS identifier
if (grepl("/", x)) {
x <- GET(paste0("https://doi.org/", x))
x <- x$all_headers[[1]]$headers$location
x <- gsub("(.*)no=(\\d{4})(.*)", "\\2", x)
if(!quiet) message("Resolved DOI to GESIS identifier: ", x)
}
# if GESIS identifier is of the form '990',
# add trailing 0 before returning
ifelse(nchar(x) == 3, paste0("0", x), x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.