#' Get Survey Datasets for DCPO
#'
#' \code{get_surveys} downloads the survey datasets to be used to estimate public opinion across countries and over time
#'
#' @param vars a data frame (or, optionally, a vector of survey names or a .csv file) of survey items
#' @param datapath path to the directory that will house the raw survey datasets
#' @param chime play chime when complete?
#
#' @details \code{get_surveys}, when passed a data frame of survey items, downloads the
#' source survey datasets, converts them to .RData format, and saves them to a specified
#' directory for later use by \code{dcpo_setup}. When constructing a list of surveys, one
#' should be sure to use the name employed in \code{DCPO}'s built-in \code{surveys_data} database:
#' \code{View(surveys_data)}.
#'
#' @return the function downloads datasets
#'
#' @import icpsrdata
#' @import pewdata
#' @import ropercenter
#' @import gesisdata
#' @import rvest
#' @import RSelenium
#' @importFrom rio convert export
#' @importFrom stringr str_replace str_subset str_detect regex
#' @importFrom essurvey download_rounds
#' @importFrom dataverse get_dataset get_file
#' @importFrom httr content
#' @importFrom purrr walk walk2 pwalk
#' @importFrom haven read_por read_dta
#' @importFrom foreign read.spss
#' @importFrom tools file_ext file_path_sans_ext
#' @importFrom utils unzip download.file
#'
#' @export
get_surveys <- function(vars,
datapath = "../data/dcpo_surveys",
chime = TRUE) {
if ("data.frame" %in% class(vars)) {
vars_table <- vars
} else if (file.exists(vars[1])) {
vars_table <- readr::read_csv(vars, col_types = "cccc")
} else {
vars_table <- tibble(survey = vars)
}
ds <- surveys_data %>%
filter(survey %in% vars_table$survey) %>%
mutate(dl_dir = file.path(datapath,
paste0(archive, "_files"),
paste0(surv_program, "_files")),
new_dir = file.path(dl_dir, file_id),
file_exists = purrr::map_lgl(new_dir, function(x) {
list.files(path = x) %>%
str_subset(".RData") %>%
length() > 0
})) %>%
filter(!file_exists)
# Gesis
gesis_ds <- ds %>%
filter(archive == "gesis")
if (nrow(gesis_ds) > 0) {
gesis_sp <- gesis_ds %>%
select(surv_program) %>%
unique() %>%
unlist()
walk(gesis_sp, function(sp) {
gesis_sp_files <- gesis_ds %>%
filter(surv_program == sp) %>%
select(file_id) %>%
unlist()
gesisdata::gesis_download(file_id = gesis_sp_files,
download_dir = file.path(datapath,
"gesis_files",
paste0(sp, "_files")))
Sys.sleep(2)
})
}
# ICPSR
icpsr_ds <- ds %>%
filter(archive == "icpsr")
if (nrow(icpsr_ds) > 0) {
pwalk(icpsr_ds, function(file_id, dl_dir, read_ascii_args, wt, ...) {
icpsr_id <- file_id %>% str_replace("ICPSR_", "") %>% as.numeric(file_id)
icpsrdata::icpsr_download(icpsr_id, download_dir = dl_dir)
new_dir <- file.path(dl_dir, paste0("ICPSR_", icpsr_id %>% sprintf("%05d", .)))
new_dir2 <- file.path(dl_dir, paste0("ICPSR_", icpsr_id %>% sprintf("%05d", .)), "DS0001")
data_file <- list.files(path = new_dir2) %>% str_subset("\\.dta") %>% last()
if (is.na(data_file)) {
data_file <- list.files(path = new_dir2) %>%
str_subset("\\.por") %>%
last()
}
if (is.na(data_file)) {
data_file <- list.files(path = new_dir2) %>%
str_subset("\\.txt") %>%
last()
file_path <- file.path(new_dir2, data_file)
x <- do.call(ropercenter::read_ascii, eval(parse(text = read_ascii_args)))
if (!is.na(wt)) {
x <- x %>%
mutate(weight0 = as.numeric(x[[ds$wt]] %>% stringr::str_trim()),
weight = weight0/mean(weight0))
}
rio::export(x, file.path(new_dir, str_replace(data_file, "txt$", "RData")))
}
if (str_detect(data_file, ".por")) {
# workaround for rio bug importing .por
haven::read_por(file.path(new_dir2, data_file)) %>%
rio::export(str_replace(file.path(new_dir, data_file), ".por", ".RData"))
} else {
tryCatch(rio::convert(file.path(new_dir2, data_file),
str_replace(file.path(new_dir, data_file), ".dta", ".RData")),
error = function(c) suppressWarnings(
foreign::read.dta(file.path(new_dir2, data_file),
convert.factors = FALSE) %>%
rio::export(str_replace(file.path(new_dir, data_file), ".dta", ".RData"))
)
)
}
})
}
# Pew
pew_ds <- ds %>%
filter(archive == "pew")
if (nrow(pew_ds) > 0) {
pew_sp <- pew_ds %>%
select(surv_program) %>%
unique() %>%
unlist()
walk(pew_sp, function(sp) {
pew_sp_files <- pew_ds %>%
filter(surv_program == sp) %>%
select(file_id) %>%
unlist()
pewdata::pew_download(area = sp,
file_id = pew_sp_files,
download_dir = file.path(datapath,
"pew_files",
paste0(sp, "_files")))
Sys.sleep(2)
})
}
# Roper Center
roper_ds <- ds %>%
filter(archive == "roper")
if (nrow(roper_ds) > 0) {
roper_sp <- roper_ds %>%
select(surv_program) %>%
unique() %>%
unlist()
walk(roper_sp, function(sp) {
roper_sp_files <- roper_ds %>%
filter(surv_program == sp) %>%
select(file_id) %>%
unlist()
ropercenter::roper_download(file_id = roper_sp_files,
download_dir = file.path("../data/dcpo_surveys/roper_files",
paste0(sp, "_files")))
Sys.sleep(2)
})
}
# Roper ASCII files
roper_ascii_files <- roper_ds %>%
filter(!is.na(read_ascii_args)) %>%
pull(file_id)
if (length(roper_ascii_files) > 0) {
walk(roper_ascii_files, function(file) {
ra_ds <- ds %>%
filter(file_id %in% file)
dir_path <- file.path("../data/dcpo_surveys/roper_files",
paste0(ra_ds$surv_program, "_files"),
file)
file_path <- file.path(dir_path, list.files(dir_path, pattern = ".dat"))
x <- do.call(read_ascii, eval(parse(text = ra_ds$read_ascii_args)))
if (!is.na(ra_ds$wt)) {
x <- x %>%
mutate(weight0 = as.numeric(weight %>% stringr::str_trim()),
weight = if_else(is.na(weight0), 1, weight0/mean(weight0, na.rm = TRUE)))
}
rio::export(x, str_replace(file_path, "dat$", "RData"))
})
}
# European Social Survey
ess_ds <- ds %>%
filter(surv_program == "ess" & is.na(data_link))
if (nrow(ess_ds) > 0) {
pwalk(ess_ds, function(survey, new_dir, dl_dir, ...) {
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
suppressWarnings(essurvey::download_rounds(rounds = as.numeric(str_extract(survey, "\\d+")),
output_dir = dl_dir))
data_file <- list.files(path = new_dir) %>%
str_subset("\\.dta") %>%
last()
tryCatch(rio::convert(file.path(new_dir, data_file),
paste0(tools::file_path_sans_ext(file.path(new_dir, data_file)), ".RData")),
error = function(c) {
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(paste0(tools::file_path_sans_ext(file.path(new_dir, data_file)), ".RData"))
}
)
})
}
# Dataverse
dataverse_ds <- ds %>%
filter(archive == "dataverse")
if (nrow(dataverse_ds) > 0) {
pwalk(dataverse_ds, function(file_id, data_link, new_dir, ...) {
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
dataverse_server <- str_extract(data_link, "data[.a-z]*")
dataverse_doi <- str_extract(data_link, "(?<=Id=).*$")
Sys.setenv("DATAVERSE_SERVER" = dataverse_server)
dataverse_info <- dataverse::get_dataset(dataverse_doi)
dataverse_ids <- dataverse_info$files %>%
janitor::clean_names() %>%
select(label, id)
walk2(dataverse_ids$label, dataverse_ids$id, function(name, id) {
name2 <- ifelse(!(any(str_detect(dataverse_ids$label, "dta"))) & tools::file_ext(name) == "tab",
paste0(tools::file_path_sans_ext(name), ".dta"),
name)
if (file_id == "autnes2017") {
f <- dataverse::get_file(file = id, dataset = dataverse_doi, key = Sys.getenv("AUSSDA_KEY"))
} else {
f <- dataverse::get_file(file = id, dataset = dataverse_doi)
}
writeBin(as.vector(f), file.path(new_dir, name2))
})
data_file <- list.files(path = new_dir) %>% str_subset("dta") %>% last()
if (file.exists(file.path(new_dir, data_file))) {
tryCatch(haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(file.path(new_dir, paste0(file_id, ".RData"))),
error = function(c) suppressWarnings(
rio::convert(file.path(new_dir, str_replace(data_file, ".dta", ".sas7bdat")),
file.path(new_dir, paste0(file_id, ".RData")))
)
)
} else {
zip_file <- list.files(path = new_dir) %>% str_subset("(STATA|stata).*zip$")
utils::unzip(file.path(new_dir, zip_file), exdir = new_dir)
data_file <- list.files(path = new_dir) %>% str_subset("dta") %>% last()
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(file.path(new_dir, paste0(file_id, ".RData")))
}
})
}
# Misc
misc_ds <- ds %>%
filter(archive == "misc")
if (nrow(misc_ds) > 0) {
pwalk(misc_ds, function(new_dir, data_link, cb_link, file_id, ...) {
if (!(is.na(data_link))) {
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
dl_file <- str_extract(data_link, "[^//]*$")
download.file(data_link, file.path(new_dir, dl_file))
if (str_detect(dl_file, "zip$")) {
utils::unzip(file.path(new_dir, dl_file), exdir = new_dir)
unlink(file.path(new_dir, list.files(new_dir, ".zip")))
}
data_file <- list.files(path = new_dir) %>%
str_subset(regex("\\.dta", ignore_case = TRUE)) %>%
last()
if (is.na(data_file)) {
data_file <- list.files(path = new_dir) %>%
str_subset(regex("\\.sav", ignore_case = TRUE)) %>%
last()
}
if (tools::file_ext(data_file) != "") {
tryCatch(rio::convert(file.path(new_dir, data_file),
paste0(file.path(new_dir, file_id), ".RData")),
error = function(c) {
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(paste0(file.path(new_dir, file_id), ".RData"))
}
)
}
if (!is.na(cb_link)) {
download.file(cb_link, file.path(new_dir, paste0(file_id, ".pdf")))
}
}
})
}
# UK Data Service
# ukds_ds <- ds %>%
# filter(archive == "ukds")
# ukds_sp <- ukds_ds %>%
# select(surv_program) %>%
# unique() %>%
# unlist()
# walk(ukds_sp, function(sp) {
# ukds_sp_files <- ukds_ds %>%
# filter(surv_program == sp) %>%
# select(file_id) %>%
# unlist()
# ukds::ukds_download(file_id = ukds_sp_files,
# download_dir = file.path("../data/dcpo_surveys/ukds_files",
# paste0(sp, "_files")))
# })
# Poland GSS
pgss_ds <- ds %>%
filter(survey == "pgss")
if (nrow(pgss_ds) > 0) {
new_dir <- pgss_ds$new_dir[1]
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
login_link <- "http://www.ads.org.pl/log.php?id=91"
s <- html_session(login_link)
s1 <- html_form(s)[[1]] %>%
set_values(log = getOption("ads_login"),
pas = getOption("ads_password"))
s2 <- submit_form(s, s1) %>%
jump_to("http://www.ads.org.pl/dnldal.php?id=91&nazwa=P0091SAV.zip")
file_dir <- file.path(new_dir, "P0091SAV.zip")
writeBin(httr::content(s2$response, "raw"), file_dir)
unzip(file_dir, exdir = new_dir)
unlink(file_dir)
data_file <- list.files(path = new_dir) %>%
str_subset("\\.sav") %>%
last()
suppressWarnings(
foreign::read.spss(file.path(new_dir, data_file),
to.data.frame = TRUE,
use.value.labels = FALSE) %>%
rio::export(paste0(tools::file_path_sans_ext(file.path(new_dir, data_file)), ".RData"))
)
}
# WVS
wvs_ds <- ds %>%
filter(survey == "wvs_combo")
if (nrow(wvs_ds) > 0) {
new_dir <- wvs_ds$new_dir[1]
# build path to chrome's default download directory
if (Sys.info()[["sysname"]]=="Linux") {
default_dir <- file.path("home", Sys.info()[["user"]], "Downloads")
} else {
default_dir <- file.path("", "Users", Sys.info()[["user"]], "Downloads")
}
# get list of current default download directory contents
dd_old <- list.files(default_dir)
# create target directory if necessary
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
nd_old <- list.files(new_dir)
wvs_page <- "http://www.worldvaluessurvey.org/WVSDocumentationWVL.jsp"
rD <- RSelenium::rsDriver(browser = "chrome")
remDr <- rD[["client"]]
remDr$navigate(wvs_page)
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
elem <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem[[1]])
elem1 <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem1[[1]])
remDr$findElement(using = "partial link text", "stata")$clickElement()
Sys.sleep(3)
remDr$findElement(using = "name", "LINOMBRE")$sendKeysToElement(list(getOption("pew_name")))
remDr$findElement(using = "name", "LIEMPRESA")$sendKeysToElement(list(getOption("pew_org")))
remDr$findElement(using = "name", "LIEMAIL")$sendKeysToElement(list(getOption("pew_email")))
acad_proj <- "a"
remDr$findElement(using = 'xpath', "//select")$sendKeysToElement(list(acad_proj))
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
remDr$findElement(using = "name", "LIAGREE")$clickElement()
remDr$findElement(using = "class", "AJDocumentDownloadBtn")$clickElement()
remDr$acceptAlert()
# check that download has completed
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
wait <- TRUE
tryCatch(
while(all.equal(stringr::str_detect(dd_new, "\\.part$"), logical(0))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}, error = function(e) 1 )
while(any(stringr::str_detect(dd_new, "\\.crdownload$"))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}
# unzip into specified directory and convert to .RData
unzip(file.path(default_dir, dd_new), exdir = file.path(new_dir))
unlink(file.path(default_dir, dd_new))
data_file <- list.files(new_dir)[!list.files(new_dir) %in% nd_old]
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(file.path(new_dir, paste0(wvs_ds$survey[1], ".RData")))
remDr$close()
rD[["server"]]$stop()
}
wvs4_ds <- ds %>%
filter(survey == "wvs4_swe")
if (nrow(wvs4_ds) > 0) {
new_dir <- wvs4_ds$new_dir[1]
# build path to chrome's default download directory
if (Sys.info()[["sysname"]]=="Linux") {
default_dir <- file.path("home", Sys.info()[["user"]], "Downloads")
} else {
default_dir <- file.path("", "Users", Sys.info()[["user"]], "Downloads")
}
# get list of current default download directory contents
dd_old <- list.files(default_dir)
# create target directory if necessary
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
nd_old <- list.files(new_dir)
wvs_page <- "http://www.worldvaluessurvey.org/WVSDocumentationWV4.jsp"
rD <- RSelenium::rsDriver(browser = "chrome")
remDr <- rD[["client"]]
remDr$navigate(wvs_page)
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
elem <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem[[1]])
elem1 <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem1[[1]])
Sys.sleep(2)
remDr$findElement(using = "partial link text", "stata")$clickElement()
Sys.sleep(3)
remDr$findElement(using = "name", "LINOMBRE")$sendKeysToElement(list(getOption("pew_name")))
remDr$findElement(using = "name", "LIEMPRESA")$sendKeysToElement(list(getOption("pew_org")))
remDr$findElement(using = "name", "LIEMAIL")$sendKeysToElement(list(getOption("pew_email")))
acad_proj <- "a"
remDr$findElement(using = 'xpath', "//select")$sendKeysToElement(list(acad_proj))
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
remDr$findElement(using = "name", "LIAGREE")$clickElement()
remDr$findElement(using = "class", "AJDocumentDownloadBtn")$clickElement()
remDr$acceptAlert()
# check that download has completed
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
wait <- TRUE
tryCatch(
while(all.equal(stringr::str_detect(dd_new, "\\.part$"), logical(0))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}, error = function(e) 1 )
while(any(stringr::str_detect(dd_new, "\\.crdownload$"))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}
# unzip into specified directory and convert to .RData
unzip(file.path(default_dir, dd_new), exdir = file.path(new_dir))
unlink(file.path(default_dir, dd_new))
data_file <- list.files(new_dir)[!list.files(new_dir) %in% nd_old]
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(file.path(new_dir, paste0(wvs4_ds$survey[1], ".RData")))
remDr$close()
rD[["server"]]$stop()
}
wvs6_ds <- ds %>%
filter(survey == "wvs6_bahrain")
if (nrow(wvs6_ds) > 0) {
new_dir <- wvs6_ds$new_dir[1]
# build path to chrome's default download directory
if (Sys.info()[["sysname"]]=="Linux") {
default_dir <- file.path("home", Sys.info()[["user"]], "Downloads")
} else {
default_dir <- file.path("", "Users", Sys.info()[["user"]], "Downloads")
}
# get list of current default download directory contents
dd_old <- list.files(default_dir)
# create target directory if necessary
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
nd_old <- list.files(new_dir)
wvs_page <- "http://www.worldvaluessurvey.org/WVSDocumentationWV6.jsp"
rD <- RSelenium::rsDriver(browser = "chrome")
remDr <- rD[["client"]]
remDr$navigate(wvs_page)
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
elem <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem[[1]])
elem1 <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem1[[1]])
Sys.sleep(2)
remDr$findElement(using = "partial link text", "2016 01 01 (Stata")$clickElement()
Sys.sleep(3)
remDr$findElement(using = "name", "LINOMBRE")$sendKeysToElement(list(getOption("pew_name")))
remDr$findElement(using = "name", "LIEMPRESA")$sendKeysToElement(list(getOption("pew_org")))
remDr$findElement(using = "name", "LIEMAIL")$sendKeysToElement(list(getOption("pew_email")))
acad_proj <- "a"
remDr$findElement(using = 'xpath', "//select")$sendKeysToElement(list(acad_proj))
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
remDr$findElement(using = "name", "LIAGREE")$clickElement()
remDr$findElement(using = "class", "AJDocumentDownloadBtn")$clickElement()
remDr$acceptAlert()
# check that download has completed
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
wait <- TRUE
tryCatch(
while(all.equal(stringr::str_detect(dd_new, "\\.part$"), logical(0))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}, error = function(e) 1 )
while(any(stringr::str_detect(dd_new, "\\.crdownload$"))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}
# unzip into specified directory and convert to .RData
unzip(file.path(default_dir, dd_new), exdir = file.path(new_dir))
unlink(file.path(default_dir, dd_new))
data_file <- list.files(new_dir)[!list.files(new_dir) %in% nd_old]
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(file.path(new_dir, paste0(wvs4_ds$survey[1], ".RData")))
remDr$close()
rD[["server"]]$stop()
}
wvs7_ds <- ds %>%
filter(survey == "wvs7")
if (nrow(wvs7_ds) > 0) {
new_dir <- wvs7_ds$new_dir[1]
# build path to chrome's default download directory
if (Sys.info()[["sysname"]]=="Linux") {
default_dir <- file.path("home", Sys.info()[["user"]], "Downloads")
} else {
default_dir <- file.path("", "Users", Sys.info()[["user"]], "Downloads")
}
# get list of current default download directory contents
dd_old <- list.files(default_dir)
# create target directory if necessary
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
nd_old <- list.files(new_dir)
wvs_page <- "http://www.worldvaluessurvey.org/WVSDocumentationWV7.jsp"
rD <- RSelenium::rsDriver(browser = "chrome")
remDr <- rD[["client"]]
remDr$navigate(wvs_page)
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
Sys.sleep(7)
elem <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem[[1]])
Sys.sleep(5)
elem1 <- remDr$findElements(using = "tag name", "iframe")
remDr$switchToFrame(elem1[[1]])
Sys.sleep(2)
remDr$findElement(using = "partial link text", "stata")$clickElement()
Sys.sleep(3)
remDr$findElement(using = "name", "LINOMBRE")$sendKeysToElement(list(getOption("pew_name")))
remDr$findElement(using = "name", "LIEMPRESA")$sendKeysToElement(list(getOption("pew_org")))
remDr$findElement(using = "name", "LIEMAIL")$sendKeysToElement(list(getOption("pew_email")))
acad_proj <- "a"
remDr$findElement(using = 'xpath', "//select")$sendKeysToElement(list(acad_proj))
webElem <- remDr$findElement(using = "tag name", "body")
webElem$clickElement()
webElem$sendKeysToElement(list(key = "end"))
remDr$findElement(using = "name", "LIAGREE")$clickElement()
remDr$findElement(using = "class", "AJDocumentDownloadBtn")$clickElement()
remDr$acceptAlert()
# check that download has completed
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
wait <- TRUE
tryCatch(
while(all.equal(stringr::str_detect(dd_new, "\\.part$"), logical(0))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}, error = function(e) 1 )
while(any(stringr::str_detect(dd_new, "\\.crdownload$"))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}
# unzip into specified directory and convert to .RData
unzip(file.path(default_dir, dd_new), exdir = file.path(new_dir))
unlink(file.path(default_dir, dd_new))
data_file <- list.files(new_dir)[!list.files(new_dir) %in% nd_old]
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(file.path(new_dir, paste0(wvs7_ds$survey[1], ".RData")))
remDr$close()
rD[["server"]]$stop()
}
# LatinoBarometro
lb_ds <- ds %>%
filter(surv_program == "lb")
if (nrow(lb_ds) > 0) {
pwalk(lb_ds, function(year, survey, dl_dir, new_dir, ...) {
# build path to chrome's default download directory
if (Sys.info()[["sysname"]]=="Linux") {
default_dir <- file.path("home", Sys.info()[["user"]], "Downloads")
} else {
default_dir <- file.path("", "Users", Sys.info()[["user"]], "Downloads")
}
# get list of current default download directory contents
dd_old <- list.files(default_dir)
# create target directory if necessary
dir.create(new_dir, recursive = TRUE, showWarnings = FALSE)
nd_old <- list.files(new_dir)
lb_link <- "http://www.latinobarometro.org/latContents.jsp"
rD <- RSelenium::rsDriver(browser = "chrome")
remDr <- rD[["client"]]
# download file
remDr$navigate(lb_link)
remDr$findElement(using = "link text", "Banco de Datos")$clickElement()
Sys.sleep(3)
remDr$findElement(using = "css selector", paste0("a[href*='", year, "_dta", "']"))$clickElement()
Sys.sleep(3)
if (file_year<=2015) {
remDr$findElement(using = "name", "FANOMBRE")$sendKeysToElement(list(getOption("pew_name")))
remDr$findElement(using = "name", "FAEMPRESA")$sendKeysToElement(list(getOption("pew_org")))
remDr$findElement(using = "name", "FAEMAIL")$sendKeysToElement(list(getOption("pew_email")))
proj_acad <- "p"
remDr$findElement(using = 'xpath', "//select")$sendKeysToElement(list(proj_acad))
remDr$findElement(using = "name", "FAAGREE")$clickElement()
remDr$findElement(using = "class", "LATBoton")$clickElement()
Sys.sleep(3)
remDr$findElement(using = "class", "LATBoton")$clickElement()
}
# check that download has completed
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
wait <- TRUE
tryCatch(
while(all.equal(stringr::str_detect(dd_new, "\\.part$"), logical(0))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}, error = function(e) 1 )
while(any(stringr::str_detect(dd_new, "\\.crdownload$"))) {
Sys.sleep(1)
dd_new <- list.files(default_dir)[!list.files(default_dir) %in% dd_old]
}
# unzip into specified directory and convert to .RData
unzip(file.path(default_dir, dd_new), exdir = file.path(new_dir))
unlink(file.path(default_dir, dd_new))
data_file <- list.files(path = new_dir) %>%
str_subset(".*[Ee]ng.*\\.dta") %>%
last()
haven::read_dta(file.path(new_dir, data_file), encoding = "latin1") %>%
rio::export(paste0(file.path(new_dir, survey), ".RData"))
# close session
remDr$close()
rD[["server"]]$stop()
})
}
# TODO: should return dataframe of surveys not downloaded
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.