# URL of the Springer Nature book metadata page; it is interpolated into the
# Markdown link built by the inline `paste0()` expression that follows.
data_source <- "https://adminportal.springernature.com/metadata/books"
Data is sourced from `r paste0("[Springer Nature Metadata]", "(", data_source, ")")`.
# `production = TRUE`: use production data (`SpringerNature_Books*`).
# `production = FALSE`: use test data (`Test_Springer*`).

# Bootstrap ----
# Two helper scripts are sourced straight from GitHub (master branch), in this
# order, and then `term_user_lib()` is called.
# NOTE(review): `term_user_lib()` is defined by the sourced code, not in this
# file; presumably it sets up a user-level package library - confirm.
bootstrap_scripts <- c(
  "https://raw.githubusercontent.com/jeksterslabds/jeksterslabRutils/master/R/util_txt2file.R",
  "https://raw.githubusercontent.com/jeksterslabds/jeksterslabRterm/master/R/term_user_lib.R"
)
for (script_url in bootstrap_scripts) {
  source(script_url)
}
term_user_lib()
# Dependencies ----
# CRAN mirror used when `remotes` has to be installed.
repos <- "https://cran.rstudio.org"
if (!require("remotes")) {
  install.packages(
    "remotes",
    repos = repos
  )
}

# GitHub packages (all under the "jeksterslabds" account) used by this script.
pkg <- c(
  "jeksterslabRpkg",
  "jeksterslabRutils",
  "jeksterslabRlib"
)

# Attach `pkg`; if it is not installed, install it from GitHub and then attach.
#
# pkg: character string, name of a package hosted under "jeksterslabds".
foo <- function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    remotes::install_github(
      paste0(
        "jeksterslabds/",
        pkg
      )
    )
    # `require()` returned FALSE above, so nothing has been attached yet.
    # Attach the freshly installed package; `library()` errors loudly if the
    # installation did not succeed instead of failing later on a missing
    # function.
    library(pkg, character.only = TRUE)
  }
}
invisible(
  lapply(
    X = pkg,
    FUN = foo
  )
)
# Parameters ----
# Session temporary directory; the zip archives are extracted here.
tmp_dir <- tempdir()
# Value returned by `pkg_find_root()` for "jeksterslabRlib"; the output files
# are written below this path.
root <- pkg_find_root(pkg_name = "jeksterslabRlib")
# Directory that is scanned for already downloaded book files.
dir <- "/media/jeksterslib/books/springer"
# TRUE: production data; FALSE: test data (selects `pattern` below).
production <- TRUE
# TRUE: check PDF/EPUB files and delete the invalid ones.
chkfiles <- FALSE
# `par` and `ncores` are forwarded to `util_check_file_type()`.
par <- TRUE
# `detectCores()` can return NA and returns 1 on a single-core machine, which
# would make `ncores` NA or 0; always keep at least one core.
ncores <- max(1, parallel::detectCores() - 1, na.rm = TRUE)
if (production) {
  pattern <- "^SpringerNature_Books*"
} else {
  pattern <- "^Test_Springer*"
}
cat(
  paste0(
    "Parameters:\n",
    "\t", "dir = ", "\"", dir, "\"", "\n",
    "\t", "production = ", production, "\n",
    "\t", "chkfiles = ", chkfiles, "\n",
    "\t", "par = ", par, "\n",
    "\t", "ncores = ", ncores, "\n"
  )
)
# Bind metadata ----
cat("Binding Springer Books data from SpringerNature...\n")

# No `path` is given, so the zip archives are looked up in the current
# working directory.
zip_glob <- paste0(pattern, ".zip")
files <- list.files(pattern = glob2rx(zip_glob))

# Extract every archive into the temporary directory.
for (zip_path in files) {
  unzip(zipfile = zip_path, exdir = tmp_dir)
}

# Bind the extracted xlsx files whose names match `pattern`.
springer_books <- util_bind(
  dir = tmp_dir,
  format = "xlsx",
  pattern = pattern,
  fn_column = FALSE,
  save = FALSE
)

# Distinct DOI URLs, then keep only the first row for each DOI URL.
doi_url <- springer_books[["DOI URL"]]
springer_books_doi <- unique(doi_url)
springer_books <- springer_books[!duplicated(doi_url), ]
# Downloaded files ----
cat("Checking downloaded files in ", dir, "...", "\n", sep = "")
springer_books_available <- lib_springer_files(dir = dir)

# DOI URLs of the available PDF and EPUB files, passed through
# `lib_remove_doi_http()`.
available_pdf_url <- springer_books_available[["available_pdf"]][["DOI URL"]]
available_epub_url <- springer_books_available[["available_epub"]][["DOI URL"]]
doi_pdf <- lib_remove_doi_http(available_pdf_url)
doi_epub <- lib_remove_doi_http(available_epub_url)

# The same DOIs passed through `lib_remove_doi_prefix()`; these are used as
# file name stems when the files are checked.
doi_root_pdf <- lib_remove_doi_prefix(doi = doi_pdf)
doi_root_epub <- lib_remove_doi_prefix(doi = doi_epub)
# Optional validation ----
# When `chkfiles` is TRUE, `util_check_file_type()` is run with
# `remove_files = TRUE`, first on the PDF files and then on the EPUB files.
if (chkfiles) {
  cat("Checking and deleting invalid PDF documents...\n")
  pdf_fn <- paste0(doi_root_pdf, ".pdf")
  util_check_file_type(
    dir = dir,
    fn = pdf_fn,
    file_type = "PDF document",
    remove_files = TRUE,
    par = par,
    ncores = ncores
  )

  cat("Checking and deleting invalid EPUB documents...\n")
  epub_fn <- paste0(doi_root_epub, ".epub")
  util_check_file_type(
    dir = dir,
    fn = epub_fn,
    file_type = "EPUB document",
    remove_files = TRUE,
    par = par,
    ncores = ncores
  )
}
# Call `lib_springer_files()` again to get the available files after deleting
# invalid files (only needed when the check above was run).
if (chkfiles) {
  springer_books_available <- lib_springer_files(dir = dir)
  refreshed_pdf_url <- springer_books_available[["available_pdf"]][["DOI URL"]]
  refreshed_epub_url <- springer_books_available[["available_epub"]][["DOI URL"]]
  doi_pdf <- lib_remove_doi_http(refreshed_pdf_url)
  doi_epub <- lib_remove_doi_http(refreshed_epub_url)
}

# Report how many files of each format are available.
cat(
  length(doi_pdf), " ", "available PDF files.\n",
  length(doi_epub), " ", "available EPUB files.\n",
  sep = ""
)
extdata
# Write the de-duplicated metadata to inst/extdata as an xz-compressed Rds.
springer_books_fn <- file.path(root, "inst", "extdata", "springer_books.Rds")
cat("Saving ", springer_books_fn, "...\n", sep = "")
saveRDS(
  object = springer_books,
  file = springer_books_fn,
  compress = "xz"
)
# Write the result of `lib_springer_files()` to inst/extdata as an
# xz-compressed Rds.
springer_books_available_fn <- file.path(
  root, "inst", "extdata", "springer_books_available.Rds"
)
cat("Saving ", springer_books_available_fn, "...\n", sep = "")
saveRDS(
  object = springer_books_available,
  file = springer_books_available_fn,
  compress = "xz"
)
data
# Write the vector of distinct DOI URLs to data/ as an xz-compressed .rda.
springer_books_doi_fn <- file.path(root, "data", "springer_books_doi.rda")
cat("Saving ", springer_books_doi_fn, "...\n", sep = "")
save(
  list = "springer_books_doi",
  file = springer_books_doi_fn,
  compress = "xz"
)
tempdir
# Final clean-up step.
# NOTE(review): `util_clean_tempdir()` is defined outside this file; presumably
# it removes the files that were unzipped into `tempdir()` - confirm.
util_clean_tempdir()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.