varLabel_url <- function(url,file){
if (do::file.name(file)=='rxq_drug.varLabel'){
wait <- TRUE
while (wait){
html <- tryCatch(xml2::read_html(url), error=function(e) 'e')
wait <- ifelse(is.character(html),TRUE,FALSE)
}
firs_publish <- html |>
rvest::html_elements(xpath = '//div[@id="PageHeader"]//h5') |>
set::grep_and('First Published') |>
rvest::html_text() |>
do::fmt(x = '#/ ') |>
do::Replace0('\t','\n','\r')
last_revise <- html |>
rvest::html_elements(xpath = '//div[@id="PageHeader"]//h5') |>
set::grep_and('Last Revised') |>
rvest::html_text() |>
do::fmt(x = '#/ ') |>
do::Replace0('\t','\n','\r')
df <- html |>
rvest::html_elements(xpath = '//div[@id="Sections"]') |>
rvest::html_table() |>
do::list1() |>
as.data.frame()
colnames(df) <- tolower(colnames(df))
for (i in 1:ncol(df)) {
df[,i] <- tolower(df[,i])
}
df <- df[,c("variable name","label")]
df <- df[nchar(df[[1]])>0,]
colnames(df)[1] <- 'variable'
df <- cbind(df,url)
suppressWarnings(write.table(firs_publish,file,row.names = FALSE,col.names = FALSE,quote = FALSE))
suppressWarnings(write.table(last_revise,file,row.names = FALSE,col.names = FALSE,append = TRUE,quote = FALSE))
suppressWarnings(write.table(df,file,append = TRUE,sep = '\t',row.names = FALSE))
invisible('ok')
}else{
if (tools::file_ext(url)=='pdf'){
pdf <- paste0(do::Replace0(file,tools::file_ext(file)),'pdf')
if (file.exists(pdf)) file.remove(pdf)
if (file.exists(pdf)) unlink(pdf,force = TRUE)
cat(crayon::bgWhite(' pdf'))
nullcon <- file(nullfile(), open = "wb")
sink(nullcon, type = "message")
# download
download.file(url,pdf)
sink(type = "message")
close(nullcon)
if (!file.exists(file)) varLab_NULL("#firs_publish:pdf",'#last_revise:pdf',file)
return(invisible('pdf'))
}
wait <- TRUE
while (wait) {
html <- tryCatch(xml2::read_html(url), error=function(e) 'e')
wait <- ifelse(is.character(html),TRUE,FALSE)
}
firs_publish <- html |>
rvest::html_elements(xpath = '//div[@id="PageHeader"]//h5') |>
set::grep_and('First Published') |>
rvest::html_text() |>
do::fmt(x = '#/ ') |>
do::Replace0('\t','\n','\r')
last_revise <- html |>
rvest::html_elements(xpath = '//div[@id="PageHeader"]//h5') |>
set::grep_and('Last Revised') |>
rvest::html_text() |>
do::fmt(x = '#/ ') |>
do::Replace0('\t','\n','\r')
codebook <- html |>
rvest::html_elements(xpath = '//div[@id="Codebook"]//div[@class="pagebreak"]')|>
set::grep_and(c('dl','table'))
codebook
if (do::file.name(file) %in% c('p_imq.varLabel','imq_j.varLabel','alb_cr_g.varLabel')){
codebook <- html |>
rvest::html_elements(xpath = '//div[@id="Codebook"]//div[@class="pagebreak"]')
}
if (length(codebook)==0){
varLab_NULL(firs_publish,last_revise,file)
return(invisible('no codebook'))
}
df <- codebook |>
rvest::html_elements(xpath = 'dl') |>
lapply(dl) |>
do.call(what = plyr::rbind.fill)
df
if (nrow(df)==0){
varLab_NULL(firs_publish,last_revise,file)
return(invisible('no varLabel'))
}else{
df <- cbind(df,url)
suppressWarnings(write.table(firs_publish,file,row.names = FALSE,col.names = FALSE,quote = FALSE))
suppressWarnings(write.table(last_revise,file,row.names = FALSE,col.names = FALSE,append = TRUE,quote = FALSE))
suppressWarnings(write.table(df,file,append = TRUE,sep = '\t',row.names = FALSE))
invisible('ok')
}
}
}
varLab_NULL <- function(firs_publish,last_revise,file){
df <- data.frame(variable=1,label=2)
df <- df[-c(1:nrow(df)),]
suppressWarnings(write.table(firs_publish,file,row.names = FALSE,col.names = FALSE,quote = FALSE))
suppressWarnings(write.table(last_revise,file,row.names = FALSE,col.names = FALSE,append = TRUE,quote = FALSE))
suppressWarnings(write.table(x = df,file = file,sep = '\t',eol = '\n',row.names = FALSE,append = TRUE))
}
dl <- function(li){
title <- li |>
rvest::html_elements('dt') |>
rvest::html_text(TRUE) |>
do::Trim(':') |>
tolower() |>
do::Replace(' {1,}',' ')
title[title == "variable name"] <- 'variable'
title[title == "sas label"] <- 'label'
title[title == "english text"] <- 'description'
title[title == "english instructions"] <- 'instructions'
title
cont <- li |>
rvest::html_elements('dd')|>
rvest::html_text(TRUE) |>
do::Trim(':') |>
tolower() |>
do::Replace(' {1,}',' ') |>
do::Replace0('\r','\n','\t')
if (anyDuplicated(title)){
duptitle <- names(table(title))[table(title) >1]
for (i in duptitle) {
ck <- which(title==i)
dupcont <- paste0(cont[ck],collapse = ';\n')
title <- title[-ck[-1]]
cont <- cont[-ck[-1]]
cont[ck[1]] <- dupcont
}
}
matrix(cont,nrow = 1,dimnames = list(NULL,title)) |>
data.frame(check.names = FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.