# Read the text of a document (PDF or HTML) as a character vector.
#
# Args:
#   x:    Location of the document (single string). A location containing a
#         literal ".pdf" (any letter case) is read with pdftools; anything
#         else is treated as HTML.
#   sour: Source label for HTML documents. "boj" selects the extraction rule
#         for the element with id "contents-skip"; every other value uses the
#         generic rule. It is not evaluated for PDF input.
#
# Returns:
#   PDF:          the extracted text split into lines, or NA when the PDF
#                 yields no characters at all (e.g. a scan without OCR).
#   "boj" HTML:   the text of the "contents-skip" element split into lines.
#   other HTML:   the text of the matched nodes of the cleaned page, with
#                 "\u0092" replaced by an apostrophe and double quotes and
#                 tabs removed.
read_text <- function(x, sour) {
  # The dot is escaped so that only a literal ".pdf" counts. An unescaped "."
  # is a regex wildcard and would also match e.g. "/pdf/" or "xpdf" inside the
  # address of an HTML page, sending that page to pdftools.
  if (grepl("\\.pdf", x, ignore.case = TRUE)) {
    text <- suppressMessages(pdftools::pdf_text(x))
    if (sum(nchar(text)) == 0) {
      # Return NA if the PDF contains no text (e.g. no OCR)
      return(NA)
    }
    return(readr::read_lines(text))
  }

  if (sour == "boj") {
    page <- suppressMessages(xml2::read_html(x, encoding = "utf8"))
    return(
      page %>%
        rvest::html_nodes(xpath = '//*[@id="contents-skip"]') %>%
        rvest::html_text() %>%
        readr::read_lines()
    )
  }

  # Generic HTML: clean_html() drops site chrome before node extraction.
  clean_html(x) %>%
    rvest::html_nodes("font:nth-child(1),font:nth-child(3),i,p,h1,h2,h3") %>%
    rvest::html_text() %>%
    # NOTE(review): "\u0092" is presumably a mis-decoded Windows-1252
    # apostrophe -- confirm against the source pages.
    stringr::str_replace_all(., "\u0092", "'") %>%
    stringr::str_remove_all(., '\"') %>%
    stringr::str_remove_all(., "\t")
}
# Parse an HTML document and remove known page chrome from it.
#
# Args:
#   x: Location of the HTML document (single string). When it contains "#" it
#      is passed to xml2::read_html() unchanged; otherwise it is wrapped in
#      url() and read through that connection.
#
# Returns:
#   The parsed xml2 document after every node matching one of the selectors
#   listed in the body has been removed.
clean_html <- function(x) {
  if (stringr::str_detect(x, "#")) {
    doc <- suppressMessages(xml2::read_html(x))
  } else {
    con <- url(x)
    doc <- tryCatch(
      suppressMessages(xml2::read_html(con)),
      error = function(e) {
        # If the connection could not be opened it is still registered and
        # would linger until garbage collection ("closing unused connection").
        # Release it now; close() fails harmlessly when the reader has
        # already closed it, hence the silent try().
        try(close(con), silent = TRUE)
        stop(e)
      }
    )
  }

  # Selectors for elements that are not part of the document text.
  chrome_selectors <- c(
    ".jumbotron.hidden-xs",      # Fed heading
    ".nav__header",              # Fed sidebar
    ".ecb-footerBottom",         # ECB footer
    ".address-box.-top-arrow",   # ECB address box
    ".ecb-cookieConsent.hidden"  # ECB cookie consent
  )

  # xml_remove() modifies the document in place, so `doc` itself is stripped.
  for (selector in chrome_selectors) {
    xml2::xml_remove(rvest::html_nodes(doc, selector))
  }

  doc
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.