#' @import dplyr
#' @importFrom stringr str_detect
#' @importFrom urltools domain
chk_list <- function(name_){
name <- NULL
tar_name %>%
dplyr::pull(name) %>%
stringr::str_detect(name_) %>%
any() %>%
return()
}
#' @importFrom urltools domain
#' @import stringr
#' @export
url_to_name <- function(target_url) {
target_url %>%
urltools::domain() %>%
stringr::str_remove("www.?\\.") %>%
stringr::str_replace_all("[^a-zA-Z0-9]", "_")
}
#' @import dplyr
support_info <- function(config) {
config %>%
.[4:9] %>%
dplyr::as_tibble() %>%
dplyr::mutate_all(as.character) %>%
list(content = .,
error = all(is.na(.))) %>%
return()
}
encoding_info <- function(config){
config$encoding
}
#' @import dplyr
#' @import tidyr
content_for_use <- function(x) {
dplyr::mutate(x, rowid = c("node", "attr")) %>%
tidyr::gather(where, value, -rowid) %>%
tidyr::spread(rowid, value) %>%
dplyr::select(where, node, attr) %>%
dplyr::filter(!is.na(node))
}
#' @importFrom stringr str_detect
is_url <- function(target_url){
stringr::str_detect(
target_url,
"^(http:\\/\\/www\\.|https:\\/\\/www\\.|http:\\/\\/|https:\\/\\/)?[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*\\.[a-z]{2,5}(:[0-9]{1,5})?(\\/.*)?$"
)
}
#' @importFrom yaml read_yaml
yml_template <- function(){
system.file("", "template.yml",
package = "newspaper") %>%
yaml::read_yaml()
}
#' yml start
#'
#' start
#'
#' @param target_url urls
#' @param open open options
#'
#' @importFrom stringr str_c
#' @importFrom urltools scheme domain
#' @importFrom httr GET
#' @importFrom rstudioapi navigateToFile
#' @importFrom utils browseURL
#'
#' @export
yml_start <- function(target_url, open = T){
yml_template() -> temp
url_to_name(target_url) -> name
filename <- str_c("./inst/yaml/" ,name, ".yml")
stringr::str_c(
urltools::scheme(target_url),
"://",
urltools::domain(target_url)
) -> site
temp$name <- name
temp$site <- site
temp$encoding <- get_encoding(target_url)
temp$body$attr <- "pass"
yaml::write_yaml(x = temp,
file = filename,
fileEncoding = "UTF-8")
if (open & interactive()) {
rstudioapi::navigateToFile(file = filename)
utils::browseURL(target_url)
}
print(name)
}
#' @export
get_config <- function(name){
system.file("yaml",
stringr::str_c(name, ".yml"),
package = "newspaper") %>%
purrr::when(
. == "" ~ stop("There's no config file for this site yet."),
~ .
) %>%
yaml::read_yaml()
}
#' @import httr
#' @importFrom purrr when
get_encoding <- function(target_url) {
target_url %>% httr::GET() %>%
httr::content("raw") %>%
rawToChar() %>%
stringr::str_remove_all("[^a-zA-Z0-9]") %>%
tolower() %>%
stringr::str_extract_all("utf8|euckr") %>%
.[[1]] %>% unique() %>%
purrr::when(
identical(., character(0)) ~ NA,
length(.) != 1 ~ NA,
"utf8" == . ~ "UTF-8",
"euckr" == . ~ "EUC-KR"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.