# Copyright 2011-2024 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package roxyPackage.
#
# roxyPackage is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# roxyPackage is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with roxyPackage. If not, see <http://www.gnu.org/licenses/>.
#' Generate RSS feeds from R NEWS files
#'
#' This function should take either HTML or Rd files and return a valid RSS 2.0 XML file.
#'
#' @param news Character string, path to the R NEWS file to be converted.
#' @param rss Character string, path to the RSS.xml file to be written. If \code{NULL}, results are written to stdout().
#' @param html Logical, whether \code{news} is in HTML or Rd format. If \code{NULL}, guess this from the file ending.
#' @param encoding Character string, how the feed is encoded.
#' @param channel A named character vector with information on this RSS feed:
#' \describe{
#' \item{\code{title}:}{Title of the feed, probably the package name.}
#' \item{\code{link}:}{URL to the package web page, e.g. its repository site.}
#' \item{\code{description}:}{Descriptions of the feed, e.g. the package.}
#' \item{\code{language}:}{Optional, a valid RSS language code, see \code{http://www.rssboard.org/rss-language-codes}.}
#' \item{\code{atom}:}{Optional, full URL to the RSS feed on the web, used for \code{atom:link rel="self"}.}
#' }
#' @return No return value, writes a file or to stdout()
#' @importFrom tools Rd2HTML
#' @importFrom XiMpLe parseXMLTree node XMLNode XMLTree
#' @export
#' @examples
#' \dontrun{
#' channel.info <- c(
#' title="roxyPackage",
#' link="http://R.reaktanz.de/pckg/roxyPackage",
#' description="Utilities to Automate Package Builds",
#' atom="http://R.reaktanz.de/pckg/roxyPackage/rss.xml")
#' rss.tree <- news2rss("~/R/roxyPackage/NEWS.Rd",
#' channel=channel.info)
#' }
news2rss <- function(
news,
rss=NULL,
html=NULL,
encoding="UTF-8",
channel=c(
title="",
link="",
description="",
language="",
atom=""
)
){
# check if all necessary information is present
for (need.info in c("title", "link", "description")){
if(!need.info %in% names(channel)){
missing.info <- TRUE
} else if(identical(channel[[need.info]], "")){
missing.info <- TRUE
} else {
missing.info <- FALSE
}
if(isTRUE(missing.info)){
stop(simpleError(paste0("news: missing info on RSS channel: '", need.info, "', no RSS feed generated!")))
} else {}
}
if(file_test("-f", news)){
## try to see if this is an Rd or HTML file
if(is.null(html)){
if(isTRUE(grepl("html$", news, ignore.case=TRUE))){
html <- TRUE
} else if(isTRUE(grepl("rd$", news, ignore.case=TRUE))){
html <- FALSE
# package <- gsub(".*\\\\name\\{([^\\}]+)\\}.*", "\\1", Rd.raw, perl=TRUE)
} else {
stop(simpleError("news: unknown file type, consider setting 'html' accordingly!"))
}
} else {}
## html should now be either TRUE or FALSE
if(isTRUE(html)){
html.raw <- paste(readLines(news), collapse=" ")
} else {
html.raw <- paste(capture.output(tools::Rd2HTML(Rd=news)), collapse=" ")
}
## some ugly hacks
# at least until R 2.14 <ul> and <ol> blocks missed a closing </li>, so
# XiMpLe::parseXMLTree() comes to really strange results
# we'll try to fix this, before we parse the object
html.tags <- XML.single.tags(html.raw)
tag.li.open <- tolower(html.tags) %in% "<li>"
tag.li.close <- tolower(html.tags) %in% "</li>"
if(sum(tag.li.open) > sum(tag.li.close)){
tag.ul.close <- tolower(html.tags) %in% "</ul>"
tag.ol.close <- tolower(html.tags) %in% "</ol>"
if(sum(tag.li.open) == sum(tag.li.close) + sum(tag.ul.close) + sum(tag.ol.close)){
# this should do it:
html.tags[tag.ul.close] <- "</li></ul>"
html.tags[tag.ol.close] <- "</li></ol>"
} else {
# hm, seems there's more wrong with this file, better stop here
stop(simpleError("news: the NEWS.html file looks really strange, missing </li>s, please check!"))
}
}
# <meta> and <link> probably are also not in XML format
for (this.tag in c("link", "meta")){
html.tags <- gsub(
paste0("^<", this.tag, " ([^>]+)([^/])>"),
paste0("<", this.tag, " \\1\\2 />"),
html.tags,
ignore.case=TRUE, perl=TRUE)
}
## get HTML tree
# if this works it should be relatively easy to make an RSS XML object from this object
html.tree <- parseXMLTree(html.tags, object=TRUE)
# News for Package '*': XiMpLe::node(html.tree, node=list("html","body","div","h2"))
# Changes in * version *: XiMpLe::node(html.tree, node=list("html","body","div","h3"))
#
# everything between two <h3>s is news for one release
# we only need the body child nodes to search for news
# this node() call returns a named list
html.body <- node(html.tree, node=list("html","body","div"), what="children")
# check if entries are inside a <main> tag
if(!is.null(XMLScan(html.body, "main"))){
html.body <- node(html.body, node=list("main"), what="children")
} else {}
# now go through the child nodes and return everything from one <h3> to another
news.start <- which(names(html.body) %in% "h3")
# how many news nodes are there?
num.all.news <- length(news.start)
# append the number of all nodes plus one as last "news.start" for the loop
news.start <- c(news.start, length(html.body)+1)
xml.channel.items <- unlist(sapply(1:num.all.news, function(num.this.news){
this.news.start <- news.start[num.this.news]
# news end the node before the next news start
this.news.end <- news.start[num.this.news+1]-1
this.news.range <- html.body[(this.news.start+1):this.news.end]
this.news.title.raw <- slot(slot(html.body[[this.news.start]], "children")[[1]], "value")
# try to generate a unique, unchanging guid -- we take version string
this.news.guid <- guid_(
gsub("[[:space:]]*", "", gsub(".*Changes in (.*) version (.*)", "\\1\\2", this.news.title.raw, ignore.case=TRUE, perl=TRUE)),
isPermaLink="false"
)
# scan title for release dates
this.news.hasDate <- grepl(".*(\\()?([[:alnum:]]{4}-[[:alnum:]]{2}-[[:alnum:]]{2})(\\))?.*", this.news.title.raw, perl=TRUE)
if(isTRUE(this.news.hasDate)){
this.news.date.raw <- gsub(".*(\\()?([[:alnum:]]{4}-[[:alnum:]]{2}-[[:alnum:]]{2})(\\))?.*", "\\2", this.news.title.raw, perl=TRUE)
this.news.date <- pubDate_(dateRFC2822(this.news.date.raw))
# remove date from the title
this.news.title.raw <- gsub("[[:space:]]+(\\()?([[:alnum:]]{4}-[[:alnum:]]{2}-[[:alnum:]]{2})(\\))?", "", this.news.title.raw, perl=TRUE)
} else {
this.news.date <- NULL
}
this.news.title <- title_(this.news.title.raw)
this.news.link <- link_(channel[["link"]])
# place description in CDATA to be able to use HTML formatting
this.news.range.CDATA <- CDATA_(.children=this.news.range)
this.news.desc <- description_(this.news.range.CDATA)
this.news.node <- item_(this.news.title, this.news.link, this.news.date, this.news.guid, this.news.desc)
}))
xml.channel.title <- title_(channel[["title"]])
xml.channel.link <- link_(channel[["link"]])
roxyPackage.version <- read.dcf(file.path(find.package("roxyPackage"), "DESCRIPTION"), fields="Version")
xml.channel.generator <- generator_(paste0("roxyPackage (", roxyPackage.version, ")"))
xml.channel.desc <- description_(CDATA_(channel[["description"]]))
xml.channel.lang <- xml.channel.atom <- NULL
if("language" %in% names(channel)){
if(!identical(channel[["language"]], "")){
xml.channel.lang <- language_(channel[["language"]])
} else {}
} else {}
rss.node.attrs <- list(version="2.0")
if("atom" %in% names(channel)){
if(!identical(channel[["atom"]], "")){
xml.channel.atom <- atomlink_(href=channel[["atom"]], rel="self", type="application/rss+xml")
rss.node.attrs <- append(rss.node.attrs, list("xmlns:atom"="http://www.w3.org/2005/Atom"))
} else {}
} else {}
xml.channel <- rss_(
channel_(
attrs=rss.node.attrs,
.children=append(
list(
xml.channel.title,
xml.channel.link,
xml.channel.atom,
xml.channel.desc,
xml.channel.generator,
xml.channel.lang),
xml.channel.items)
)
)
rss.tree <- XMLTree(xml.channel, xml=list(version="1.0", encoding=encoding))
# uncomment for debugging:
# rss.tree <- xml.channel.items
if(!is.null(rss)){
cat(pasteXML(rss.tree), file=rss)
message(paste0("news: updated RSS feed ", rss))
return(invisible(NULL))
} else {
return(rss.tree)
}
} else {
warning(paste0("news: ", news," does not exist, no RSS file created!"), call.=FALSE)
}
return(invisible(NULL))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.