#' Parse an http header
#'
#' @param header Header text, one element per line (e.g. as read by readLines)
#' @return Named list of header fields, or \code{NULL} when the status line
#'   (first element) does not contain "OK"
#' @author jefferis
#' @export
parseurlheader <- function(header) {
  # Bail out unless the status line reports success
  if (!grepl("OK", header[1], fixed = TRUE)) return(NULL)
  # Keep only "Name: value" lines, then strip a trailing carriage return
  tt <- header[grep(":", header, fixed = TRUE)]
  tt <- sub("\r$", "", tt)
  # Split each line at the FIRST colon; tolerate any (or no) whitespace after
  # the colon -- the previous regex required exactly one space, so headers
  # like "X-Custom:value" were silently returned unparsed
  fields <- sub("^([^:]+):.*", "\\1", tt)
  values <- sub("^[^:]+:[ \t]*", "", tt)
  l <- list()
  l[fields] <- values
  l
}
#' Extract the links from text of a web page
#'
#' @details The baseurl is normally just the original url (although a different
#'   url is sometimes explicitly specified in the html body).
#' @details absolute depends on getRelativeURL from the \code{XML} package.
#' @param body Raw text of web page
#' @param linktype class of link to find (e.g. href, src)
#' @param regex Regular expression to filter links
#' @param fixed Whether \code{linktype} and \code{regex} are matched as fixed
#'   strings rather than regular expressions
#' @param rooturl Base url for expansion of relative links
#' @param absolute Whether to convert relative urls to absolute
#' @param USE.NAMES Keep names on the result when absolute = TRUE (the names
#'   are set by \code{getRelativeURL}); ignored when absolute = FALSE
#' @return character vector of urls
#' @author jefferis
#' @export
#' @seealso \code{\link{grep},\link{getHTMLLinks}}
#' @importFrom XML getRelativeURL parseURI
extract_links <- function(body, linktype = "href", regex = NULL, fixed = FALSE,
                          rooturl = attr(body, 'url'), absolute = TRUE,
                          USE.NAMES = FALSE) {
  # Keep lines mentioning the link attribute, then split lines containing
  # several html tags so each fragment holds at most one candidate link
  t2 <- body[grep(linktype, body, fixed = fixed)]
  t3 <- unlist(strsplit(t2, "><"))
  t4 <- t3[grep(linktype, t3, fixed = fixed)]
  # Pull out the double-quoted attribute value, e.g. href="..." -> ...
  # NOTE(review): linktype is embedded in this regex unescaped even when
  # fixed = TRUE, so regex metacharacters in linktype would misbehave here
  links <- sub(paste0(".*", linktype, "=\"([^\"]+).*"), "\\1", t4)
  if (!is.null(regex)) links <- links[grep(regex, links, fixed = fixed)]
  if (absolute && !is.null(rooturl)) {
    # getRelativeURL needs a path component; add a trailing slash if missing
    if (parseURI(rooturl)$path == "") rooturl <- paste0(rooturl, '/')
    links <- getRelativeURL(links, rooturl)
    # drop any names attached by getRelativeURL unless the caller wants them
    if (!USE.NAMES) names(links) <- NULL
  }
  links
}
# Scraped boilerplate from the hosting site, not part of the package source;
# commented out so the file parses as valid R:
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.