## q## ############################################################
## ## ## Script needed to create the look_up table containing ##
## ## ## Species name and the corresponding URL on the BiolFlor ##
## ## Website ##
## ############################################################
## ## function called to retrieve name of genus/species and the
## ## corresponding url
## extract_biolflor<-function(url,replaced,replacement){
## ## download raw html
## pag<-htmlParse(getURL(url))
## ## for genus/specie the nodes of interest are those
## ## with class="contentilink", thus we extract only
## ## those from the html
## raw_source<-xpathApply(pag,"//*[@class='contentlink']")
## ## from the extracted tree we retrieve, for each element
## ## the name of the species/genus and its url, and store the
## ## result in a list
## list_extracted<-sapply(raw_source, function(el) {
## return(list(c(xmlValue(el),xmlGetAttr(el, "href"))))
## }
## )
## ## the list is converted to a data.frame
## df_extracted<-ldply(list_extracted)
## ## on the raw html only 'relative' URLs are present, thus
## ## they need to be made "absolute"
## df_extracted$V2<-gsub(replaced,replacement,df_extracted$V2)
## ## the resultin data.frame is returned
## return(df_extracted)
## }
## url_genus<-"http://www.ufz.de/biolflor/overview/gattung.jsp"
## replaced="^\\."
## replace_genus="http://www.ufz.de/biolflor/overview"
## replace_species="http://www.ufz.de/biolflor/"
## ## extract data.frame containing genera and their urls
## genera<-extract_biolflor(url_genus,replaced,replace_genus)
## ## an empty dataframe is created which will contain species names
## ## and their urls
## biolflor_lu<-data.frame(V1=character(0),V2=character(0))
## ## the dataframe is filled in with this loop
## ## DO NOT RUN (it takes time)
## for(i in genera$V2){
## ext<-extract_biolflor(i,replaced,replace_species)
## biolflor_lu<-rbind(biolflor_lu,ext)}
## biolflor_lu<-data.frame(biolflor_lu,name=(gsub("^((X [a-zA-Z]+ [a-zA-Z\\-]+)|([a-zA-Z]+ [a-zA-Z\\-]+)|([a-zA-Z]+ x [a-zA-Z\\-]+)) .*","\\1",biolflor_lu$V1,perl=F)))
## biolflor_lu$name<-as.character(biolflor_lu$name)
## ## REMEMBER TO REMOVE "annotations" COLUMN FROM ALL THE
## ## DATAFRAMES!!!!!!!!!
## ## OTHERWISE YOU'LL HAVE PROBLEMS WITH RCMD check
## save(biolflor_lu,file="../data/biolflor_lu.rda")
## ## From species names we now extract only Genus and Species
## ## (It's easier this way to work with TNRS)
## biolflor_check<-tnrs(biolflor_lu$name,source="iPlant_TNRS",verbose=FALSE)
## save(biolflor_check,file = "data/biolflor_check.rda")
## biolflor_lookup<-merge(biolflor_check[,1:6],biolflor_lu,by.x="submittedname",by.y="name")
## save(biolflor_lookup,file="data/biolflor_lookup.rda")
## ###
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.