Nothing
parse_taxa <- function(taxa){
parse_taxon <- function(taxon){
replace_space <- function(x) {
gsub("[[:space:]]+", " ", gsub("^[[:space:]]+|[[:space:]]+$", "", x))
}
if(length(taxon) > 1){
stop("Only one taxon allowed")
}
GENUS <- ""
SPECIES <- ""
AUTHOR_OF_SPECIES <- ""
INFRASPECIFIC_RANK <- ""
INFRASPECIFIC_EPITHET <- ""
AUTHOR_OF_INFRASPECIFIC_RANK <- ""
taxon <- gsub(" +", " ", replace_space(taxon))
### Parse Genus
gap1 <- regexpr(pattern = " ", text = taxon)
GENUS <- replace_space(substr(taxon, start = 1, stop = (gap1-1)))
### The rest part
part1 <- replace_space(substr(taxon, start = gap1 + 1, stop = nchar(taxon)))
gap2 <- regexpr(pattern = " ", text = part1)
### Parse Species
if(gap2 < 0){
SPECIES <- replace_space(substr(part1, start = gap2 + 1, stop = nchar(part1)))
author_temp <- ""
} else {
SPECIES <- replace_space(substr(part1, start = 1, stop = (gap2 - 1)))
author_temp <- replace_space(substr(part1, start = gap2 + 1, stop = nchar(part1)))
}
AUTHOR_OF_SPECIES <- author_temp
gap3 <- regexpr(pattern = " ", text = author_temp)
if(grepl("var\\. ", taxon)|grepl("subsp\\. ", taxon)|grepl(" f\\. ", taxon)){
if(grepl("var\\. ", taxon)){
INFRASPECIFIC_RANK <- "var."
gap_var <- regexpr(pattern = "var\\. ", text = author_temp) + nchar("var.")
AUTHOR_OF_SPECIES <- replace_space(substr(author_temp, start = 1, stop = gap_var - nchar("var.") -1))
part_INFRASP_EP_AUTHOR_OF_INFRASP <- replace_space(substr(author_temp, start = gap_var + 1, stop = nchar(author_temp)))
} else {
if(grepl("subsp\\. ", taxon)){
INFRASPECIFIC_RANK <- "subsp."
gap_subsp <- regexpr(pattern = "subsp\\. ", text = author_temp) + nchar("subsp.")
AUTHOR_OF_SPECIES <- replace_space(substr(author_temp, start = 1, stop = gap_subsp - nchar("subsp.") -1))
part_INFRASP_EP_AUTHOR_OF_INFRASP <- replace_space(substr(author_temp, start = gap_subsp + 1, stop = nchar(author_temp)))
} else {
if(substr(author_temp, start = 1, stop = nchar("f.")) == "f."){
INFRASPECIFIC_RANK <- "f."
gap_f <- regexpr(pattern = "f\\. ", text = author_temp) + nchar("f.")
position_f <- regexpr(pattern = "f\\.", text = taxon)[[1]][1] ### the first f. is the forma
position_white_space <- regexpr(pattern = "", text = taxon)[[1]]
location_species_end <- position_white_space[2]
AUTHOR_OF_SPECIES_temp <- replace_space(substr(taxon, start = location_species_end, stop = position_f - 1))
AUTHOR_OF_SPECIES <- ifelse(is.na(AUTHOR_OF_SPECIES_temp), "", AUTHOR_OF_SPECIES_temp)
part_INFRASP_EP_AUTHOR_OF_INFRASP <- replace_space(substr(author_temp, start = gap_f + 1, stop = nchar(author_temp)))
} else {
INFRASPECIFIC_RANK <- ""
part_INFRASP_EP_AUTHOR_OF_INFRASP <- ""
AUTHOR_OF_SPECIES <- replace_space(substr(author_temp, start = 1, stop = nchar(author_temp)))
}
}
}
} else {
part_INFRASP_EP_AUTHOR_OF_INFRASP <- ""
}
##part_INFRASP_EP_AUTHOR_OF_INFRASP <- replace_space(substr(author_temp, start = gap_var_or_subsp + 1, stop = nchar(author_temp)))
gap4 <- regexpr(pattern = " ", text = part_INFRASP_EP_AUTHOR_OF_INFRASP)
if(gap4 > 0){
INFRASPECIFIC_EPITHET <- replace_space(substr(part_INFRASP_EP_AUTHOR_OF_INFRASP, start = 1, stop = gap4 - 1 ))
AUTHOR_OF_INFRASPECIFIC_RANK <- replace_space(substr(part_INFRASP_EP_AUTHOR_OF_INFRASP, start = gap4 + 1, stop = nchar(part_INFRASP_EP_AUTHOR_OF_INFRASP)))
} else {
INFRASPECIFIC_EPITHET <- replace_space(substr(part_INFRASP_EP_AUTHOR_OF_INFRASP, start = 1, stop = nchar(part_INFRASP_EP_AUTHOR_OF_INFRASP)))
if(INFRASPECIFIC_EPITHET %in% strsplit(AUTHOR_OF_SPECIES, " ")[[1]]){
INFRASPECIFIC_EPITHET <- ""
}
}
if(!grepl(" ", taxon)){
GENUS = taxon
SPECIES <- ""
AUTHOR_OF_SPECIES <- ""
INFRASPECIFIC_RANK <- ""
INFRASPECIFIC_EPITHET <- ""
AUTHOR_OF_INFRASPECIFIC_RANK <- ""
}
res <- c(taxon ,
GENUS ,
SPECIES ,
AUTHOR_OF_SPECIES ,
INFRASPECIFIC_RANK ,
INFRASPECIFIC_EPITHET ,
AUTHOR_OF_INFRASPECIFIC_RANK )
names(res) <- c("TAXON_PARSED" ,
"GENUS_PARSED" ,
"SPECIES_PARSED" ,
"AUTHOR_OF_SPECIES_PARSED" ,
"INFRASPECIFIC_RANK_PARSED" ,
"INFRASPECIFIC_EPITHET_PARSED" ,
"AUTHOR_OF_INFRASPECIFIC_RANK_PARSED" )
return(res)
}
res <- data.frame(t(sapply(taxa, parse_taxon, USE.NAMES = FALSE)), stringsAsFactors = FALSE)
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.