Nothing
#' @title Checking species names for misspelling and synonyms
#' @description The function cross-references two vectors of species names
#' checking for possible synonyms, misspelled names, and genus-species or
#' species-subspecies correspondence.
#' @usage namesCompare(vec1,vec2,proportion=0.15)
#' @param vec1,vec2 a vector of species names. Genus names only are also
#' allowed. Generic name and specific epithet must be separated by '_'. Note
#' that \code{vec2} is used as the reference. Incomplete or suspicious names
#' are better placed in \code{vec1} (see example below).
#' @param proportion the maximum proportion of different characters between any
#' \code{vec1-vec2} names pair to consider it a possible misspelling.
#' @importFrom utils adist
#' @export
#' @return The function returns a \code{list} including:
#' @return \strong{$genus} if \code{vec1} includes genera names which miss
#' specific epithet, this object lists all the species in \code{vec2}
#' belonging to each of the genera.
#' @return \strong{$subspecies} if \code{vec1} includes subspecies (i.e. two
#' epithets after genus name), this object lists species in \code{vec2}
#' possibly corresponding to each of the subspecies.
#' @return \strong{$epithet} lists species with matching epithets as possible
#' synonyms.
#' @return \strong{$misspelling} lists possible misspelled names. For each
#' proposed mismatched names pair the proportion of characters in the
#' \code{vec1} differing from the string in \code{vec2} is returned.
#' @author Silvia Castiglione, Carmela Serio, Antonella Esposito
#' @examples
#' \dontrun{
#' names(DataFelids$statefel)->nams
#' nams[c(19,12,37,80,43)]<-c("Puma_yagouaroundi","Felis_manul","Catopuma",
#' "Pseudaelurus","Panthera_zdansky")
#' nams<-nams[-81]
#'
#' namesCompare(nams,names(DataFelids$statefel))
#' namesCompare(names(DataFelids$statefel),nams)
#' }
namesCompare<-function(vec1,vec2,proportion=0.15){
if(!is.null(ncol(vec1))){
if(ncol(vec1)>1) warning(paste("vec1:",ncol(vec1),"columns supplied, only the first one will be used"))
vec1[,1]->vec1
}
if(!is.null(ncol(vec2))){
if(ncol(vec2)>1) warning(paste("vec2:",ncol(vec2),"columns supplied, only the first one will be used"))
vec2[,1]->vec2
}
if(all(vec1%in%vec2)) stop("vec1 and vec2 species perfectly match")
vec1[which(!vec1%in%vec2)]->vec1
vec2[which(!vec2%in%vec1)]->vec2
### GENERA CHECK ###
if(any(!grepl("_",vec1))){
do.call(rbind,lapply(vec1[!grepl("_",vec1)],function(x)
data.frame(vec1=rep(x,length(grep(x,vec2))),vec2=vec2[grep(x,vec2)])))->genera
if(nrow(genera)<1) genera<-NULL
}else genera<-NULL
vec1[which(!vec1%in%genera[,1])]->vec1
### SUBSPECIES CHECK ###
if(any(sapply(strsplit(vec1,"_"),length)>2)){
vec1[which(sapply(strsplit(vec1,"_"),length)>2)]->suspv1
strsplit(vec1,"_")[which(sapply(strsplit(vec1,"_"),length)>2)]->suspv1_split
do.call(rbind,mapply(j=suspv1_split,w=suspv1,function(j,w){
combn(j,2,function(k) paste(k,collapse="_"))[1:2]->check
if(any(check%in%vec2)) data.frame(vec1=rep(w,length(which(vec2%in%check))),vec2=vec2[which(vec2%in%check)]) else{
adcheck2 <- adist(check,vec2)/as.integer(nchar(check))
adcheck2 <- data.frame(vec1=check,vec2=vec2[apply(adcheck2, 1, which.min)],proportion=apply(adcheck2, 1, min))
if(any(adcheck2$proportion<=proportion)){
data.frame(vec1=rep(w,length(which(adcheck2$proportion<=proportion))),
vec2=adcheck2[which(adcheck2$proportion<=proportion),2])
} else NULL
}
},SIMPLIFY = FALSE))->subspecies
vec1[which(!vec1%in%subspecies[,1])]->vec1
} else subspecies<-NULL
### MISSPELLING CHECK ###
ad1 <- adist(vec1,vec2)/nchar(vec1)
ad1.dataframe <- data.frame(vec1,vec2=vec2[apply(ad1, 1, which.min)],proportion=apply(ad1, 1, min))
ad1.dataframe[which(ad1.dataframe$proportion<=proportion),]->ad1.dataframe
misspelling <- ad1.dataframe[order(ad1.dataframe[,3]),]
if(nrow(misspelling)<1) misspelling<-NULL
### epithet CHECK ###
vec1[which(sapply(strsplit(vec1,"_"),length)>=2)]->vec1
vec2[which(sapply(strsplit(vec2,"_"),length)>=2)]->vec2
do.call(rbind,lapply(1:length(vec2),function(k){
strsplit(vec2,"_")[[k]]->ep2
if(length(ep2)>2) {
if(any(duplicated(ep2))) data.frame(ind=k, sp=ep2[2]) else data.frame(ind=rep(k,2), sp=ep2[2:3])
} else data.frame(ind=k, sp=ep2[2])
}))->episp2
do.call(rbind,lapply(1:length(strsplit(vec1,"_")),function(k){
if(length(strsplit(vec1,"_")[[k]])>2) {
strsplit(vec1,"_")[[k]][2:3]->ep
if(any(duplicated(ep))) ep[1]->ep
} else strsplit(vec1,"_")[[k]][2]->ep
if(any(episp2[,2]%in%ep)) data.frame(vec1=rep(vec1[k],length(which(episp2[,2]%in%ep))),
vec2=vec2[episp2[which(episp2[,2]%in%ep),1]]) else {
adepi <- adist(ep,episp2[,2])/nchar(ep)
adepi<-data.frame(ep1=ep,ep2=vec2[episp2[apply(adepi, 1, which.min),1]],proportion=apply(adepi, 1, min))
if(any(adepi$proportion<=proportion)){
data.frame(vec1=rep(vec1[k],length(which(adepi$proportion<=proportion))),
vec2=adepi[which(adepi$proportion<=proportion),2])
} else NULL
}
}))->epithet
return(list(genus=genera,subspecies=subspecies,epithet=epithet,misspelling=misspelling))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.