R/auto_name_choice.R

Defines functions auto_name_choice

## Requirement: 'tibble' + 'stringr'

# Info: Automatically detects and resolves conflicts between the taxon names given by the different databases (see function complete_taxonomy).
# Info: For each conflict, it picks the most common name and replaces all other synonyms or conflicting entries with it.

# Resolve name conflicts written as "NameA or NameB" in every column.
#
# Args:
#   data: A data frame (or any object accepted by data.frame()) whose
#     character or factor columns may hold conflicts joined by " or ".
#
# Returns:
#   A tibble with the same columns. For each distinct conflict found in a
#   column, the conflict entry and every entry exactly equal to one of its
#   candidate names are replaced by the most common candidate.
#
# Details:
#   - A candidate name is the first word of each alternative of a conflict.
#   - "Most common" is the candidate mentioned (literal substring match) in
#     the largest number of entries of the column; on a tie the candidate
#     listed first in the conflict wins.
#   - Conflicts are resolved one after the other, so counts for a later
#     conflict already reflect the replacements made for earlier ones.
#   - Factor columns that contain a conflict are returned as character.
auto_name_choice <- function(data) {

  data <- data.frame(data)

  # seq_len() keeps the loop from running on a zero-column input.
  for (i in seq_len(ncol(data))) {

    # `[[` extracts the column as a vector even when `data` has one column,
    # where `data[rows, ][, i]` would fail after dimension dropping.
    column <- data[[i]]

    # Handle factors as text so strsplit() and the replacement work.
    if (is.factor(column)) {
      column <- as.character(column)
    }

    # Only text columns can hold "A or B" conflicts.
    if (!is.character(column)) {
      next
    }

    conflicts <- unique(column[grepl(" or ", column, fixed = TRUE)])

    if (length(conflicts) == 0L) {
      next
    }

    for (conflict in conflicts) {

      alternatives <- strsplit(conflict, " or ", fixed = TRUE)[[1]]
      candidates <- stringr::word(alternatives, 1)
      candidates <- candidates[!is.na(candidates) & nzchar(candidates)]

      if (length(candidates) == 0L) {
        next
      }

      # Count the entries mentioning each candidate. fixed = TRUE stops
      # characters such as "." or "(" from being read as a regex.
      occurrences <- vapply(
        candidates,
        function(candidate) sum(grepl(candidate, column, fixed = TRUE)),
        integer(1)
      )

      # which.max() returns the first maximum, which gives the tie rule.
      choice <- candidates[[which.max(occurrences)]]

      column[column %in% c(conflict, candidates)] <- choice

    }

    data[[i]] <- column

  }

  # An unnamed data frame argument is unpacked into the tibble's columns.
  tibble::tibble(data)

}
Eliot-RUIZ/eDNAevaluation documentation built on Dec. 17, 2021, 6:25 p.m.