R/gnr_df.r
In algaeClassify: Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications

Documented in gnr_df

#' Wrapper function to apply gnr_simple across a data.frame or list of species
#' names
#'
#' Provides convienent output with a row per name. To streamline merging with
#' original data.
#'
#' @param df data.frame containing names to check
#' @param name.column integer or character string with column name containing
#' 		  species names
#' @param sourceid integer vector with data source ids.
#' see https://resolver.globalnames.org/sources/
#' @param best_match boolean. Should the best match be returned based on score?
#' @param fuzzy_uninomial boolean. Use fuzzy matching for uninomial names?
#' @param canonical If TRUE, names do not include authorship or date
#' @param with_context If TRUE, Match scores are weighted for taxonomic
#' consistency
#' @param higher boolean: Return higher taxonomic classifications?
#'
#' @export gnr_df
#'
#' @return new data.frame original names (input_name), 1/0 flag for an exact
#' match,the best match (match_name, and other output from gnr_simple().
#' 			Will contain a row of NAs if no matches were found for a name.
#'
#' @examples
#' data(lakegeneva)
#' #example dataset with 50 rows
#'
#' lakegeneva<- genus_species_extract(lakegeneva,'phyto_name')
#' lakegeneva$genus_species <- trimws(paste(lakegeneva$genus,
#'	lakegeneva$species))
#'
#' #checking for matches from all GNRS sources, first 5 rows:
#' lakegeneva.namematches <- gnr_df(lakegeneva,"genus_species")
#' lakegeneva.namematches


gnr_df<-function(df,name.column,sourceid=NULL,best_match=TRUE,
                 fuzzy_uninomial=TRUE, canonical=TRUE,with_context=TRUE,higher=FALSE)
{
  #base API string
  gnrs.string<-"https://verifier.globalnames.org/api/v1/verifications/"

  name<-df[,name.column]

  gnrs.name<-gsub(' ','+',name)
  if(length(gnrs.name)>1){
    gnrs.name<-paste(gnrs.name,collapse='%7C')
  }
  gnrs.string<-paste0(gnrs.string,gnrs.name,'?')
  if(best_match){
    gnrs.string<-paste0(gnrs.string,"best_match_only=",best_match,"%26")
  }
  if(!is.null(sourceid)){
    data_sources<-paste(sourceid,collapse='%7C')
    gnrs.string<-paste0(gnrs.string,"data_source_ids=",data_sources,"%26")
  }
  if(!fuzzy_uninomial){
    gnrs.string<-paste0(gnrs.string,"fuzzy_uninomial=",fuzzy_uninomial,"%26")
  }
  gnrs.string<-paste0(gnrs.string,"species_group=true")
  con<-curl::curl(gnrs.string)

  results<-jsonlite::prettify(try(readLines(con,warn=FALSE),silent=TRUE))
  if(is(results,"try-error"))
  {
    close(con)
    stop("No matches") #throw error
  }

  close(con)

  #transform to r list of lists
  result.list<-jsonlite::fromJSON(results)

  results.output.list<-result.list$names$bestResult

  #results dataframe
  output<-data.frame(input_name=NULL,
                     match_name=NULL,
                     match_score=NULL,
                     match_type=NULL,
                     data_source=NULL,
                     data_import_date=NULL)
  if(higher){
    output<-cbind(data.frame(Kingdom=NULL,Phylum=NULL,Class=NULL,Order=NULL,Family=NULL),
               output)
  }
  for(i in 1:nrow(results.output.list)){
    if(is.null(results.output.list[i,])){
      tmp<-data.frame(input_name=name[i],
                         match_name=NA,
                         match_score=NA,
                         match_type=NA,
                         data_source=NA,
                         data_import_date=NA
      )
      if(higher){
        tmp<-cbind(data.frame(Kingdom=NA,Phylum=NA,Class=NA,Order=NA,Family=NA),
                   tmp)
      }
      output<-rbind(output,tmp)
    }else{
      results.output<-result.list$names$bestResult[i,]
      match.name<-if(canonical){results.output$currentCanonicalFull}else{
        results.output$currentName}

    tmp<-data.frame(input_name=name[i],
                    match_name=match.name,
                    match_score=results.output$sortScore,
                    match_type=result.list$names$matchType[i],
                    data_source=results.output$dataSourceTitleShort,
                    data_import_date=results.output$entryDate
    )

    if(higher){
      n<-nrow(results.output)
      higher.df<-data.frame(Kingdom=rep(NA,n),Phylum=rep(NA,n),Class=rep(NA,n),
                            Order=rep(NA,n),Family=rep(NA,n))
      higher.groups.keep<-tolower(names(higher.df))
      higher.taxonomy<-results.output$classificationPath
      higher.taxonomy<-sapply(higher.taxonomy,strsplit,split='|',fixed=TRUE)
      higher.classes<-results.output$classificationRanks
      higher.classes<-sapply(higher.classes,strsplit,split='|',fixed=TRUE)
      for(i in 1:n){
        higher.df[i,]<-higher.taxonomy[[i]][match(higher.groups.keep,
                                                  tolower(higher.classes[[i]]))]
        }
        tmp<-cbind(higher.df,tmp)
      }
    output<-rbind(output,tmp)

    }


  }

  return(output)
}

Any scripts or data that you put into this service are public.

algaeClassify documentation built on June 8, 2025, 1:30 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

algaeClassify
Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications

R/gnr_df.r
In algaeClassify: Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications

Defines functions gnr_df

Documented in gnr_df

Try the algaeClassify package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

algaeClassify Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications

R/gnr_df.r In algaeClassify: Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications

Defines functions gnr_df

Documented in gnr_df

Try the algaeClassify package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

algaeClassify
Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications

R/gnr_df.r
In algaeClassify: Tools to Query the 'Algaebase' Online Database, Standardize Phytoplankton Taxonomic Data, and Perform Functional Group Classifications