library(tidyverse)
library(rscopus)
library(wosr)
library(rbace)
library(data.table)
library(httr)
library(XML)
library(jsonlite)
library(taxize)
library(gtools)
library(ggpubr)

# NOTE(review): this is a top-level call, so the package is (re)installed from
# GitHub every time this file is run or sourced, before it is attached below.
# Consider running the install once, interactively, instead - confirm intent.
remotes::install_github("jessicatytam/specieshindex", build_vignettes = TRUE, dependencies = TRUE)
library(specieshindex)
#' Build a Scopus title-only (`TITLE(...)`) search string.
#'
#' @param genus Genus name.
#' @param species Optional species epithet. When `NULL` the quoted name is the
#'   genus followed by a single space (unchanged from the previous behaviour).
#' @param synonyms Optional character vector of alternative names; each one is
#'   quoted and OR-ed with the binomial name.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause as-is (it is not quoted or escaped).
#' @return A single character string.
create_query_string_T_scopus <- function(genus,
                                         species = NULL,
                                         synonyms,
                                         additionalkeywords){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  name_clause <- paste0('"', genus, ' ', paste0(species), '"')
  if (has_synonyms) {
    # Collapse every synonym into ONE clause; the previous implementation
    # returned one query per synonym when keywords were also supplied.
    name_clause <- paste(c(name_clause, paste0('"', synonyms, '"')),
                         collapse = ' OR ')
  }

  if (!has_keywords) {
    return(paste0('TITLE(', name_clause, ')'))
  }
  if (has_synonyms) {
    # Parenthesise the OR group so AND applies to the whole group.
    return(paste0('TITLE((', name_clause, ') AND ', additionalkeywords, ')'))
  }
  paste0('TITLE(', name_clause, ' AND ', additionalkeywords, ')')
}
#' Build a Scopus title/abstract/keyword (`TITLE-ABS-KEY(...)`) search string.
#'
#' @param genus Genus name.
#' @param species Optional species epithet. When `NULL` the quoted name is the
#'   genus followed by a single space (unchanged from the previous behaviour).
#' @param synonyms Optional character vector of alternative names; each one is
#'   quoted and OR-ed with the binomial name.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause as-is (it is not quoted or escaped).
#' @return A single character string.
create_query_string_TAK_scopus <- function(genus,
                                           species = NULL,
                                           synonyms,
                                           additionalkeywords){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  name_clause <- paste0('"', genus, ' ', paste0(species), '"')
  if (has_synonyms) {
    # Collapse every synonym into ONE clause; the previous implementation
    # returned one query per synonym when keywords were also supplied.
    name_clause <- paste(c(name_clause, paste0('"', synonyms, '"')),
                         collapse = ' OR ')
  }

  if (!has_keywords) {
    return(paste0('TITLE-ABS-KEY(', name_clause, ')'))
  }
  if (has_synonyms) {
    # Parenthesise the OR group so AND applies to the whole group.
    return(paste0('TITLE-ABS-KEY((', name_clause, ') AND ', additionalkeywords, ')'))
  }
  paste0('TITLE-ABS-KEY(', name_clause, ' AND ', additionalkeywords, ')')
}
#' Build a Web of Science title-only (`TI = ...`) search string.
#'
#' @param genus Genus name.
#' @param species Optional species epithet. When `NULL` the quoted name is the
#'   genus followed by a single space (unchanged from the previous behaviour).
#' @param synonyms Optional character vector of alternative names; each one is
#'   quoted and OR-ed with the binomial name.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause as-is (it is not quoted or escaped).
#' @return A single character string.
create_query_string_T_wos <- function(genus,
                                      species = NULL,
                                      synonyms,
                                      additionalkeywords){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  name_clause <- paste0('"', genus, ' ', paste0(species), '"')
  if (has_synonyms) {
    # Keep ALL synonyms: the previous loop rebuilt the string on every pass,
    # so only the last synonym ever reached the query.
    name_clause <- paste(c(name_clause, paste0('"', synonyms, '"')),
                         collapse = ' OR ')
  }

  if (has_keywords && has_synonyms) {
    return(paste0('TI = ((', name_clause, ') AND ', additionalkeywords, ')'))
  }
  if (has_keywords) {
    return(paste0('TI = (', name_clause, ' AND ', additionalkeywords, ')'))
  }
  if (has_synonyms) {
    return(paste0('TI = (', name_clause, ')'))
  }
  paste0('TI = ', name_clause)
}
#' Build a Web of Science title/abstract/author-keyword search string.
#'
#' The same clause is repeated for the `TI`, `AB` and `AK` field tags and the
#' three parts are joined with `OR`.
#'
#' @param genus Genus name.
#' @param species Optional species epithet. When `NULL` the quoted name is the
#'   genus followed by a single space (unchanged from the previous behaviour).
#' @param synonyms Optional character vector of alternative names; each one is
#'   quoted and OR-ed with the binomial name.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause as-is (it is not quoted or escaped).
#' @return A single character string.
create_query_string_TAK_wos <- function(genus,
                                        species = NULL,
                                        synonyms,
                                        additionalkeywords){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  name_clause <- paste0('"', genus, ' ', paste0(species), '"')
  if (has_synonyms) {
    # Keep ALL synonyms: the previous loop rebuilt the string on every pass,
    # so only the last synonym ever reached the query.
    name_clause <- paste(c(name_clause, paste0('"', synonyms, '"')),
                         collapse = ' OR ')
  }

  field_clause <- if (has_keywords && has_synonyms) {
    paste0('((', name_clause, ') AND ', additionalkeywords, ')')
  } else if (has_keywords) {
    paste0('(', name_clause, ' AND ', additionalkeywords, ')')
  } else if (has_synonyms) {
    paste0('(', name_clause, ')')
  } else {
    name_clause
  }

  paste0(c('TI', 'AB', 'AK'), ' = ', field_clause, collapse = ' OR ')
}
#' Build a BASE title-only (`dctitle:`) search string.
#'
#' @param genus Genus name.
#' @param species Optional species epithet. When `NULL` the quoted name is the
#'   genus followed by a single space (unchanged from the previous behaviour).
#' @param synonyms Optional character vector of alternative names, OR-ed with
#'   the binomial name. Synonyms are inserted unquoted, as before.
#'   NOTE(review): multi-word synonyms are therefore not sent as phrases -
#'   confirm this is intended.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause as-is.
#' @return A single character string.
create_query_string_T_base <- function(genus,
                                       species = NULL,
                                       synonyms,
                                       additionalkeywords){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  name_clause <- paste0('"', genus, ' ', paste0(species), '"')
  if (has_synonyms) {
    # Collapse every synonym into ONE clause; the previous implementation
    # returned one query per synonym when keywords were also supplied.
    name_clause <- paste(c(name_clause, synonyms), collapse = ' OR ')
  }

  if (has_keywords && has_synonyms) {
    return(paste0('dctitle:((', name_clause, ') AND ', additionalkeywords, ')'))
  }
  if (has_keywords) {
    return(paste0('dctitle:(', name_clause, ' AND ', additionalkeywords, ')'))
  }
  if (has_synonyms) {
    return(paste0('dctitle:(', name_clause, ')'))
  }
  paste0('dctitle:', name_clause)
}
#' Build a BASE title/description/subject search string.
#'
#' The same clause is repeated for the `dctitle`, `dcdescription` and
#' `dcsubject` fields and the three parts are joined with `OR`.
#'
#' @param genus Genus name.
#' @param species Optional species epithet. When `NULL` the quoted name is the
#'   genus followed by a single space (unchanged from the previous behaviour).
#' @param synonyms Optional character vector of alternative names, OR-ed with
#'   the binomial name. Synonyms are inserted unquoted, as before.
#'   NOTE(review): multi-word synonyms are therefore not sent as phrases -
#'   confirm this is intended.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause as-is.
#' @return A single character string.
create_query_string_TAK_base <- function(genus,
                                         species = NULL,
                                         synonyms,
                                         additionalkeywords){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  name_clause <- paste0('"', genus, ' ', paste0(species), '"')
  if (has_synonyms) {
    # Keep ALL synonyms: the previous loop rebuilt the string on every pass,
    # so only the last synonym ever reached the query.
    name_clause <- paste(c(name_clause, synonyms), collapse = ' OR ')
  }

  field_clause <- if (has_keywords && has_synonyms) {
    paste0('((', name_clause, ') AND ', additionalkeywords, ')')
  } else if (has_keywords) {
    paste0('(', name_clause, ' AND ', additionalkeywords, ')')
  } else if (has_synonyms) {
    paste0('(', name_clause, ')')
  } else {
    name_clause
  }

  paste0(c('dctitle', 'dcdescription', 'dcsubject'), ':', field_clause,
         collapse = ' OR ')
}
#' Build a Lens title-only request body (JSON text).
#'
#' @param genus Genus name.
#' @param species Species epithet.
#' @param synonyms Optional character vector of alternative names; each one is
#'   quoted and OR-ed with the binomial name.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause. It is inserted into the JSON text as-is.
#'   NOTE(review): a double quote or backslash in this value would break the
#'   JSON - confirm callers never pass one.
#' @param size Value written to the `"size"` field of the request.
#' @return A single character string containing the JSON request body.
create_query_string_T_lens <- function(genus, species, synonyms, additionalkeywords, size = 50000){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  # Phrases are wrapped in \" so the quotes survive inside the JSON string.
  search_terms <- paste0('\\"', genus, ' ', species, '\\"')
  if (has_synonyms) {
    # Keep ALL synonyms in one clause: the previous loop rebuilt the body on
    # every pass (only the last synonym survived), and the combined branch
    # returned one body per synonym.
    search_terms <- paste(c(search_terms, paste0('\\"', synonyms, '\\"')),
                          collapse = ' OR ')
  }
  if (has_keywords) {
    if (has_synonyms) {
      search_terms <- paste0('(', search_terms, ')')
    }
    search_terms <- paste0(search_terms, ' AND ', additionalkeywords)
  }

  # Avoid scientific notation (e.g. 1e+05) in the emitted number.
  size_text <- format(size, scientific = FALSE)

  paste0('{
    "query": {
          "bool": {
              "must": [{
                  "query_string": {
                      "query": "', search_terms, '",
                      "fields": ["title"]
                  }
              }]
          }
      },
      "size": ', size_text, '
  }')
}
#' Build a Lens title/abstract/keyword request body (JSON text).
#'
#' @param genus Genus name.
#' @param species Species epithet.
#' @param synonyms Optional character vector of alternative names; each one is
#'   quoted and OR-ed with the binomial name.
#' @param additionalkeywords Optional keyword expression that is AND-ed with
#'   the name clause. It is inserted into the JSON text as-is.
#'   NOTE(review): a double quote or backslash in this value would break the
#'   JSON - confirm callers never pass one.
#' @param size Value written to the `"size"` field of the request.
#' @return A single character string containing the JSON request body.
create_query_string_TAK_lens <- function(genus, species, synonyms, additionalkeywords, size = 50000){
  # Treat zero-length input the same as an omitted argument.
  has_synonyms <- !missing(synonyms) && length(synonyms) > 0
  has_keywords <- !missing(additionalkeywords) && length(additionalkeywords) > 0

  # Phrases are wrapped in \" so the quotes survive inside the JSON string.
  search_terms <- paste0('\\"', genus, ' ', species, '\\"')
  if (has_synonyms) {
    # Keep ALL synonyms in one clause: the previous loop rebuilt the body on
    # every pass (only the last synonym survived), and the combined branch
    # returned one body per synonym.
    search_terms <- paste(c(search_terms, paste0('\\"', synonyms, '\\"')),
                          collapse = ' OR ')
  }
  if (has_keywords) {
    if (has_synonyms) {
      search_terms <- paste0('(', search_terms, ')')
    }
    search_terms <- paste0(search_terms, ' AND ', additionalkeywords)
  }

  # Avoid scientific notation (e.g. 1e+05) in the emitted number.
  size_text <- format(size, scientific = FALSE)

  paste0('{
    "query": {
          "bool": {
              "must": [{
                  "query_string": {
                      "query": "', search_terms, '",
                      "fields": ["title", "abstract", "keyword"],
                      "default_operator": "or"
                  }
              }]
          }
      },
      "size": ', size_text, '
  }')
}
#' Run a title-only Scopus search with an extra filter appended to the query.
#'
#' NOTE(review): `genus`, `synonyms`, `additionalkeywords` and `apikey` are not
#' parameters of this function. R resolves them through the environment in
#' which this function was defined (not through the caller), so they must
#' exist there when the search runs - confirm this is intended.
#'
#' @param request Text appended verbatim to the generated `TITLE(...)` query.
#' @param species Optional species epithet forwarded to the query builder.
#' @return Whatever `rscopus::scopus_search()` returns.
scopus_request_t <- function(request, species = NULL) {
  rscopus::scopus_search(
    query = paste0(
      create_query_string_T_scopus(genus,
                                   species = paste0(species),
                                   synonyms,
                                   additionalkeywords),
      request
    ),
    api_key = apikey,
    verbose = TRUE,
    wait_time = 3
  )
}
#' Run a title/abstract/keyword Scopus search with an extra filter appended.
#'
#' NOTE(review): `genus`, `synonyms`, `additionalkeywords` and `apikey` are not
#' parameters of this function. R resolves them through the environment in
#' which this function was defined (not through the caller), so they must
#' exist there when the search runs - confirm this is intended.
#'
#' @param request Text appended verbatim to the generated `TITLE-ABS-KEY(...)`
#'   query.
#' @param species Optional species epithet forwarded to the query builder.
#' @return Whatever `rscopus::scopus_search()` returns.
scopus_request_tak <- function(request, species = NULL) {
  rscopus::scopus_search(
    query = paste0(
      create_query_string_TAK_scopus(genus,
                                     species = paste0(species),
                                     synonyms,
                                     additionalkeywords),
      request
    ),
    api_key = apikey,
    verbose = TRUE,
    wait_time = 3
  )
}
#species check
#' Check a binomial name against the taxonomic name resolver (older variant).
#'
#' Queries `taxize::gnr_resolve()` with data sources "1", "3", "4" and "12" and
#' stops unless the best match scores at least 0.75.
#'
#' @param genus Genus name.
#' @param species Species epithet.
#' @return The confirmation message (also printed); an error is raised when no
#'   sufficiently good match exists.
sp_check_old <- function(genus, species) {
  not_found <- "Species not found on CoL, ITIS, NCBI, or EoL. Please check your spelling and try again."
  findname <- taxize::gnr_resolve(sci = paste(genus, species),
                                  data_source_ids = list("1", "3", "4", "12")) #check if the species exist
  # The old test was `length(findname>0)`, which only worked by accident;
  # test the number of returned matches explicitly.
  if (is.null(findname) || NROW(findname) == 0) {
    stop(not_found)
  }
  # Ignore missing scores so an NA cannot reach the comparison below.
  scores <- findname$score[!is.na(findname$score)]
  if (length(scores) == 0 || max(scores) < 0.75) {
    stop(not_found)
  }
  print(paste("Species found on CoL, ITIS, NCBI, or EoL."))
}
#genus check
#' Check a genus name against the taxonomic name resolver.
#'
#' Queries `taxize::gnr_resolve()` with data sources "1", "3", "4" and "12" and
#' stops unless the best match scores at least 0.75.
#'
#' @param genus Genus name.
#' @return The confirmation message (also printed); an error is raised when no
#'   sufficiently good match exists.
genus_check <- function(genus) {
  not_found <- "Genus not found on CoL, ITIS, NCBI, or EoL. Please check your spelling and try again."
  findname <- taxize::gnr_resolve(sci = genus,
                                  data_source_ids = list("1", "3", "4", "12")) #check if the genus exists
  # The old test was `length(findname>0)`, which only worked by accident;
  # test the number of returned matches explicitly.
  if (is.null(findname) || NROW(findname) == 0) {
    stop(not_found)
  }
  # Ignore missing scores so an NA cannot reach the comparison below.
  scores <- findname$score[!is.na(findname$score)]
  if (length(scores) == 0 || max(scores) < 0.75) {
    stop(not_found)
  }
  print(paste("Genus found on CoL, ITIS, NCBI, or EoL."))
}
#species check
#' Check a species (or genus-only) name against the taxonomic name resolver.
#'
#' Queries `taxize::gnr_resolve()` with data sources "1", "3", "4" and "12" and
#' stops unless the best match scores at least 0.75.
#'
#' @param genus Genus name.
#' @param species Optional species epithet; when `NULL` (or empty) only the
#'   genus is looked up.
#' @return The confirmation message (also printed); an error is raised when no
#'   sufficiently good match exists.
sp_check <- function(genus, species = NULL) {
  not_found <- "Species not found on CoL, ITIS, NCBI, or EoL. Please check your spelling and try again."
  # trimws() drops the trailing space left behind when `species` is empty.
  sci_name <- trimws(paste(genus, paste(species)))
  findname <- taxize::gnr_resolve(sci = sci_name,
                                  data_source_ids = list("1", "3", "4", "12")) #check if the species exist
  # The old test was `length(findname>0)`, which only worked by accident;
  # test the number of returned matches explicitly.
  if (is.null(findname) || NROW(findname) == 0) {
    stop(not_found)
  }
  # Ignore missing scores so an NA cannot reach the comparison below.
  scores <- findname$score[!is.na(findname$score)]
  if (length(scores) == 0 || max(scores) < 0.75) {
    stop(not_found)
  }
  print(paste("Species found on CoL, ITIS, NCBI, or EoL."))
}
#theme for ggplot
#' ggplot2 theme with the x axis title, text and ticks removed and bold strip
#' labels.
#'
#' Every ggplot2 helper is namespace-qualified so the function also works when
#' ggplot2 is installed but not attached (the original mixed qualified and
#' bare calls).
#'
#' @return A ggplot2 theme object to add to a plot with `+`.
spindex_plot_theme <- function() {
  ggplot2::theme(title = ggplot2::element_blank(),
                 axis.title.x = ggplot2::element_blank(),
                 axis.text.x = ggplot2::element_blank(),
                 axis.ticks.x = ggplot2::element_blank(),
                 axis.line.x = ggplot2::element_line(colour = "grey20"),
                 axis.title.y = ggplot2::element_blank(),
                 axis.text.y = ggplot2::element_text(size = 10),
                 strip.background = ggplot2::element_rect(fill = "white"),
                 strip.text = ggplot2::element_text(size = 16,
                                                    face = "bold",
                                                    hjust = -0.02),
                 legend.title = ggplot2::element_text(size = 12,
                                                      face = "bold"),
                 legend.key = ggplot2::element_rect(fill = "white"),
                 plot.background = ggplot2::element_rect(fill = "white"),
                 panel.background = ggplot2::element_rect(fill = "white"),
                 panel.grid.major.y = ggplot2::element_line(colour = "grey90"),
                 panel.grid.minor.y = ggplot2::element_line(colour = "grey90",
                                                            linetype = "longdash"),
                 panel.grid.major.x = ggplot2::element_blank(),
                 panel.grid.minor.x = ggplot2::element_blank())
}
#theme for ggplot
#' ggplot2 theme with bold axis titles, visible x axis text and no x ticks.
#'
#' Every ggplot2 helper is namespace-qualified so the function also works when
#' ggplot2 is installed but not attached (the original mixed qualified and
#' bare calls).
#'
#' @return A ggplot2 theme object to add to a plot with `+`.
sppub_plot_theme <- function() {
  ggplot2::theme(title = ggplot2::element_blank(),
                 axis.title.x = ggplot2::element_text(size = 12,
                                                      face = "bold"),
                 axis.text.x = ggplot2::element_text(size = 10),
                 axis.ticks.x = ggplot2::element_blank(),
                 axis.line.x = ggplot2::element_line(colour = "grey20"),
                 axis.title.y = ggplot2::element_text(size = 12,
                                                      face = "bold"),
                 axis.text.y = ggplot2::element_text(size = 10),
                 legend.key = ggplot2::element_rect(fill = "white"),
                 plot.background = ggplot2::element_rect(fill = "white"),
                 panel.background = ggplot2::element_rect(fill = "white"),
                 panel.grid.major.y = ggplot2::element_line(colour = "grey90"),
                 panel.grid.minor.y = ggplot2::element_line(colour = "grey90",
                                                            linetype = "longdash"),
                 panel.grid.major.x = ggplot2::element_blank(),
                 panel.grid.minor.x = ggplot2::element_blank())
}
#count() wrapper
#' Count the records for a species in the chosen database.
#'
#' Dispatches to `Count_scopus()`, `Count_wos()` or `Count_base()`.
#'
#' @param db Database to query: "scopus", "wos" or "base".
#' @param search "t" for title only, "tak" for title, abstract and keywords.
#' @param genus Genus name.
#' @param species Optional species epithet.
#' @param synonyms Optional alternative names (forwarded; may be omitted).
#' @param additionalkeywords Optional extra keywords (forwarded; may be omitted).
#' @return The value returned by the database-specific counting function.
Count <- function(db, search, genus, species = NULL, synonyms, additionalkeywords) {
  if (missing(db)) {
    stop('Pick a database by setting db = "scopus" / "wos" / "base".')
  }
  if (missing(search)) {
    stop('Pick the search field(s) by setting search = "t" for title only or search = "tak" for title, absract, and keywords')
  }
  if (missing(genus)) {
    stop('Genus is missing from your query.')
  }
  # Reject unsupported values up front; previously they fell through every
  # branch and surfaced as "object 'countsp' not found".
  if (length(db) != 1 || !db %in% c("scopus", "wos", "base")) {
    stop('Unknown database "', paste(db, collapse = ", "),
         '". Pick a database by setting db = "scopus" / "wos" / "base".')
  }
  if (length(search) != 1 || !search %in% c("t", "tak")) {
    stop('Unknown search field "', paste(search, collapse = ", "),
         '". Set search = "t" for title only or search = "tak" for title, abstract, and keywords.')
  }
  # `synonyms` / `additionalkeywords` are passed on directly so that their
  # "missing" status is preserved for the downstream query builders.
  if (db == "scopus") {
    countsp <- Count_scopus(search = search, genus, species, synonyms, additionalkeywords)
  } else if (db == "wos") {
    # The original spelled this argument `saerch`.
    # NOTE(review): assumes Count_wos() names the parameter `search`, like
    # Count_scopus() - confirm against its definition.
    countsp <- Count_wos(search = search, genus, species, synonyms, additionalkeywords)
  } else {
    countsp <- Count_base(search = search, genus, species, synonyms, additionalkeywords)
  }
  return(countsp)
}
#fetch() wrapper
#' Download citation records for a species from the chosen database.
#'
#' Dispatches to the Scopus or Web of Science fetch functions. BASE is
#' rejected because data extraction is not available for it.
#'
#' @param db Database to query: "scopus" or "wos" ("base" raises an error).
#' @param search "t" for title only, "tak" for title, abstract and keywords.
#' @param genus Genus name.
#' @param species Optional species epithet.
#' @param synonyms Optional alternative names (forwarded; may be omitted).
#' @param additionalkeywords Optional extra keywords (forwarded; may be omitted).
#' @param language Forwarded to the Scopus fetch functions; default 0.
#' @return The value returned by the database-specific fetch function.
Fetch <- function(db, search, genus, species = NULL, synonyms, additionalkeywords, language = 0) {
  if (missing(db)) {
    stop('Pick a database by setting db = "scopus" / "wos" / "base".')
  }
  if (missing(search)) {
    stop('Pick the search field(s) by setting search = "t" for title only or search = "tak" for title, absract, and keywords')
  }
  if (missing(genus)) {
    stop('Genus is missing from your query.')
  }
  # Reject unsupported values up front; previously they fell through every
  # branch and surfaced as "object 'fetchsp' not found".
  if (length(db) != 1 || !db %in% c("scopus", "wos", "base")) {
    stop('Unknown database "', paste(db, collapse = ", "),
         '". Pick a database by setting db = "scopus" / "wos" / "base".')
  }
  if (db == "base") {
    stop("Data extraction is not available for BASE")
  }
  if (length(search) != 1 || !search %in% c("t", "tak")) {
    stop('Unknown search field "', paste(search, collapse = ", "),
         '". Set search = "t" for title only or search = "tak" for title, abstract, and keywords.')
  }
  # `synonyms` / `additionalkeywords` are passed on directly so that their
  # "missing" status is preserved for the downstream query builders.
  if (db == "scopus" && search == "t") {
    fetchsp <- FetchT_scopus(genus, species, synonyms, additionalkeywords, language)
  } else if (db == "scopus") {
    # `language` used to be dropped on this path although FetchTAK_scopus()
    # accepts it.
    fetchsp <- FetchTAK_scopus(genus, species, synonyms, additionalkeywords, language)
  } else if (search == "t") {
    fetchsp <- FetchT_wos(genus, species, synonyms, additionalkeywords)
  } else {
    fetchsp <- FetchTAK_wos(genus, species, synonyms, additionalkeywords)
  }
  return(fetchsp)
}
#' Count the Scopus records matching a species query.
#'
#' Validates the name with `sp_check()`, sends one request to the Scopus
#' Search API and reads `opensearch:totalResults` from the XML response.
#' The API key is read from a variable named `apikey` that must be visible
#' from this function's enclosing environment.
#'
#' @param search "t" for title only, "tak" for title, abstract and keywords.
#' @param genus Genus name.
#' @param species Optional species epithet.
#' @param synonyms Optional alternative names (may be omitted).
#' @param additionalkeywords Optional extra keywords (may be omitted).
#' @param datatype Currently unused: the request always asks for
#'   "application/xml" because the response is parsed as XML.
#' @return Numeric total number of matching records.
Count_scopus <- function(search,
                         genus,
                         species = NULL,
                         synonyms,
                         additionalkeywords,
                         datatype = "application/xml") {
  # Fail fast on a bad `search` value, before any network traffic.
  if (length(search) != 1 || !search %in% c("t", "tak")) {
    stop('Set search = "t" for title-only searches, or "tak" for searches in the title, abstract, and keywords.')
  }
  sp_check(genus,
           species = paste0(species))
  # `synonyms` / `additionalkeywords` are passed on directly so that their
  # "missing" status is preserved for the query builders.
  if (search == "t") {
    query_string <- create_query_string_T_scopus(genus,
                                                 species = paste0(species),
                                                 synonyms,
                                                 additionalkeywords)
  } else {
    query_string <- create_query_string_TAK_scopus(genus,
                                                   species = paste0(species),
                                                   synonyms,
                                                   additionalkeywords)
  }
  # Use HTTPS so the API key in the query string is not sent in clear text.
  response <- httr::GET("https://api.elsevier.com/content/search/scopus",
                        query = list(apiKey = apikey,
                                     query = query_string,
                                     httpAccept = "application/xml")) #format the URL to be sent to the API
  httr::stop_for_status(response) #pass any HTTP errors to the R console
  response_data <- XML::xmlParse(response) #parse the data to extract values
  resultCount <- as.numeric(XML::xpathSApply(response_data,"//opensearch:totalResults", XML::xmlValue)) #get the total number of search results for the string
  return(resultCount)
}
#code from Fonti (modified)
#' Count the Scopus records whose title matches a species query.
#'
#' Validates the name with `sp_check()`, sends one request to the Scopus
#' Search API and reads `opensearch:totalResults` from the XML response.
#' The API key is read from a variable named `apikey` that must be visible
#' from this function's enclosing environment.
#'
#' @param genus Genus name.
#' @param species Species epithet.
#' @param synonyms Optional alternative names (may be omitted).
#' @param additionalkeywords Optional extra keywords (may be omitted).
#' @param datatype Currently unused: the request always asks for
#'   "application/xml" because the response is parsed as XML.
#' @return Numeric total number of matching records.
CountSpT_scopus <- function(genus, species, synonyms, additionalkeywords, datatype = "application/xml") {
  sp_check(genus, species)
  # Use HTTPS so the API key in the query string is not sent in clear text.
  response <- httr::GET("https://api.elsevier.com/content/search/scopus",
                        query = list(apiKey = apikey,
                                     query = create_query_string_T_scopus(genus, species, synonyms, additionalkeywords),
                                     httpAccept = "application/xml")) #format the URL to be sent to the API
  httr::stop_for_status(response) #pass any HTTP errors to the R console
  response_data <- XML::xmlParse(response) #parse the data to extract values
  resultCount <- as.numeric(XML::xpathSApply(response_data,"//opensearch:totalResults", XML::xmlValue)) #get the total number of search results for the string
  return(resultCount)
}
#code from Fonti (modified)
#' Count the Scopus records whose title, abstract or keywords match a species
#' query.
#'
#' Validates the name with `sp_check()`, sends one request to the Scopus
#' Search API and reads `opensearch:totalResults` from the XML response.
#' The API key is read from a variable named `apikey` that must be visible
#' from this function's enclosing environment.
#'
#' @param genus Genus name.
#' @param species Species epithet.
#' @param synonyms Optional alternative names (may be omitted).
#' @param additionalkeywords Optional extra keywords (may be omitted).
#' @param datatype Currently unused: the request always asks for
#'   "application/xml" because the response is parsed as XML.
#' @return Numeric total number of matching records.
CountSpTAK_scopus <- function(genus, species, synonyms, additionalkeywords, datatype = "application/xml") {
  sp_check(genus, species)
  # Use HTTPS so the API key in the query string is not sent in clear text.
  response <- httr::GET("https://api.elsevier.com/content/search/scopus",
                        query = list(apiKey = apikey,
                                     query = create_query_string_TAK_scopus(genus, species, synonyms, additionalkeywords),
                                     httpAccept = "application/xml")) #format the URL to be sent to the API
  httr::stop_for_status(response) #pass any HTTP errors to the R console
  response_data <- XML::xmlParse(response) #parse the data to extract values
  resultCount <- as.numeric(XML::xpathSApply(response_data,"//opensearch:totalResults", XML::xmlValue)) #get the total number of search results for the string
  return(resultCount)
}
#this function downloads citation data from scopus
#' Download Scopus citation records for a title-only species search.
#'
#' Counts the matching records first. Up to 5000 records are fetched in pages
#' of 1000; larger result sets, and every per-language search, are fetched in
#' publication-year slices. The API key is read from a variable named `apikey`
#' that must be visible from this function's enclosing environment.
#'
#' Fix over the previous version: the year and language filters used to be
#' handed to a helper as literal text (e.g. `'" AND PUBYEAR = ", i'`), so the
#' loop values were never interpolated and the helper looked up `genus` etc.
#' outside this function. The filters are now built here and appended to the
#' query directly.
#'
#' @param genus Genus name.
#' @param species Optional species epithet.
#' @param synonyms Optional alternative names (may be omitted).
#' @param additionalkeywords Optional extra keywords (may be omitted).
#' @param language Set to 1 to search once per language listed in
#'   "data/languages.csv" and add a `language` column; any other value runs a
#'   single search across all languages.
#' @return A data frame of citation records, or `data.frame(citations = 0)`
#'   when nothing matches.
FetchT_scopus <- function(genus,
                         species = NULL,
                         synonyms,
                         additionalkeywords,
                         language = 0) {
  count <- Count_scopus(search = "t",
                        genus,
                        species = paste0(species),
                        synonyms,
                        additionalkeywords) #check the number of records
  print(paste(count, "records found."))
  if (count < 1) {
    noCitations <- data.frame(citations = 0)
    return(noCitations)
  }

  # Built once; `synonyms` / `additionalkeywords` are passed on directly so
  # their "missing" status is preserved for the query builder.
  base_query <- create_query_string_T_scopus(genus,
                                             species = paste0(species),
                                             synonyms,
                                             additionalkeywords)

  # Run one Scopus search for `base_query` plus `filter`; return a data frame.
  fetch_citations <- function(filter = "", ...) {
    result <- rscopus::scopus_search(query = paste0(base_query, filter),
                                     api_key = apikey,
                                     verbose = TRUE,
                                     wait_time = 3,
                                     ...)
    rscopus::entries_to_citation_df(result$entries)
  }

  # Drop placeholder rows that carry no title (when a title column exists).
  drop_untitled <- function(records) {
    if ("title" %in% names(records)) {
      records <- records[!is.na(records$title), ]
    }
    records
  }

  # Publication-year slices, in the same order as before.
  year_filters <- c(" AND PUBYEAR > 2019",
                    paste0(" AND PUBYEAR = ", 2019:1990),
                    " AND PUBYEAR > 1984 AND PUBYEAR < 1990",
                    " AND PUBYEAR > 1979 AND PUBYEAR < 1985",
                    " AND PUBYEAR > 1974 AND PUBYEAR < 1980",
                    " AND PUBYEAR > 1969 AND PUBYEAR < 1975",
                    " AND PUBYEAR < 1970")

  # Fetch every year slice (optionally with an extra filter) and stack them.
  fetch_by_year <- function(extra_filter = "") {
    slices <- lapply(year_filters, function(year_filter) {
      fetch_citations(paste0(year_filter, extra_filter))
    })
    dplyr::bind_rows(slices)
  }

  if (language == 1) {
    lang <- read.csv(file = "data/languages.csv", header = TRUE)[-c(1)]
    languages <- as.character(lang$language)
    per_language <- vector("list", length(languages))
    for (j in seq_along(languages)) {
      language_filter <- paste0(" AND LANGUAGE(", languages[j], ")")
      # Use HTTPS so the API key in the query string is not sent in clear text.
      response <- httr::GET("https://api.elsevier.com/content/search/scopus",
                            query = list(apiKey = apikey,
                                         query = paste0(base_query, language_filter),
                                         httpAccept = "application/xml")) #format the URL to be sent to the API
      httr::stop_for_status(response) #pass any HTTP errors to the R console
      response_data <- XML::xmlParse(response) #parse the data to extract values
      resultCount <- as.numeric(XML::xpathSApply(response_data,"//opensearch:totalResults", XML::xmlValue)) #get the total number of search results for the string
      if (resultCount > 0) {
        langlist <- fetch_by_year(language_filter)
        # Assigning a scalar column to a zero-row data frame is an error.
        if (nrow(langlist) > 0) {
          langlist$language <- languages[j]
        }
        per_language[[j]] <- langlist
      }
    }
    # NULL entries (languages without results) are ignored by bind_rows().
    datalist <- drop_untitled(dplyr::bind_rows(per_language)) #remove NA papers
  } else if (count <= 5000) {
    step_size <- 1000 #the number of records to retrieve in each loop
    page_count <- ceiling(count / step_size) #rounded up to the nearest integer
    pages <- vector("list", page_count)
    for (i in seq_len(page_count) - 1) {
      print(paste("starting iteration: ", i, " Note: iteration size is ", step_size, " records, which runs of 200 records inside each iteration."))
      print(paste("Fetching records now."))
      pages[[i + 1]] <- data.frame(fetch_citations(max_count = step_size,
                                                   start = step_size * i))
    }
    datalist <- dplyr::bind_rows(pages)
  } else {
    # NOTE(review): untitled placeholder rows are now dropped on this path as
    # well, matching the per-language path - confirm this is wanted.
    datalist <- drop_untitled(fetch_by_year())
  }

  returned <- nrow(datalist)
  print(paste(returned, "records retrived in total."))
  return(datalist)
}
# Downloads citation records from Scopus for a title/abstract/keyword search.
#
# Arguments:
#   genus, species, synonyms, additionalkeywords - forwarded to Count_scopus()
#     and create_query_string_TAK_scopus() to build the search query.
#   language - when equal to 1, records are fetched once per language listed in
#     data/languages.csv and tagged with a `language` column; any other value
#     fetches without a language filter.
#
# Returns a data frame of citation records, or data.frame(citations = 0) when
# Count_scopus() reports fewer than one record.
#
# Reads the global `apikey`. The helper scopus_request_tak() is defined outside
# this block.
FetchTAK_scopus <- function(genus,
                            species = NULL,
                            synonyms,
                            additionalkeywords,
                            language = 0) {
  count <- Count_scopus(search = "tak",
                        genus,
                        species = paste0(species),
                        synonyms,
                        additionalkeywords) #check the number of records
  print(paste(count, "records found."))
  # nothing to download: return a one-row placeholder instead of real records
  if (count < 1) {
    noCitations <- data.frame(citations = 0)
    return(noCitations)
  }
  if (language == 1) {
    # per-language download; the first column of the CSV is dropped
    lang <- read.csv(file = "data/languages.csv", header = T)[-c(1)]
    datalist <- data.frame()
    # NOTE(review): 1:length(...) iterates over c(1, 0) if the language list is empty
    for (j in 1:length(lang$language)) {
      # ask the search API how many records exist for this language before fetching
      response <- httr::GET("http://api.elsevier.com/content/search/scopus",
                          query = list(apiKey = apikey,
                                       query = paste0(create_query_string_TAK_scopus(genus,
                                                                                     species = paste0(species),
                                                                                     synonyms,
                                                                                     additionalkeywords),
                                                      " AND LANGUAGE(", lang$language[j], ")"),
                                       httpAccept = "application/xml")) #format the URL to be sent to the API
      httr::stop_for_status(response) #pass any HTTP errors to the R console
      response_data <- XML::xmlParse(response) #parse the data to extract values
      resultCount <- as.numeric(XML::xpathSApply(response_data,"//opensearch:totalResults", XML::xmlValue)) #get the total number of search results for the string
      if (resultCount > 0) {
        #search begins
        # NOTE(review): scopus_request_tak() receives a single string that looks like the
        # tail of a paste0() argument list; presumably the helper splices it into the
        # query expression - confirm against its definition.
        search2020 <- scopus_request_tak('" AND PUBYEAR > 2019 AND LANGUAGE(", lang$language[j], ")"')
        search2020df <- rscopus::entries_to_citation_df(search2020$entries)
        searchloopdf = data.frame()
        # one request per publication year, from 2019 back to 1990
        for (i in 2019:1990) {
          searchloop <- scopus_request_tak('" AND PUBYEAR = ", i, " AND LANGUAGE(", lang$language[j], ")"')
          searchlooplist <- rscopus::entries_to_citation_df(searchloop$entries)
          searchloopdf <- dplyr::bind_rows(searchloopdf,
                                           searchlooplist)
          }
        # five-year windows for 1970-1989, then everything before 1970
        search1985 <- scopus_request_tak('" AND PUBYEAR > 1984 AND PUBYEAR < 1990 AND LANGUAGE(", lang$language[j], ")"')
        search1985df <- rscopus::entries_to_citation_df(search1985$entries)
        search1980 <- scopus_request_tak('" AND PUBYEAR > 1979 AND PUBYEAR < 1985 AND LANGUAGE(", lang$language[j], ")"')
        search1980df <- rscopus::entries_to_citation_df(search1980$entries)
        search1975 <- scopus_request_tak('" AND PUBYEAR > 1974 AND PUBYEAR < 1980 AND LANGUAGE(", lang$language[j], ")"')
        search1975df <- rscopus::entries_to_citation_df(search1975$entries)
        search1970 <- scopus_request_tak('" AND PUBYEAR > 1969 AND PUBYEAR < 1975 AND LANGUAGE(", lang$language[j], ")"')
        search1970df <- rscopus::entries_to_citation_df(search1970$entries)
        search_old <- scopus_request_tak('" AND PUBYEAR < 1970 AND LANGUAGE(", lang$language[j], ")"')
        search_olddf <- rscopus::entries_to_citation_df(search_old$entries)
        langlist <- dplyr::bind_rows(search2020df,
                                     searchloopdf,
                                     search1985df,
                                     search1980df,
                                     search1975df,
                                     search1970df,
                                     search_olddf)
        # tag every record of this batch with the language it was requested for
        langlist$language <- lang$language[j]
        #search ends
        datalist <- dplyr::bind_rows(datalist,
                                     langlist)
      }
      }
    datalist <- datalist[!is.na(datalist$title), ] #remove NA papers
  } else {
    #page through the results directly when the count is 5000 or fewer
    if (count <= 5000) {
      step_size <- 1000 #the number of records to retrieve in each loop
      start_record <- 0
      datalist = data.frame()
      looprepeat <- ceiling(count/step_size)-1 #the number of loop times, rounded up to the nearest integer
      #loop starts
      for (i in 0:looprepeat) { 
        print(paste("starting iteration: ", i, " Note: iteration size is ", step_size, " records, which runs of 200 records inside each iteration."))
        print(paste("Fetching records now."))
        search <- rscopus::scopus_search(query = create_query_string_TAK_scopus(genus,
                                                                                species = paste0(species),
                                                                                synonyms, 
                                                                                additionalkeywords),
                                         api_key = apikey,
                                         verbose = TRUE,
                                         max_count = step_size,
                                         start = step_size*i,
                                         wait_time = 3)
        # start_record is updated here but never read afterwards; paging is driven by `start = step_size*i`
        start_record <- as.numeric(summary(search)[1,1]) #move the pointer of starting record for each iteration to a new value
        searchdf <- rscopus::entries_to_citation_df(search$entries)
        list <- data.frame(searchdf)
        datalist <- dplyr::bind_rows(datalist,
                                     list)
        #loop ends
        }} else {
          # more than 5000 records: fetch in publication-year windows instead of paging
          # (presumably to keep each request under an API result cap - confirm)
          #search begins
          search2020 <- scopus_request_tak('" AND PUBYEAR > 2019"')
          search2020df <- rscopus::entries_to_citation_df(search2020$entries)
          searchloopdf = data.frame()
          # one request per publication year, from 2019 back to 1990
          for (i in 2019:1990) {
            searchloop <- scopus_request_tak('" AND PUBYEAR = ", i')
            searchlooplist <- rscopus::entries_to_citation_df(searchloop$entries)
            searchloopdf <- dplyr::bind_rows(searchloopdf, searchlooplist)
            }
          # five-year windows for 1970-1989, then everything before 1970
          search1985 <- scopus_request_tak('" AND PUBYEAR > 1984 AND PUBYEAR < 1990"')
          search1985df <- rscopus::entries_to_citation_df(search1985$entries)
          search1980 <- scopus_request_tak('" AND PUBYEAR > 1979 AND PUBYEAR < 1985"')
          search1980df <- rscopus::entries_to_citation_df(search1980$entries)
          search1975 <- scopus_request_tak('" AND PUBYEAR > 1974 AND PUBYEAR < 1980"')
          search1975df <- rscopus::entries_to_citation_df(search1975$entries)
          search1970 <- scopus_request_tak('" AND PUBYEAR > 1969 AND PUBYEAR < 1975"')
          search1970df <- rscopus::entries_to_citation_df(search1970$entries)
          search_old <- scopus_request_tak('" AND PUBYEAR < 1970"')
          search_olddf <- rscopus::entries_to_citation_df(search_old$entries)
          datalist <- dplyr::bind_rows(search2020df,
                                       searchloopdf,
                                       search1985df,
                                       search1980df,
                                       search1975df,
                                       search1970df,
                                       search_olddf)
          #search ends  
        }
    }
  # report how many rows were actually collected
  returned <- dim(datalist)[1]
  print(paste(returned, "records retrived in total."))
  return(datalist)
}
# Queries Web of Science for the records matching a genus/species search.
#
# Arguments:
#   search - "t" for a title-only query, "tak" for title, abstract and keywords;
#     any other value raises an error.
#   genus, species, synonyms, additionalkeywords - forwarded to the matching
#     create_query_string_*_wos() builder.
#
# Returns whatever wosr::query_wos() returns for the built query.
# Reads the global session id `sid`; sp_check() is defined elsewhere in the file.
Count_wos <- function(search,
                      genus,
                      species = NULL,
                      synonyms,
                      additionalkeywords) {
  sp_check(genus,
           species = paste0(species))
  # pick the query builder first, then issue a single request
  if (search == "t") {
    wos_query <- create_query_string_T_wos(genus,
                                           species = paste0(species),
                                           synonyms,
                                           additionalkeywords)
  } else if (search == "tak") {
    wos_query <- create_query_string_TAK_wos(genus,
                                             species = paste0(species),
                                             synonyms,
                                             additionalkeywords)
  } else {
    stop('Set search = "t" for title-only searches, or "tak" for searches in the title, abstract, and keywords.')
  }
  wosr::query_wos(query = wos_query,
                  sid = sid)
}
# Title-only Web of Science query for a species.
# Runs sp_check() on the name, builds the query with create_query_string_T_wos()
# and returns the result of wosr::query_wos(). Reads the global `sid`.
CountSpT_wos <- function(genus, species, synonyms, additionalkeywords) {
  sp_check(genus, species)
  title_query <- create_query_string_T_wos(genus, species, synonyms, additionalkeywords)
  wosr::query_wos(query = title_query,
                  sid = sid)
}
# Title/abstract/keyword Web of Science query for a species.
# Runs sp_check() on the name, builds the query with create_query_string_TAK_wos()
# and returns the result of wosr::query_wos(). Reads the global `sid`.
CountSpTAK_wos <- function(genus, species, synonyms, additionalkeywords) {
  sp_check(genus, species)
  tak_query <- create_query_string_TAK_wos(genus, species, synonyms, additionalkeywords)
  wosr::query_wos(query = tak_query,
                  sid = sid)
}
# Downloads Web of Science records for a title-only search.
#
# Arguments genus, species, synonyms and additionalkeywords are forwarded to
# Count_wos() and create_query_string_T_wos().
#
# Returns a data.table with one row per `ut`, where every other column is the
# comma-separated string of its non-NA values, and with tot_cites, doc_type and
# date renamed to citations, description and cover_date. Returns
# data.frame(citations = 0) when the count check reports fewer than one record.
# Reads the global session id `sid`.
FetchT_wos <- function(genus,
                       species = NULL,
                       synonyms,
                       additionalkeywords) {
  # NOTE(review): Count_wos() hands back the value of wosr::query_wos(), which is
  # documented as a query_result object rather than a bare number - confirm that
  # the paste() and `< 1` below behave as intended.
  n_found <- Count_wos(search = "t",
                       genus,
                       species = paste0(species),
                       synonyms,
                       additionalkeywords)
  print(paste(n_found, "records found."))
  if (n_found < 1) {
    return(data.frame(citations = 0))
  }
  pulled <- wosr::pull_wos(query = create_query_string_T_wos(genus,
                                                             species = paste0(species),
                                                             synonyms,
                                                             additionalkeywords),
                           sid = sid)
  # stack the pulled tables, then collapse to one row per record id
  results <- data.table::rbindlist(pulled, fill = TRUE)
  results <- data.table::setDT(results)[, lapply(data.table::.SD, function(x) toString(na.omit(x))), by = ut]
  # rename to the column names the index functions expect, in this order
  new_names <- c(tot_cites = "citations",
                 doc_type = "description",
                 date = "cover_date")
  for (old_name in names(new_names)) {
    names(results)[names(results) == old_name] <- new_names[[old_name]]
  }
  print(paste(nrow(results), "records retrived in total."))
  results
}
# Downloads Web of Science records for a title/abstract/keyword search.
#
# Arguments genus, species, synonyms and additionalkeywords are forwarded to
# Count_wos() and create_query_string_TAK_wos().
#
# Returns a data.table with one row per `ut`, where every other column is the
# comma-separated string of its non-NA values, and with tot_cites, doc_type and
# date renamed to citations, description and cover_date. Returns
# data.frame(citations = 0) when the count check reports fewer than one record.
# Reads the global session id `sid`.
FetchTAK_wos <- function(genus,
                         species = NULL,
                         synonyms,
                         additionalkeywords) {
  # NOTE(review): Count_wos() hands back the value of wosr::query_wos(), which is
  # documented as a query_result object rather than a bare number - confirm that
  # the paste() and `< 1` below behave as intended.
  n_found <- Count_wos(search = "tak",
                       genus,
                       species = paste0(species),
                       synonyms,
                       additionalkeywords)
  print(paste(n_found, "records found."))
  if (n_found < 1) {
    return(data.frame(citations = 0))
  }
  pulled <- wosr::pull_wos(query = create_query_string_TAK_wos(genus,
                                                               species = paste0(species),
                                                               synonyms,
                                                               additionalkeywords),
                           sid = sid)
  # stack the pulled tables, then collapse to one row per record id
  results <- data.table::rbindlist(pulled, fill = TRUE)
  results <- data.table::setDT(results)[, lapply(data.table::.SD, function(x) toString(na.omit(x))), by = ut]
  # rename to the column names the index functions expect, in this order
  new_names <- c(tot_cites = "citations",
                 doc_type = "description",
                 date = "cover_date")
  for (old_name in names(new_names)) {
    names(results)[names(results) == old_name] <- new_names[[old_name]]
  }
  print(paste(nrow(results), "records retrived in total."))
  results
}
# Counts the BASE (base-search.net) records matching a genus/species search.
#
# Arguments:
#   search - "t" for a title-only query, "tak" for title, abstract and keywords;
#     any other value raises an error.
#   genus, species, synonyms, additionalkeywords - forwarded to the matching
#     create_query_string_*_base() builder.
#
# Returns the numeric value of the numFound attribute in the XML response.
# HTTP errors are raised through httr::stop_for_status().
Count_base <- function(search,
                       genus,
                       species = NULL,
                       synonyms,
                       additionalkeywords) {
  sp_check(genus,
           species = paste0(species))
  # choose the query text first so the request itself is written only once
  if (search == "t") {
    base_query <- create_query_string_T_base(genus,
                                             species = paste0(species),
                                             synonyms,
                                             additionalkeywords)
  } else if (search == "tak") {
    base_query <- create_query_string_TAK_base(genus,
                                               species = paste0(species),
                                               synonyms,
                                               additionalkeywords)
  } else {
    stop('Set search = "t" for title-only searches, or "tak" for searches in the title, abstract, and keywords.')
  }
  response <- httr::GET("https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi",
                        query = list(func = "PerformSearch",
                                     query = base_query))
  httr::stop_for_status(response)
  parsed <- XML::xmlParse(response)
  as.numeric(XML::xpathSApply(parsed, "//response/result/@numFound"))
}
# Counts BASE records for a title-only species search.
# Runs sp_check(), sends the query built by create_query_string_T_base() to the
# BASE HTTP search interface and returns the numeric numFound attribute of the
# XML response. HTTP errors are raised through httr::stop_for_status().
CountSpT_base <- function(genus, species, synonyms, additionalkeywords) {
  sp_check(genus, species)
  request_params <- list(func = "PerformSearch",
                         query = create_query_string_T_base(genus, species, synonyms, additionalkeywords))
  response <- httr::GET("https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi",
                        query = request_params)
  httr::stop_for_status(response)
  parsed <- XML::xmlParse(response)
  as.numeric(XML::xpathSApply(parsed, "//response/result/@numFound"))
}
# Counts BASE records for a title/abstract/keyword species search.
# Runs sp_check(), sends the query built by create_query_string_TAK_base() to the
# BASE HTTP search interface and returns the numeric numFound attribute of the
# XML response. HTTP errors are raised through httr::stop_for_status().
CountSpTAK_base <- function(genus, species, synonyms, additionalkeywords) {
  sp_check(genus, species)
  request_params <- list(func = "PerformSearch",
                         query = create_query_string_TAK_base(genus, species, synonyms, additionalkeywords))
  response <- httr::GET("https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi",
                        query = request_params)
  httr::stop_for_status(response)
  parsed <- XML::xmlParse(response)
  as.numeric(XML::xpathSApply(parsed, "//response/result/@numFound"))
}
# Counts Lens scholarly records for a title-only species search.
# Posts the body built by create_query_string_T_lens() to the Lens scholarly
# search endpoint and returns the numeric `total` field of the JSON reply, or 0
# when the reply has no `total`. Reads the global `token`.
CountSpT_lens <- function(genus, species, synonyms, additionalkeywords, size = 50000) {
  sp_check(genus, species)
  request_headers <- add_headers(.headers = c("Authorization" = token,
                                              "Content-Type" = "application/json"))
  response <- httr::POST(url = "https://api.lens.org/scholarly/search",
                         request_headers,
                         body = create_query_string_T_lens(genus, species, synonyms, additionalkeywords, size))
  lens_content <- jsonlite::fromJSON(rawToChar(response$content))
  # a reply without `total` is treated as zero matches
  if (is.null(lens_content$total)) {
    return(0)
  }
  as.numeric(lens_content$total)
}
# Counts Lens scholarly records for a title/abstract/keyword species search.
# Posts the body built by create_query_string_TAK_lens() to the Lens scholarly
# search endpoint and returns the numeric `total` field of the JSON reply, or 0
# when the reply has no `total`. Reads the global `token`.
CountSpTAK_lens <- function(genus, species, synonyms, additionalkeywords, size = 50000) {
  sp_check(genus, species)
  request_headers <- add_headers(.headers = c("Authorization" = token,
                                              "Content-Type" = "application/json"))
  response <- httr::POST(url = "https://api.lens.org/scholarly/search",
                         request_headers,
                         body = create_query_string_TAK_lens(genus, species, synonyms, additionalkeywords, size))
  lens_content <- jsonlite::fromJSON(rawToChar(response$content))
  # a reply without `total` is treated as zero matches
  if (is.null(lens_content$total)) {
    return(0)
  }
  as.numeric(lens_content$total)
}
# Downloads citation data from Lens for a title-only species search.
#
# Arguments:
#   genus, species, synonyms, additionalkeywords, size - forwarded to
#     create_query_string_T_lens() to build the request body.
#
# Returns the data frame from lens2r::get_scholarly_df() with
# scholarly_citations_count, source.title, publication_type and date_published
# renamed to citations, journal, description and cover_date; missing citation
# counts are set to 0 and cover_date is cut to its first 10 characters.
# Reads the global `token`.
FetchSpT_lens <- function(genus, species, synonyms, additionalkeywords, size = 50000) {
  findname <- taxize::gnr_resolve(sci = c(genus, species)) #check if the species exist
  # Only report a match when one was found. The previous case_when(... ~ print())
  # evaluated print() eagerly, so the message appeared regardless of the lookup.
  if (any(findname$submitted_name %in% findname$matched_name)) {
    print(paste("Species found on the Encyclopedia of Life."))
  }
  results <- lens2r::get_scholarly_df(query = create_query_string_T_lens(genus, species, synonyms, additionalkeywords, size),
                                      token = token)
  #renaming columns
  names(results)[names(results) == "scholarly_citations_count"] <- "citations"
  names(results)[names(results) == "source.title"] <- "journal"
  names(results)[names(results) == "publication_type"] <- "description"
  names(results)[names(results) == "date_published"] <- "cover_date"
  #replacing NA with 0 (vectorised, so a zero-row result no longer trips 1:nrow())
  results$citations[is.na(results$citations)] <- 0
  #clean cover_date
  results$cover_date <- substr(results$cover_date, 1, 10)
  #showing final list of records
  returned <- nrow(results)
  print(paste(returned, "records retrived in total."))
  return(results)
}
# Downloads citation data from Lens for a title/abstract/keyword species search.
#
# Arguments:
#   genus, species, synonyms, additionalkeywords, size - forwarded to
#     create_query_string_TAK_lens() to build the request body.
#
# Returns the data frame from lens2r::get_scholarly_df() with
# scholarly_citations_count, source.title and publication_type renamed to
# citations, journal and description, and a cover_date column taken from
# date_published (or year_published when no date_published column exists);
# missing citation counts are set to 0 and cover_date is cut to its first 10
# characters. Reads the global `token`.
FetchSpTAK_lens <- function(genus, species, synonyms, additionalkeywords, size = 50000) {
  findname <- taxize::gnr_resolve(sci = c(genus, species)) #check if the species exist
  # Only report a match when one was found. The previous case_when(... ~ print())
  # evaluated print() eagerly, so the message appeared regardless of the lookup.
  if (any(findname$submitted_name %in% findname$matched_name)) {
    print(paste("Species found on the Encyclopedia of Life."))
  }
  results <- lens2r::get_scholarly_df(query = create_query_string_TAK_lens(genus, species, synonyms, additionalkeywords, size),
                                      token = token)
  #renaming columns
  names(results)[names(results) == "scholarly_citations_count"] <- "citations"
  names(results)[names(results) == "source.title"] <- "journal"
  names(results)[names(results) == "publication_type"] <- "description"
  # Use the full publication date, as the title-only fetch does: SpHAfterdate()
  # parses cover_date with "%Y-%m-%d", which a bare year cannot satisfy.
  # year_published is kept as a fallback for results without date_published.
  date_column <- if ("date_published" %in% names(results)) "date_published" else "year_published"
  names(results)[names(results) == date_column] <- "cover_date"
  #replacing NA with 0 (vectorised, so a zero-row result no longer trips 1:nrow())
  results$citations[is.na(results$citations)] <- 0
  #clean cover_date
  results$cover_date <- substr(results$cover_date, 1, 10)
  #showing final list of records
  returned <- nrow(results)
  print(paste(returned, "records retrived in total."))
  return(results)
}
# Total number of publications: one row of `data` per publication.
TotalPub <- function(data) {
  nrow(data)
}
# Total number of citations: the sum of the `citations` column after coercing
# it to numeric. An NA citation count makes the total NA.
TotalCite <- function(data) {
  data[["citations"]] <- as.numeric(data$citations)
  sum(data[["citations"]])
}
# Total number of journals: the count of distinct values in the `journal` column.
TotalJournals <- function(data) {
  length(unique(data$journal))
}
# Tabulates the `description` column into a one-row data frame: one column per
# distinct description, holding its frequency as a character string.
SourceType <- function(data) {
  source_type <- data.frame(table(data$description))
  # transpose so that row 1 holds the description labels and row 2 the counts
  source_type_transpose <- as.data.frame(t(source_type))
  names(source_type_transpose) <- as.matrix(source_type_transpose[1,])
  # drop = FALSE keeps a data frame (and its column name) even when there is
  # only one source type; without it the result collapses to an unnamed vector
  source_type_transpose <- source_type_transpose[-1, , drop = FALSE]
  row.names(source_type_transpose) <- NULL
  return(source_type_transpose)
}
# Calculates the h-index: the largest h such that h publications each have at
# least h citations.
#
# `data` needs a `citations` column; it is coerced to numeric and NA counts are
# dropped by sort(). Returns a single number, 0 when there are no citation
# counts at all.
SpHindex <- function(data) {
  ranked <- sort(as.numeric(data$citations), decreasing = TRUE) #descending order, NA removed
  # The paper at rank i counts while it has more than i - 1 citations. Because
  # `ranked` is non-increasing, the papers that qualify form a prefix, so the
  # number of TRUE values is the h-index. Unlike a 1:length() loop this also
  # handles an empty vector.
  Hindex <- sum(ranked > seq_along(ranked) - 1)
  return(as.numeric(Hindex))
}
# Calculates the number of years since the first publication.
#
# The year is taken from the first four characters of `cover_date`; records
# without a usable year are ignored. Returns the current calendar year minus
# the earliest publication year.
YearsPublishing <- function(data) {
  publication_years <- as.numeric(substr(data$cover_date, 1, 4))
  current_year <- as.numeric(substr(Sys.Date(), 1, 4))
  years_publishing <- current_year - min(publication_years, na.rm = TRUE) #time since first publication in years
  return(years_publishing)
}
# Calculates the m-index: the h-index divided by the number of years since the
# first publication, rounded to 3 decimal places.
# The h-index is computed here, so it does not need to be calculated beforehand.
#
# `data` needs `citations` and `cover_date` columns. If the earliest publication
# year equals the current year the divisor is 0, so the result is Inf (or NaN
# when the h-index is 0).
SpMindex <- function(data) {
  ranked <- sort(as.numeric(data$citations), decreasing = TRUE) #descending order, NA removed
  # h-index: the paper at rank i counts while it has more than i - 1 citations.
  # Vectorised so that an empty citation vector gives 0 instead of failing in
  # a 1:length() loop.
  Hindex <- sum(ranked > seq_along(ranked) - 1)
  publication_years <- as.numeric(substr(data$cover_date, 1, 4))
  years_publishing <- as.numeric(substr(Sys.Date(), 1, 4)) - min(publication_years, na.rm = TRUE) #time since first publication in years
  Mindex <- round(Hindex/years_publishing, digits = 3) #round to 3 decimal places
  return(Mindex)
}
# Calculates the i10 index: the number of publications with 10 or more citations.
# The `citations` column is coerced to numeric; NA counts are not counted.
Spi10 <- function(data) {
  data[["citations"]] <- as.numeric(data$citations)
  has_ten_or_more <- data[["citations"]] >= 10
  sum(has_ten_or_more, na.rm = TRUE)
}
# Calculates the h-index of the past 5 years, counted back from today's date.
#
# Delegates to SpHAfterdate(), which returns 0 when no record falls inside the
# window. The earlier `if (d < 1)` guard compared a timestamp with 1 and could
# never be true for a present-day date, so it has been removed together with an
# unused variable and a discarded as.Date() call.
SpH5 <- function(data) {
  # convert to POSIXlt so that 5 can be subtracted from the year slot
  cutoff <- as.POSIXlt(Sys.Date())
  cutoff$year <- cutoff$year - 5
  h5 <- SpHAfterdate(data, cutoff)
  return(h5)
}
# Calculates the h-index of the records published after a given date.
#
# Arguments:
#   data - records with `cover_date` (parsed as "%Y-%m-%d") and `citations`.
#   date - cut-off; anything as.Date() accepts. Only records with a cover_date
#     strictly later than this are kept.
#
# Returns 0 when no record falls in the time frame, otherwise SpHindex() of the
# remaining records.
SpHAfterdate <- function(data, date) {
  data$cover_date <- as.Date(data$cover_date, format = "%Y-%m-%d") #change format of the date from factor to date
  subsetdata <- dplyr::filter(data, cover_date > as.Date(date))
  # nrow() gives a plain scalar; dplyr::count() returned a data frame that was
  # only usable as an `if` condition through implicit coercion
  if (nrow(subsetdata) < 1) { #the index is 0 if there are no records in this time frame
    return(as.numeric("0"))
  }
  HAfterdate <- SpHindex(subsetdata) #calls Hindex function
  return(HAfterdate)
}
# Returns a one-row summary of all of the indices for a species.
#
# Arguments:
#   data - records as returned by the Fetch* functions, or the
#     data.frame(citations = 0) placeholder they return when nothing was found.
#   genus, species - used to label the row.
#   sourcetype - when 1, the per-description counts from SourceType() are
#     appended as extra columns.
#
# Returns a data frame with genus_species, species, genus, publications,
# citations, journals, years_publishing, h, m, i10 and h5 (plus the source type
# columns when requested). For the placeholder input every count and index is 0
# and years_publishing is NA.
Allindices <- function(data, genus, species, sourcetype = 0) {
  # all.equal() returns TRUE or a character description of the difference,
  # never FALSE, so the result must be wrapped in isTRUE(). Comparing it with
  # `== FALSE` made the source type branch unreachable.
  no_records <- isTRUE(all.equal(0, data$citations))
  if (no_records) {
    zeroIndex <- data.frame(genus_species = paste0(genus, " ", species),
                            species = paste0(species),
                            genus = paste0(genus),
                            publications = 0, citations = 0, journals = 0, years_publishing = NA, h = 0, m = 0, i10 = 0, h5 = 0)
    return(zeroIndex)
  }
  combine <- data.frame(genus_species = paste0(genus, " ", species),
                        species = paste0(species),
                        genus = paste0(genus),
                        publications = TotalPub(data),
                        citations = TotalCite(data),
                        journals = TotalJournals(data),
                        years_publishing = YearsPublishing(data),
                        h = SpHindex(data),
                        m = SpMindex(data),
                        i10 = Spi10(data),
                        h5 = SpH5(data))
  combine[is.na(combine)] <- 0 #replace NA values with 0
  if (sourcetype == 1) {
    source_types <- SourceType(data)
    combine_st <- cbind(combine, source_types)
    colnames(combine_st) <- c(names(combine), names(source_types))
    return(combine_st)
  }
  return(combine)
}
# Plots the output of Allindices(): one panel per index (h, m, i10, h5) in a
# 2 x 2 grid with free scales, one coloured point per species.
# Returns the ggplot object. spindex_plot_theme() is defined elsewhere.
plotAllindices <- function(data) {
  # reshape the index columns h through h5 into index/value pairs
  long_indices <- tidyr::pivot_longer(data,
                                      cols = h:h5,
                                      names_to = "index",
                                      values_to = "value")
  index_codes <- c("h", "m", "i10", "h5")
  index_titles <- c("h-index", "m-index", "i10 index", "h5 index")
  long_indices$index <- factor(long_indices$index,
                               levels = index_codes,
                               labels = index_titles)
  species_mapping <- aes(x = genus_species,
                         y = value,
                         colour = genus_species)
  ggplot(data = long_indices, species_mapping) +
    geom_point(size = 2) +
    labs(colour = "Species") +
    spindex_plot_theme() +
    facet_wrap(~index,
               scales = "free",
               nrow = 2,
               ncol = 2)
}
# Counts publications per year for one species.
# The year is the first four characters of `cover_date`. Years with no
# publications between the earliest and latest year are filled in with Freq = 0.
# Returns a table with columns Year, Freq and spp ("<genus> <species>").
getYear <- function(data, genus, species) {
  publication_years <- as.numeric(substr(data$cover_date, 1, 4))
  # naming the table dimension yields the Year column directly
  yearly_counts <- data.frame(table(Year = publication_years))
  yearly_counts$Year <- as.numeric(as.character(yearly_counts$Year))
  yearly_counts <- tidyr::complete(yearly_counts,
                                   Year = min(Year):max(Year),
                                   fill = list(Freq = 0))
  yearly_counts$spp <- paste(genus, species)
  yearly_counts
}
# Plots the number of articles per year, one line plus points per species.
# Expects the columns produced by getYear(): Year, Freq and spp.
# Returns the ggplot object. sppub_plot_theme() is defined elsewhere.
plotPub <- function(data) {
  year_mapping <- ggplot2::aes(x = Year,
                               y = Freq,
                               group = spp,
                               colour = spp)
  axis_titles <- ggplot2::labs(x = "Year",
                               y = "Number of articles",
                               colour = "Species")
  # NOTE(review): a discrete y scale is applied to the Freq counts - confirm this is intended
  ggplot2::ggplot(data, year_mapping) +
    ggplot2::geom_line(size = 0.5,
                       alpha = 0.8) +
    ggplot2::geom_point(size = 1,
                        alpha = 0.8) +
    axis_titles +
    ggplot2::scale_y_discrete() +
    sppub_plot_theme()
}
# ---- Manual checks against the live APIs ----
# Everything below runs network requests when the file is sourced.
# The trailing "#<number>" comments appear to be record counts noted by the
# author at the time of writing; they are not asserted anywhere.

# credentials: Scopus key from the environment, Web of Science session id
apikey <- Sys.getenv("Elsevier_API")
sid <- auth(username = NULL, password = NULL)


# Scopus: counts and downloads, with and without a species epithet
Count_scopus(search = "t", genus = "Osphranter", species = "rufus") #6
Count_scopus(search = "tak", genus = "Osphranter", species = "rufus") #19
Count_scopus(search = "t", genus = "Osphranter") #10
Count_scopus(search = "tak", genus = "Osphranter") #45
FetchT_scopus(genus = "Osphranter", species = "rufus") #6
FetchTAK_scopus(genus = "Osphranter", species = "rufus") #19
FetchT_scopus(genus = "Osphranter") #10
FetchTAK_scopus(genus = "Osphranter") #45

# Web of Science: counts and downloads
Count_wos(search = "t", genus = "Osphranter", species = "rufus") 
Count_wos(search = "tak", genus = "Osphranter", species = "rufus") 
Count_wos(search = "t", genus = "Osphranter") 
Count_wos(search = "tak", genus = "Osphranter") 
FetchT_wos(genus = "Osphranter", species = "rufus") #6
FetchTAK_wos(genus = "Osphranter", species = "rufus") #19
FetchT_wos(genus = "Osphranter") #10
FetchTAK_wos(genus = "Osphranter") #45


# BASE: counts only
Count_base(search = "t", genus = "Osphranter", species = "rufus") #10
Count_base(search = "tak", genus = "Osphranter", species = "rufus") #24
Count_base(search = "t", genus = "Osphranter") #20
Count_base(search = "tak", genus = "Osphranter") #70

# inspect the generated Web of Science query strings
cat(create_query_string_T_wos(genus = "Osphranter", species = "rufus"))
cat(create_query_string_T_wos(genus = "Osphranter"))


# Count() and Fetch() are not defined in this part of the file
# (presumably the db-dispatching wrappers from the specieshindex package - confirm)
Count(db = "scopus", search = "t", genus = "Osphranter", species = "rufus") #6
Count(db = "scopus", search = "tak", genus = "Osphranter", species = "rufus") #19
Count(db = "scopus", search = "t", genus = "Osphranter") #10
Count(db = "scopus", search = "tak", genus = "Osphranter") #45
Count(db = "base", search = "t", genus = "Osphranter", species = "rufus") #10
Count(db = "base", search = "tak", genus = "Osphranter", species = "rufus") #24
Count(db = "base", search = "t", genus = "Osphranter") #20
Count(db = "base", search = "tak", genus = "Osphranter") #70

Fetch(db = "scopus", search = "t", genus = "Osphranter", species = "rufus") #6
Fetch(db = "scopus", search = "tak", genus = "Osphranter", species = "rufus") #19
Fetch(db = "scopus", search = "t", genus = "Osphranter") #10
Fetch(db = "scopus", search = "tak", genus = "Osphranter") #45

Fetch(db = "base", search = "tak", genus = "Osphranter") #45


#something with more records
Count(db = "scopus", search = "t", genus = "Ovis", species = "aries") #19
Count(db = "scopus", search = "tak", genus = "Osphranter", species = "rufus") #19





# raw Web of Science query; note that `query` is assigned but the call below
# passes the same string literal instead of the variable
query <- 'TS = ("animal welfare") AND PY = (2002-2003)'
query_wos(query = 'TS = ("animal welfare") AND PY = (2002-2003)', sid = sid)

# jessicatytam/specieshindex documentation built on June 24, 2022, 3:31 a.m.