R/classify.R
In peruflorads43: Reviewed Official Classification of Endangered Wild Flora Species in Peru

#------------------------------------------------
# Function wrap of .classify_algo for multiple species
#
#' @keywords internal

.splist_classify <- function(x) {
  # Infrataxa identifiers
  infrasp <- c("subsp.", "ssp.", "var.", "subvar.",
               "forma", "f.", "subf.")
  Infrasp_cat <- toupper(infrasp)
  # Regular expression to make sure, infra code is between names
  Infrasp_cat_reg <- paste("[[:alpha:]]",
                           gsub("\\.",
                                "\\\\.",
                                Infrasp_cat),
                           "[[:alpha:]]")
  # Split names
  x_split <- strsplit(x, " ")

  # Aply the algorithm
  result <- lapply(x_split,
                   .classify_algo,
                   Infrasp_cat_reg)

  # Combine result list into a matrix
  result <- do.call(rbind, result)
  result <- cbind(x, result)

  # Combine categories and remove
  result[, 5] <- paste0(result[, 5], result[, 6])
  result[, 9] <- paste0(result[, 9], result[, 10])
  result <- result[, -c(6, 10), drop = FALSE]

  # Give the colnames of the matrix
  colnames(result) <- c(
    "Species",
    "Genus",
    "Epithet",
    "Author",
    "Subspecies",
    "Variety",
    "Subvariety",
    "Forma",
    "Subforma"
  )
  return(result)
}

#------------------------------------------------
# The algorithm for one name
#' @keywords internal
.classify_algo <- function(x_split_i,
                           Infrasp_cat_reg) {

  # Base output
  output <- character(10)

  # Count the number of names
  n <- length(x_split_i)

  # Genus and epithet
  output[1:2] <- x_split_i[1:2]


  # Check for infrataxa
  if (n > 2) {
    # Connect previous and next name to check for infras
    x_split_i_paste <- x_split_i
    x_split_i_paste[2:n] <- paste(substr(x_split_i[1:(n - 1)], 1, 1),
                                  x_split_i[2:n],
                                  substr(x_split_i[3:n],1 , 1))

    infra_check <- sapply(as.list(Infrasp_cat_reg),
                          function(x, y) {
                            regexpr(x, y) == 1
                          },
                          x_split_i_paste)
    infra_id <- rowSums(infra_check) > 0



    # if there is none get only the author name
    if (!any(infra_id)) {
      output[3] <- paste(x_split_i[3:n],
                         collapse = " ")
    } else {
      # If it has infra categories, get them

      n_infra <- sum(infra_id) # Number of infra categories
      pos <- which(infra_id)
      for (i in 1:n_infra) {
        # do it for all infra names
        # Get the position of the infra
        pos_1 <- pos[i] + 1
        pos_out <- which(infra_check[pos[i], ]) + 3
        output[pos_out] <- x_split_i[pos_1]
      }
      if (n > pos_1) {
        # get the author
        output[3] <- paste(x_split_i[(pos_1 + 1):n],
                           collapse = " ")
      }
      if (pos[1] > 3) { # Author names before infras
        output[3] <- paste(x_split_i[3:(pos[1] - 1)],
                           collapse = " ")
      }
    }
  }
  return(output)
}

Any scripts or data that you put into this service are public.

peruflorads43 documentation built on Aug. 21, 2023, 5:14 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

peruflorads43
Reviewed Official Classification of Endangered Wild Flora Species in Peru

R/classify.R
In peruflorads43: Reviewed Official Classification of Endangered Wild Flora Species in Peru

Defines functions .classify_algo .splist_classify

Try the peruflorads43 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

peruflorads43 Reviewed Official Classification of Endangered Wild Flora Species in Peru

R/classify.R In peruflorads43: Reviewed Official Classification of Endangered Wild Flora Species in Peru

Defines functions .classify_algo .splist_classify

Try the peruflorads43 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

peruflorads43
Reviewed Official Classification of Endangered Wild Flora Species in Peru

R/classify.R
In peruflorads43: Reviewed Official Classification of Endangered Wild Flora Species in Peru