# Default knitr chunk options for the code chunks in this document.
knitr::opts_chunk$set(
  warning = FALSE,
  collapse = TRUE,
  comment = "#",
  fig.width = 7.5,
  fig.height = 5
)

library(AMR)
library(dplyr)

# Show missing values (NA) as empty cells in tables created with knitr::kable().
options(knitr.kable.NA = "")

# Describe the size and the column names of a data set in one sentence.
#
# @param dataset An object that supports nrow(), ncol() and colnames(),
#   such as a data frame.
# @return Character text stating the number of rows (formatted with a space as
#   big mark), the number of columns, and the column names as formatted by
#   AMR:::vector_or() in their original (unsorted) order.
structure_txt <- function(dataset) {
  n_rows <- format(nrow(dataset), big.mark = " ")
  n_cols <- ncol(dataset)
  column_list <- AMR:::vector_or(
    colnames(dataset),
    quotes = "*",
    last_sep = " and ",
    sort = FALSE
  )

  paste0(
    "A data set with ", n_rows, " rows and ", n_cols,
    " columns, containing the following column names:  \n",
    column_list, "."
  )
}

# Build the Markdown "last updated" sentence and download links for a data set.
#
# @param filename Name of the data set without file extension, e.g.
#   "microorganisms".
# @return One character string. It starts with a sentence holding the
#   modification time (in UTC) of "../data/<filename>.rda" and a link to the
#   reference page. When at least one exported file exists in
#   "../data-raw/datasets/", a bullet list follows with, per file format,
#   either a download link including the file size or an "unavailable" note.
download_txt <- function(filename) {
  # The "antivirals" data set is linked to the "antimicrobials" reference page.
  reference_page <- if (isTRUE(filename == "antivirals")) {
    "antimicrobials"
  } else {
    filename
  }
  last_updated <- trimws(format(
    file.mtime(paste0("../data/", filename, ".rda")),
    "%e %B %Y %H:%M:%S %Z",
    tz = "UTC"
  ))
  msg <- paste0(
    "It was last updated on ",
    last_updated,
    ". Find more info about the contents, (scientific) source, and structure of this [data set here](https://amr-for-r.org/reference/", reference_page, ".html).\n"
  )

  github_base <- "https://github.com/msberends/AMR/raw/main/data-raw/datasets/"

  # File extension -> description used in the link text, in display order.
  formats <- c(
    rds = "original R Data Structure (RDS) file",
    txt = "tab-separated text file",
    xlsx = "Microsoft Excel workbook",
    feather = "Apache Feather file",
    parquet = "Apache Parquet file",
    sav = "IBM SPSS Statistics data file",
    dta = "Stata DTA file"
  )
  paths <- paste0("../data-raw/datasets/", filename, ".", names(formats))
  available <- file.exists(paths)

  # One Markdown bullet for a single export file.
  create_txt <- function(path, software, exists) {
    if (isTRUE(exists)) {
      paste0(
        "* Download as [", software, "](", github_base, basename(path), ") (",
        AMR:::formatted_filesize(path), ")  \n"
      )
    } else {
      paste0("* *(unavailable as ", software, ")*\n")
    }
  }

  # Only add the list when at least one export exists.
  if (any(available)) {
    bullets <- vapply(
      seq_along(paths),
      function(i) create_txt(paths[[i]], formats[[i]], available[[i]]),
      character(1)
    )
    msg <- c(msg, "\n**Direct download links:**\n\n", bullets)
  }
  paste0(msg, collapse = "")
}

# Render the first rows of a data set as a centre-aligned knitr::kable() table.
#
# List columns are converted to text first, one string per cell: cells with
# more than three elements show the first three followed by ", ...", cells that
# are empty or contain only NA become "", and all other cells are joined with
# ", ". Other columns are left untouched.
#
# @param x Object that can be coerced with as.data.frame().
# @param rows Number of rows to keep, passed to head(); defaults to 6.
# @return The result of knitr::kable(align = "c").
print_df <- function(x, rows = 6) {
  # Collapse the content of a single list-column cell into one string.
  flatten_cell <- function(y) {
    if (length(y) > 3) {
      paste0(paste(y[1:3], collapse = ", "), ", ...")
    } else if (length(y) == 0 || all(is.na(y))) {
      ""
    } else {
      paste(y, collapse = ", ")
    }
  }

  x %>%
    as.data.frame(stringsAsFactors = FALSE) %>%
    head(n = rows) %>%
    mutate(across(everything(), function(column) {
      if (is.list(column)) {
        # vapply() always yields a character vector, also for zero rows
        vapply(column, flatten_cell, character(1))
      } else {
        column
      }
    })) %>%
    knitr::kable(align = "c")
}

All reference data (about microorganisms, antimicrobials, SIR interpretation, EUCAST rules, etc.) in this AMR package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, MS Excel, Apache Feather, Apache Parquet, SPSS, and Stata. We also provide tab-separated text files that are machine-readable and suitable for input in any software program, such as laboratory information systems.

If you are working in Python, be sure to use our AMR for Python package. It makes all relevant AMR data sets natively available in Python.

microorganisms: Full Microbial Taxonomy

r structure_txt(microorganisms)

This data set is available in R as microorganisms after you load the AMR package.

r download_txt("microorganisms")

NOTE: The exported files for SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB, which is the file size limit of GitHub. The file structures and compression techniques of SPSS and Stata are very inefficient. Advice? Use R instead. It's free and much better in many ways.

The tab-separated text file and Microsoft Excel workbook both contain all SNOMED codes as comma-separated values.

Example content

Included (sub)species per taxonomic kingdom:

# Count the records per kingdom; counts are shown with a space as big mark.
microorganisms %>%
  count(kingdom) %>%
  transmute(
    Kingdom = kingdom,
    `Number of (sub)species` = format(n, big.mark = " ")
  ) %>%
  print_df()

First 6 rows when filtering on genus Escherichia:

# Keep only the records of genus Escherichia and print the first rows.
print_df(filter(microorganisms, genus == "Escherichia"))

antimicrobials: Antibiotic and Antifungal Drugs

r structure_txt(antimicrobials)

This data set is available in R as antimicrobials after you load the AMR package.

r download_txt("antimicrobials")

The tab-separated text, Microsoft Excel, SPSS, and Stata files all contain the ATC codes, common abbreviations, trade names and LOINC codes as comma-separated values.

Example content

# Keep only the drugs whose code is a column name of example_isolates.
print_df(filter(antimicrobials, ab %in% colnames(example_isolates)))

clinical_breakpoints: Interpretation from MIC values & disk diameters to SIR

r structure_txt(clinical_breakpoints)

This data set is available in R as clinical_breakpoints after you load the AMR package.

r download_txt("clinical_breakpoints")

Example content

# Add the names for the 'mo' and 'ab' codes and place each name column
# directly after the code column it belongs to.
clinical_breakpoints %>%
  mutate(
    mo_name = mo_name(mo, language = NULL),
    ab_name = ab_name(ab, language = NULL)
  ) %>%
  relocate(mo_name, .after = mo) %>%
  relocate(ab_name, .after = ab) %>%
  print_df()

microorganisms.groups: Species Groups and Microbiological Complexes

r structure_txt(microorganisms.groups)

This data set is available in R as microorganisms.groups after you load the AMR package.

r download_txt("microorganisms.groups")

Example content

# Print the first rows of the data set.
print_df(microorganisms.groups)

intrinsic_resistant: Intrinsic Bacterial Resistance

r structure_txt(intrinsic_resistant)

This data set is available in R as intrinsic_resistant after you load the AMR package.

r download_txt("intrinsic_resistant")

Example content

Example rows when filtering on Enterobacter cloacae:

# Translate the codes to names, keep the rows for Enterobacter cloacae and
# print all of them sorted by antibiotic name.
# language = NULL is passed, as in the clinical_breakpoints example, so that the
# names (and therefore the filter and the printed table) should not depend on
# the locale of the machine that builds this document.
intrinsic_resistant %>%
  transmute(
    microorganism = mo_name(mo, language = NULL),
    antibiotic = ab_name(ab, language = NULL)
  ) %>%
  filter(microorganism == "Enterobacter cloacae") %>%
  arrange(antibiotic) %>%
  print_df(rows = Inf)

dosage: Dosage Guidelines from EUCAST

r structure_txt(dosage)

This data set is available in R as dosage after you load the AMR package.

r download_txt("dosage")

Example content

# Print the first rows of the data set.
print_df(dosage)

example_isolates: Example Data for Practice

r structure_txt(example_isolates)

This data set is available in R as example_isolates after you load the AMR package.

r download_txt("example_isolates")

Example content

# Print the first rows of the data set.
print_df(example_isolates)

example_isolates_unclean: Example Data for Practice

r structure_txt(example_isolates_unclean)

This data set is available in R as example_isolates_unclean after you load the AMR package.

r download_txt("example_isolates_unclean")

Example content

# Print the first rows of the data set.
print_df(example_isolates_unclean)

microorganisms.codes: Common Laboratory Codes

r structure_txt(microorganisms.codes)

This data set is available in R as microorganisms.codes after you load the AMR package.

r download_txt("microorganisms.codes")

Example content

# Print the first rows of the data set.
print_df(microorganisms.codes)

antivirals: Antiviral Drugs

r structure_txt(antivirals)

This data set is available in R as antivirals after you load the AMR package.

r download_txt("antivirals")

The tab-separated text, Microsoft Excel, SPSS, and Stata files all contain the trade names and LOINC codes as comma-separated values.

Example content

# Print the first rows of the data set.
print_df(antivirals)


msberends/AMR documentation built on June 14, 2025, 7:58 a.m.