# R/0_0_text_install.R
#
# Defines functions is_linux is_osx is_unix is_windows textrpp_uninstall textrpp_install_virtualenv python_unix_binary process_textrpp_installation_virtualenv process_textrpp_installation_conda textrpp_install
#
# Documented in textrpp_install textrpp_install_virtualenv textrpp_uninstall

# copied and modified from tensorflow::install.R, https://github.com/rstudio/tensorflow/blob/master/R/install.R
# and https://github.com/quanteda/spacyr/tree/master/R

# Local alias for reticulate's internal (non-exported) `conda_args` object, accessed with `:::`.
# NOTE(review): not referenced by the functions visible in this file; presumably used elsewhere
# in the package - confirm before removing.
conda_args <- reticulate:::conda_args

#' Install text required python packages in conda or virtualenv environment
#'
#' @description Install text required python packages (rpp) in a self-contained environment.
#'   \code{textrpp_install} installs them into a conda environment. An existing conda
#'   installation may be used by specifying its path; the default setting of \code{"auto"}
#'   locates an existing installation automatically.
#'
#'   If no conda installation is found, Miniconda is installed with
#'   \code{reticulate::install_miniconda()}. On macOS and Linux the user is first asked for
#'   confirmation (unless \code{prompt = FALSE}); on Windows it is installed without asking.
#'   Miniconda (or Anaconda) can also be installed manually, see the
#'   \href{https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html}{conda
#'   installation guide}.
#' @param conda character; path to conda executable. Default "auto", which
#'   automatically finds the path.
#' @param update_conda Boolean; update to the latest version of Miniconda after install?
#' (should be combined with force_conda = TRUE)
#' @param force_conda Boolean; force re-installation if Miniconda is already installed at the requested path?
#' @param pip \code{TRUE} to use pip for installing rpp. If \code{FALSE}, conda
#' package manager with conda-forge channel will be used for installing rpp.
#' @param rpp_version character; default is "rpp_version_system_specific_defaults", because different systems
#' require different combinations of python version and packages. It is also possible to
#' specify your own, such as c("torch==2.0.0", "transformers==4.19.2", "numpy", "pandas", "nltk", "scikit-learn",
#' "datasets", "evaluate").
#' @param python_version character; default is "python_version_system_specific_defaults". You can specify your
#' Python version for the condaenv yourself. On Windows, "find_python" uses the version of a 64-bit
#' system Python (3.5 to 3.9) reported by \code{reticulate::py_versions_windows()}, and falls back to
#' the default version when none is found.
#' @param python_path character; path to Python only for virtualenvironment installation
#' (not used by \code{textrpp_install}).
#' @param bin character; e.g., "python", only for virtualenvironment installation
#' @param envname character; name of the conda-environment to install text required python packages.
#'   Default is "textrpp_condaenv".
#' @param prompt logical; ask whether to proceed during the installation
#' @return \code{NULL}, invisibly. Called for its side effects.
#' @examples
#' \dontrun{
#' # install text required python packages in a miniconda environment (macOS and Linux)
#' textrpp_install(prompt = FALSE)
#'
#' # install text required python packages to an existing conda environment
#' textrpp_install(conda = "~/anaconda/bin/")
#' }
#' @export
textrpp_install <- function(conda = "auto",
                            update_conda = FALSE,
                            force_conda = FALSE,
                            rpp_version = "rpp_version_system_specific_defaults",
                            python_version = "python_version_system_specific_defaults",
                            envname = "textrpp_condaenv",
                            pip = TRUE,
                            python_path = NULL,
                            prompt = TRUE) {
  # Sentinel comparison that tolerates NULL / zero-length input instead of
  # failing inside `if ()` with "argument is of length zero".
  is_sentinel <- function(x, sentinel) {
    length(x) > 0 && isTRUE(x[[1]] == sentinel)
  }
  default_python_version <- "3.9.0"

  # Set system specific default versions.
  # The pinned set is currently identical on macOS, Linux and Windows.
  if (is_sentinel(rpp_version, "rpp_version_system_specific_defaults")) {
    rpp_version <- c(
      "torch==2.2.0",
      "transformers==4.38.0",
      "huggingface_hub==0.20.0",
      "numpy==1.26.0",
      "pandas==2.0.3",
      "nltk==3.8.1",
      "scikit-learn==1.3.0", # higher versions 1.4 and 1.5 yield errors in textTopics() and warnings in textTrain
      "datasets==2.16.1",
      "evaluate==0.4.0",
      "accelerate==0.26.0",
      "bertopic==0.16.3",
      "jsonschema==4.19.2",
      "sentence-transformers==2.2.2",
      "flair==0.13.0",
      "umap-learn==0.5.6",
      "hdbscan==0.8.33",
      "scipy==1.10.1",
      "aiohappyeyeballs==2.4.4"
    )
  }

  if (is_sentinel(python_version, "python_version_system_specific_defaults")) {
    python_version <- default_python_version
  }

  # verify os
  if (!is_windows() && !is_osx() && !is_linux()) {
    stop("This function is available only for Windows, Mac, and Linux")
  }

  # verify 64-bit
  if (.Machine$sizeof.pointer != 8) {
    stop(
      "Unable to install the text-package on this platform. ",
      "Binary installation is only available for 64-bit platforms."
    )
  }

  # install rust for singularity machine -- but it gives error in github action
  # NOTE(review): this pipes a remotely fetched script into `sh` on every call and
  # on every platform; kept as-is, but worth confirming that this is intended.
  system("curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y")

  # resolve and look for conda help(conda_binary)
  conda <- tryCatch(reticulate::conda_binary(conda), error = function(e) NULL)
  have_conda <- !is.null(conda)

  # Mac and linux
  if (is_unix()) {
    # validate that we have conda
    if (!have_conda) {
      message("No conda was found in the system. ")
      if (prompt) {
        ans <- utils::menu(
          c("No", "Yes"),
          title = paste(
            "Do you want Text to download miniconda using",
            "reticulate::install_miniconda()?"
          )
        )
      } else {
        ans <- 2 # When prompt is set to FALSE, default to install miniconda.
      }
      if (ans == 2) {
        reticulate::install_miniconda(update = update_conda)
        conda <- tryCatch(reticulate::conda_binary("auto"), error = function(e) NULL)
      } else {
        stop("Conda environment installation failed (no conda binary found)\n", call. = FALSE)
      }
    }

    # Re-install Miniconda only on request (`force_conda`); `update_conda` alone has
    # no effect here. A copy that was installed a moment ago is not re-installed.
    if (have_conda && force_conda) {
      reticulate::install_miniconda(update = update_conda, force = force_conda)
    }

    # Windows installation
  } else {
    # determine whether we have system python help(py_versions_windows)
    if (is_sentinel(python_version, "find_python")) {
      python_versions <- reticulate::py_versions_windows()
      usable <- which(
        python_versions$type == "PythonCore" &
          python_versions$version %in% c("3.5", "3.6", "3.7", "3.8", "3.9") &
          python_versions$arch == "x64"
      )

      if (length(usable) > 0) {
        # Only the version string is needed to build the conda "python=<version>" spec.
        python_version <- as.character(python_versions$version[[usable[[1]]]])
      } else {
        message(
          "No suitable 64-bit system Python was found; using Python ",
          default_python_version, " for the conda environment."
        )
        python_version <- default_python_version
      }
    }

    # validate that we have conda:
    if (!have_conda) {
      reticulate::install_miniconda(update = update_conda)
      conda <- tryCatch(reticulate::conda_binary("auto"), error = function(e) NULL)
    }
    # Update mini_conda
    if (have_conda && (update_conda || force_conda)) {
      reticulate::install_miniconda(update = update_conda, force = force_conda)
    }
  }

  # Fail with a clear message rather than deep inside reticulate.
  if (is.null(conda)) {
    stop("Conda environment installation failed (no conda binary found)\n", call. = FALSE)
  }

  # process the installation of text required python packages
  process_textrpp_installation_conda(conda,
    rpp_version,
    python_version,
    prompt,
    envname = envname,
    pip = pip
  )

  message(colourise(
    "\nInstallation is completed.\n",
    fg = "blue", bg = NULL
  ))
  message(
    " ",
    sprintf("Condaenv: %s ", envname), "\n"
  )

  message(colourise(
    "Great work - do not forget to initialize the environment \nwith textrpp_initialize().\n",
    fg = "green", bg = NULL
  ))
  invisible(NULL)
}

# Set up the conda environment `envname` (creating it when it does not exist yet)
# and install the python packages listed in `rpp_version` into it.
#
# conda          path to the conda binary, passed on to the reticulate conda functions.
# rpp_version    character vector of package specifications to install.
# python_version Python version for a newly created environment; "python=3.9" is
#                used when it is NULL / empty.
# prompt         ask for confirmation before doing anything.
# envname        name of the conda environment.
# pip            passed to reticulate::conda_install(); TRUE installs with pip.
#
# Returns the value of reticulate::conda_install().
process_textrpp_installation_conda <- function(conda,
                                               rpp_version,
                                               python_version,
                                               prompt = TRUE,
                                               envname = "textrpp_condaenv",
                                               pip = FALSE) {
  conda_envs <- reticulate::conda_list(conda = conda)
  if (prompt) {
    ans <- utils::menu(c("Confirm", "Cancel"), title = "Confirm that a new conda environment will be set up.")
    # Anything but an explicit "Confirm" (including leaving the menu with 0) cancels.
    if (ans != 1) stop("condaenv setup is cancelled by user", call. = FALSE)
  }

  package_list <- paste(rpp_version, collapse = ", ")
  matching_envs <- conda_envs[which(conda_envs$name == envname), , drop = FALSE]

  if (nrow(matching_envs) == 1) {
    message(
      "Using existing conda environment ", envname, " for text installation.\n",
      "text: ", package_list, " will be installed."
    )
  } else {
    message(
      'A new conda environment "', envname, '" will be created and \npython required packages: ',
      package_list, " will be installed."
    )
    message("Creating ", envname, " conda environment for text installation...\n")
    python_packages <- if (length(python_version) == 0) {
      "python=3.9"
    } else {
      sprintf("python=%s", python_version[[1]])
    }
    reticulate::conda_create(envname, packages = python_packages, conda = conda)
  }

  message("Installing text required python packages...\n")

  reticulate::conda_install(envname, rpp_version, pip = pip, conda = conda)
}



# Create the virtual environment `envname` with the given python binary, activate
# it and install every package in `rpp_version` into it with pip.
#
# python      path to the python binary used to create the virtualenv.
# rpp_version character vector of package specifications to install.
# pip_version pip version handed to reticulate::virtualenv_create(); treated as
#             NULL when the argument is not supplied.
# envname     name of the virtual environment.
# prompt      ask for confirmation before creating the environment.
process_textrpp_installation_virtualenv <- function(python = "/usr/local/bin/python3.9",
                                                    rpp_version,
                                                    pip_version,
                                                    envname = "textrpp_virtualenv",
                                                    prompt = TRUE) {
  # `pip_version` has no default; keep calls that omit it working.
  if (missing(pip_version)) {
    pip_version <- NULL
  }

  libraries <- paste(rpp_version, collapse = ", ")
  message(sprintf(
    paste0(
      'A new virtual environment called "%s" will be created using "%s",\n',
      "and the following text required python packages will be installed:\n",
      ' "%s" \n \n'
    ),
    envname, python, libraries
  ))
  if (prompt) {
    ans <- utils::menu(c("No", "Yes"), title = "Proceed?")
    # Only an explicit "Yes" continues; "No" and leaving the menu (0) both cancel.
    if (ans != 2) stop("Virtualenv setup is cancelled by user", call. = FALSE)
  }

  # Make python path help(virtualenv_create)
  reticulate::virtualenv_create(envname,
                                python,
                                pip_version = pip_version,
                                required = TRUE)

  reticulate::use_virtualenv(envname, required = TRUE)

  # Install the packages one at a time, in the order given.
  for (package in rpp_version) {
    reticulate::py_install(package, envname = envname, pip = TRUE)
  }

  message(colourise(
    "\nSuccess!\n",
    fg = "green", bg = NULL
  ))
}

# Look for `bin` in the standard unix binary folders, "/usr/local/bin" first and
# then "/usr/bin". Examples: bin = "pip3", bin = "python3.9".
# Returns the full path of the first existing match, or NULL when there is none.
python_unix_binary <- function(bin) {
  candidates <- file.path(c("/usr/local/bin", "/usr/bin"), bin)
  first_hit <- match(TRUE, file.exists(candidates))
  if (is.na(first_hit)) {
    NULL
  } else {
    candidates[[first_hit]]
  }
}

#' @rdname textrpp_install
#' @description To install the required python packages in a "virtualenv" instead, use
#'   \code{textrpp_install_virtualenv}. It needs an existing python installation: either give
#'   its path in \code{python_path} (such as "/usr/local/bin/python3.9" on Mac and Linux), or let
#'   the function look for \code{bin} in "/usr/local/bin" and "/usr/bin".
#' @param pip_version character; pip version handed on to the internal virtualenv
#'   set-up step. Default is \code{NULL}.
#' @examples
#' \dontrun{
#' # install text required python packages in a virtual environment
#' textrpp_install_virtualenv()
#' }
#' @export
textrpp_install_virtualenv <- function(rpp_version = c("torch==2.0.0",
                                                       "transformers==4.19.2",
                                                       "numpy",
                                                       "pandas",
                                                       "nltk"),
                                       python_path = NULL, # "/usr/local/bin/python3.9",
                                       pip_version = NULL,
                                       bin = "python3",
                                       envname = "textrpp_virtualenv",
                                       prompt = TRUE) {
  # An explicitly given path wins; otherwise search for the system python binary.
  python <- if (is.null(python_path)) {
    python_unix_binary(bin = bin)
  } else {
    python_path
  }

  if (is.null(python)) {
    stop("Unable to locate Python on this system.", call. = FALSE)
  }

  # Create the environment and install the packages into it.
  process_textrpp_installation_virtualenv(
    python = python,
    pip_version = pip_version,
    rpp_version = rpp_version,
    envname = envname,
    prompt = prompt
  )

  done_text <- colourise("\nInstallation is completed.\n", fg = "blue", bg = NULL)
  message(done_text)
  invisible(NULL)
}


#' Uninstall textrpp conda environment
#'
#' Removes the conda environment created by textrpp_install()
#' @param conda path to conda executable, default to "auto" which automatically
#'   finds the path
#' @param prompt logical; ask for confirmation before the environment is removed
#' @param envname character; name of conda environment to remove
#' @return \code{NULL}, invisibly. Called for its side effects.
#' @export
textrpp_uninstall <- function(conda = "auto",
                              prompt = TRUE,
                              envname = "textrpp_condaenv") {
  conda <- tryCatch(reticulate::conda_binary(conda), error = function(e) NULL)

  if (is.null(conda)) {
    stop("Conda installation failed (no conda binary found)\n", call. = FALSE)
  }

  conda_envs <- reticulate::conda_list(conda = conda)
  n_matches <- sum(conda_envs$name == envname, na.rm = TRUE)
  if (n_matches != 1) {
    stop("conda environment ", envname, " is not found", call. = FALSE)
  }
  message("A conda environment ", envname, " will be removed\n")

  if (prompt) {
    ans <- utils::menu(c("No", "Yes"), title = "Proceed?")
    # Only an explicit "Yes" removes the environment; "No" and leaving the
    # menu (0) both cancel.
    if (ans != 2) stop("condaenv removal is cancelled by user", call. = FALSE)
  }

  # Remove the environment with the same conda binary that was used to find it.
  reticulate::conda_remove(envname = envname, conda = conda)

  message("\nUninstallation complete.\n\n")

  invisible(NULL)
}

###### see utils.R in spacyr
# checking OS functions, thanks to r-tensorflow;

# TRUE when R reports the "windows" OS type, FALSE otherwise.
is_windows <- function() {
  os_type <- .Platform[["OS.type"]]
  identical(os_type, "windows")
}

# TRUE when R reports the "unix" OS type, FALSE otherwise.
is_unix <- function() {
  os_type <- .Platform[["OS.type"]]
  identical(os_type, "unix")
}

# TRUE when the system name reported by Sys.info() is "Darwin" (macOS).
# Uses `[[` and identical(), like is_linux(), so the result is a plain unnamed
# TRUE/FALSE rather than a logical carrying the "sysname" name.
is_osx <- function() {
  identical(Sys.info()[["sysname"]], "Darwin")
}

# TRUE when the system name reported by Sys.info() is "linux", ignoring case.
is_linux <- function() {
  system_name <- Sys.info()[["sysname"]]
  identical(tolower(system_name), "linux")
}

#is_ubuntu <- function() {
#  if (is_unix() && file.exists("/etc/lsb-release")) {
#    lsbrelease <- readLines("/etc/lsb-release")
#    any(grepl("Ubuntu", lsbrelease))
#  } else {
#    FALSE
#  }
#}

#python_version_function <- function(python) {
#  # check for the version
#  result <- system2(python, "--version", stdout = TRUE, stderr = TRUE)
#
#  # check for error
#  error_status <- attr(result, "status")
#  if (!is.null(error_status)) {
#    stop("Error ", error_status, " occurred while checking for python version", call. = FALSE)
#  }
#
#  # parse out the major and minor version numbers
#  matches <- regexec("^[^ ]+\\s+(\\d+)\\.(\\d+).*$", result)
#  matches <- regmatches(result, matches)[[1]]
#  if (length(matches) != 3) {
#    stop("Unable to parse Python version '", result[[1]], "'", call. = FALSE)
#  }
#
#  # return as R numeric version
#  numeric_version(paste(matches[[2]], matches[[3]], sep = "."))
#}

#pip_get_version <- function(cmd, major_version) {
#  regex <- "^(\\S+)\\s?(.*)$"
#  cmd1 <- sub(regex, "\\1", cmd)
#  cmd2 <- sub(regex, "\\2", cmd)
#  oldw <- getOption("warn")
#  options(warn = -1)
#  result <- paste(system2(cmd1, cmd2, stdout = TRUE, stderr = TRUE),
#    collapse = " "
#  )
#  options(warn = oldw)
#  version_check_regex <- sprintf(".+(%s.\\d+\\.\\d+).+", major_version)
#  return(sub(version_check_regex, "\\1", result))
#}


#conda_get_version <- function(major_version = NA, conda, envname) {
#  condaenv_bin <- function(bin) path.expand(file.path(dirname(conda), bin))
#  cmd <- sprintf(
#    "%s%s %s && conda search torch -c conda-forge%s",
#    ifelse(is_windows(), "", ifelse(is_osx(), "source ", "/bin/bash -c \"source ")),
#    shQuote(path.expand(condaenv_bin("activate"))),
#    envname,
#    ifelse(is_windows(), "", ifelse(is_osx(), "", "\""))
#  )
#  regex <- "^(\\S+)\\s?(.*)$"
#  cmd1 <- sub(regex, "\\1", cmd)
#  cmd2 <- sub(regex, "\\2", cmd)
#
#  result <- system2(cmd1, cmd2, stdout = TRUE, stderr = TRUE)
#  result <- sub("\\S+\\s+(\\S+)\\s.+", "\\1", result)
#  if (!is.na(major_version)) {
#    result <- grep(paste0("^", major_version, "\\."), result, value = TRUE)
#  }
#  #
#  return(result[length(result)])
#}
# OscarKjell/text documentation built on April 3, 2025, 3:07 p.m.