R/clean_pkg.R

Defines functions clean_pkg

Documented in clean_pkg

#' Cleans up illegal characters in packages generated by make_organismdbi(),
#' make_orgdb(), and make_txdb(). This attempts to fix some of the common
#' problems therein.
#'
#' The primary problem this function seeks to solve is derived from the fact
#' that some species names in the eupathdb contain characters which are not
#' allowed in orgdb/txdb/organismdbi instances.  Thus this invokes a couple of
#' regular expressions in an attempt to make sure these generated packages are
#' actually installable.
#'
#' One thing I should consider is to add some of this logic to my eupath queries
#' rather than perform these clunky modifications to the already-generated
#' packages.
#'
#' @param path  Location for the original Db/Dbi instance.
#' @param removal  String to remove from the instance.
#' @param replace  What to replace removal with, when necessary.
#' @param sqlite  Also modify the sqlite database?
#' @return  A hopefully cleaner OrgDb/TxDb/OrganismDbi sqlite package.
#' @export
clean_pkg <- function(path, removal="-like", replace="", sqlite=TRUE) {
  ## This is because TxDb creation fails if you have an author like 'abelew <abelew@gmail.com>'
  ##at_cmd <- paste0("sed -i 's/ at /\\@/g' ", path, "/DESCRIPTION")
  basedir <- dirname(path)
  dir <- basename(path)
  full_path <- file.path(basedir, dir)
  ## at_cmd <- paste0("perl -p -i -e 's/ at /\\@/g' ", full_path, "/DESCRIPTION")
  at_cmd <- glue::glue("perl -p -i -e 's/ at /\\@/g' {full_path}/DESCRIPTION")
  system(command = at_cmd)
  ## Since I changed @ to at I figured . could be dot too
  ## dot_cmd <- paste0("perl -p -i -e 's/ dot /\\./g' ", full_path, "/DESCRIPTION")
  dot_cmd <- glue::glue("perl -p -i -e 's/ dot /\\./g' {full_path}/DESCRIPTION")
  system(dot_cmd)

  new_dir <- dir
  new_path <- file.path(basedir, new_dir)
  if (grepl(pattern = removal, x = dir)) {
    ## Get rid of the -like in the path name
    new_dir <- gsub(pattern = removal, replacement = replace, x = dir)
    new_path <- file.path(basedir, new_dir)
    ## And rename the directory
    ## mv_cmd <- paste0("mv ", path, " ", new_path)
    mv_cmd <- glue::glue("mv {path} {new_path}")
    message("moving orgdb: ", mv_cmd)
    system(mv_cmd)
    ## Collect the text files in the new package and remove all -like instances in them
    ## find_cmd <- paste0("perl -p -i -e 's/",
    ##                    removal, "/", replace,
    ##                    "/g' $(find ", new_path,
    ##                    " -type f | grep -v 'sqlite' | grep -v 'zzz' | grep -v 'rda')")
    find_cmd <- glue::glue(
      "perl -p -i -e 's/{removal}/{replace}/g' \\
      $(find {new_path} -type f | grep -v 'sqlite' | grep -v 'zzz' | grep -v 'rda')")
    message("rewriting orgdb files: ", find_cmd)
    system(find_cmd)

    if (isTRUE(sqlite)) {
      ## Move the sqlite file, now the directory has been renamed.
      ## So when we go to move it we need to take that into account.
      old_sqlite_base <- gsub(pattern = ".db", replacement = "", x = dir)
      sqlite_basename <- basename(dir)
      sqlite_basename <- gsub(pattern = ".sqlite", replacement = "", x = sqlite_basename)
      old_sqlite_file <- file.path(new_dir, "inst", "extdata", glue::glue("{old_sqlite_base}.sqlite"))
      old_sqlite <- file.path(basedir, old_sqlite_file)
      new_sqlite_file <- gsub(pattern = removal, replacement = replace, x = old_sqlite_file)
      new_sqlite <- file.path(basedir, new_sqlite_file)
      ## sqlite_mv_cmd <- paste0("mv ", old_sqlite, " ", new_sqlite)
      sqlite_mv_cmd <- glue::glue("mv {old_sqlite} new_sqlite")
      message("moving sqlite file: ", sqlite_mv_cmd)
      system(sqlite_mv_cmd)
      ## orgdb_dir <- new_dir
      new_pkg_name <- gsub(pattern = removal, replacement = replace, x = sqlite_basename)
      ## Update the orgdb sqlite file to reflect the new name
      ## final_sqlite_cmd <- paste0("chmod +w ", new_sqlite, " ; sqlite3 ", new_sqlite,
      ##                            " \"UPDATE metadata SET value='", new_pkg_name,
      ##                            "' WHERE name='SPECIES';\" ; chmod -w ", new_sqlite)
      final_sqlite_cmd <- glue::glue(
        "chmod +w {new_sqlite}; sqlite3 {new_sqlite} \\
        \"UPDATE metadata SET value='{new_pkg_name}' WHERE name='SPECIES';\";\\
        chmod -w {new_sqlite}")
      message("rewriting sqlite db:", final_sqlite_cmd)
      system(final_sqlite_cmd)
    }
  }
  message("The cleaned orgdb should be located at: ", new_path, ".")
  return(new_path)
}
khughitt/EuPathDB documentation built on Nov. 4, 2023, 4:19 a.m.