# Run by package maintainer via aliases revdepsh and revdepr in .dev/.bash_aliases. See
# that file for comments.
# revdep = reverse first-order dependency; i.e. the CRAN and Bioconductor packages which directly use data.table
Sys.unsetenv("R_PROFILE_USER")
# The alias sets R_PROFILE_USER so that this script runs on R starting up, and leaves the R prompt running.
# But if we don't unset it now, anything else from now on that does something like system("R CMD INSTALL"), e.g. update.packages()
# and BiocManager::install(), will call this script again recursively.
# options copied from .dev/.Rprofile that aren't run due to the way this script is started via a profile
options(help_type="html")
options(error=quote(utils::dump.frames()))
options(width=200) # for cran() output not to wrap
# Check that env variables have been set correctly:
# export R_LIBS_SITE=NULL # R 4.2.0 changed to NULL but it doesn't appear to work
# export R_LIBS=~/build/revdeplib/
# export _R_CHECK_FORCE_SUGGESTS_=true
if (length(.libPaths())==3L) .libPaths(.libPaths()[-2L], include.site=FALSE) # workaround as I couldn't get R_LIBS_SITE=NULL to be effective
stopifnot(identical(length(.libPaths()), 2L)) # revdeplib writeable by me, and the pre-installed recommended R library (sudo writeable)
stopifnot(identical(.libPaths()[1L], getwd()))
tt = file.info(.libPaths())[,"uname"]
stopifnot(identical(length(tt), 2L))
stopifnot(tt[1L]==Sys.info()["user"])
if (grepl("devel", .libPaths()[2L])) {
stopifnot(tt[2L]==Sys.info()["user"])
R = "~/build/R-devel/bin/R" # would use Rdevel alias but the bash alias doesn't work from system()
} else {
stopifnot(tt[2L]=="root")
R = "R" # R-release
}
if (length(dup_pkgs <- intersect(dir(.libPaths()[1]), dir(.libPaths()[2])))) {
stop("Package(s) installed in both paths: ", paste(dup_pkgs, collapse=" "))
# S4 issues are frustrating as it seems that dependencies need to be reinstalled to reset the nobody-knows-how-it-works S4 cache?
# For example, to fix prioritizr's error in its examples :
# error in evaluating the argument 'x' in selecting a method for function 'print':
# error in evaluating the argument 'y' in selecting a method for function 'intersecting_units':
# package slot missing from signature for generic ‘raster’
# I reinstalled raster. But that didn't help. Then raster's depend sp but that didn't help. Then sp followed by raster
# again but that didn't help. Then all dependencies but that didn't help. Then lattice too but that didn't help.
# Then after a few hours, I stumbled on the realization when I reinstalled lattice it got installed in revdeplib rather
# than R's base library where the recommended packages are; lattice was in two places on .libPaths(). Removing it from
# revdeplib didn't help. But reinstalling lattice (using sudo R) in /usr/lib/R/library did appear to finally fix prioritizr.
# Why it was lattice that needed to be reinstalled just beats me, and why do S4 packages need to be reinstalled anyway?
}
stopifnot(identical(Sys.getenv("_R_CHECK_FORCE_SUGGESTS_"),"true"))
# _R_CHECK_FORCE_SUGGESTS_=true explicitly in .dev/.bash_aliases
# All suggests should be installed for revdep checking. This avoids problems for some packages for which the attempt to run
# run R CMD check without all suggests can fail due to changed behaviour when some of the suggests aren't available;
# e.g. https://github.com/reimandlab/ActivePathways/issues/14
cflags = system("grep \"^[^#]*CFLAGS\" ~/.R/Makevars", intern=TRUE)
cat("~/.R/Makevars contains", cflags)
if (!grepl("^CFLAGS=-O[0-3] *$", cflags)) {
stop("Some packages have failed to install in the past (e.g. processx and RGtk2) when CFLAGS contains -pedandic, -Wall, and similar. ",
"So for revdepr keep CFLAGS simple; i.e. -O[0-3] only. Check ~/.R/Makevars.")
} else {
cat(" ok\n")
}
options(repos = c("CRAN"=c("http://cloud.r-project.org")))
options(repos = BiocManager::repositories())
# Some CRAN packages import Bioc packages; e.g. wilson imports DESeq2. So we need to install DESeq2 from Bioc.
# BiocManager::repositories() includes CRAN in its result (it appends to getOption("repos"). Using the Bioc function
# ensures the latest Bioc version is in the repo path here (their repos have the version number in the path).
options(warn=1) # warning at the time so we can more easily see what's going on package by package when we scroll through output
cat("options()$timeout==", options()$timeout," set by R_DEFAULT_INTERNET_TIMEOUT in .dev/.bash_aliases revdepsh\n",sep="")
# R's default is 60. Before Dec 2020, we used 300 but that wasn't enough to download Bioc package BSgenome.Hsapiens.UCSC.hg19 (677GB) which is
# suggested by CRAN package CNVScope which imports data.table. From Dec 2020 we use 3600.
if (is.null(utils::old.packages(.libPaths()[2]))) {
cat("All", length(dir(.libPaths()[2])), "recommended packages supplied with R in", .libPaths()[2], "are the latest version\n")
} else {
cat("Some recommended packages supplied with R need to be updated ...\n")
system(paste0(if(R=="R")"sudo ", R, " -e \"utils::update.packages('",.libPaths()[2],"', ask=TRUE, checkBuilt=TRUE)\""))
# old.packages was called first, to avoid entering password for sudo if, as is most often the case, all recommended packages are already to date
}
require(utils) # only base is loaded when R_PROFILE_USER runs
update.packages(ask=FALSE, checkBuilt=TRUE)
# if package not found on mirror, try manually a different one:
# install.packages("<pkg>", repos="http://cran.stat.ucla.edu/")
# update.packages(ask=FALSE) # a repeat sometimes does more, keep repeating until none
# Follow: https://bioconductor.org/install
# Ensure no library() call in .Rprofile, such as library(bit64)
# As from October 2020, Matt no longer checks Bioconductor revdeps. After many years of trying, and repeated
# emails to Bioconductor maintainers, there were still too many issues not fixed for too long. The packages
# are big in size and have many warnings which make it hard to find the true problems. The way the Bioc
# devel and release repositories are set up require more work and confuses communication. That doesn't need
# to be done in the better and simpler way that CRAN is setup.
# require(BiocManager)
# BiocManager::install(ask=FALSE, version="devel", checkBuilt=TRUE)
# BiocManager::valid()
avail = available.packages() # includes CRAN and Bioc, from getOption("repos") set above
avail = avail[!rownames(avail) %in% c("cplexAPI","Rcplex"), ]
# cplexAPI is suggested by revdeps ivmte and prioritizr. I haven't succeeded to install IBM ILOG CPLEX which requires a license,
# so consider cplexAPI not available when resolving missing suggests at the end of status().
# Update: cplexAPI was removed from CRAN on 5 Nov 2021 so this is now redundant, but leave it in place for future use.
# Update: Rcplex is on CRAN as of 20 Nov 2022 but with install errors, therefore treat it as not available.
# The presence of packages here in revdeplib which no longer exist on CRAN could explain differences to CRAN. A revdep
# could be running tests using that package when available and failing which may be the very reason that package was removed from CRAN.
# When it is removed from revdeplib to match CRAN, then the revdep might then pass as it will skip its tests using that package.
x = installed.packages()
tt = match(rownames(x), rownames(avail))
removed = rownames(x)[is.na(tt) & is.na(x[,"Priority"])]
cat("Removing",length(removed),"packages which are no longer available on CRAN/Bioc:", paste(removed, collapse=","), "\n")
stopifnot(all(x[removed,"LibPath"] == .libPaths()[1]))
oldn = nrow(x)
remove.packages(removed, .libPaths()[1])
x = installed.packages()
stopifnot(nrow(x) == oldn-length(removed))
# Ensure all installed packages were built with this x.y release of R; i.e. that checkBuilt=TRUE worked above
cat("This is R ",R.version$major,".",R.version$minor,"; ",R.version.string,"\n",sep="")
cat("Previously installed packages were built using:\n")
print(tt <- table(x[,"Built"], dnn=NULL))
minorR = paste(strsplit(as.character(getRversion()), split="[.]")[[1]][c(1,2)], collapse=".")
if (any(w<-names(tt)<minorR)) {
stop(sum(tt[w])," packages built with R<",minorR," failed to update: ", paste(paste0("'",rownames(x)[x[,"Built"]<minorR],"'"), collapse=","), "\n",sep="")
}
deps = tools::package_dependencies("data.table",
db = available.packages(repos=getOption("repos")["CRAN"]), # just CRAN revdeps though (not Bioc) from October 2020
which="all", reverse=TRUE, recursive=FALSE)[[1]]
# exclude = c("TCGAbiolinks") # too long (>30mins): https://github.com/BioinformaticsFMRP/TCGAbiolinks/issues/240
# deps = deps[-match(exclude, deps)]
table(avail[deps,"Repository"], dnn=NULL)
old = 0
new = 0
if (basename(.libPaths()[1]) != "revdeplib") stop("Must start R with exports as above")
for (p in deps) {
fn = paste0(p, "_", avail[p,"Version"], ".tar.gz")
if (!file.exists(fn) ||
identical(tryCatch(packageVersion(p), error=function(e)FALSE), FALSE) ||
packageVersion(p) != avail[p,"Version"]) {
cat("\n**** Installing revdep:", p, "\n")
system(paste0("rm -rf ", p, ".Rcheck")) # Remove last check (of previous version) to move its status() to not yet run
install.packages(p, dependencies=TRUE)
# To install its dependencies. The package itsef is installed superfluously here because the tar.gz will be passed to R CMD check.
# If we did download.packages() first and then passed that tar.gz to install.packages(), repos= is set to NULL when installing from
# local file, so dependencies=TRUE wouldn't know where to get the dependencies. Hence usig install.packages first with repos= set.
download.packages(p, destdir="~/build/revdeplib", contriburl=avail[p,"Repository"]) # So R CMD check can run on these
if (!file.exists(fn)) stop("Still does not exist!:", fn)
new = new+1
} else {
old = old+1
}
}
cat("New downloaded:",new," Already had latest:", old, " TOTAL:", length(deps), "\n")
# Remove the tar.gz no longer needed :
for (p in deps) {
f = paste0(p, "_", avail[p,"Version"], ".tar.gz") # keep this one
all = system(paste0("ls ",p,"_*.tar.gz"), intern=TRUE)
old = all[all!=f]
for (i in old) {
cat("Removing",i,"because",f,"is newer\n")
system(paste0("rm ",i))
}
}
all = system("ls *.tar.gz", intern=TRUE)
all = sapply(strsplit(all, split="_"),'[',1)
for (i in all[!all %in% deps]) {
cat("Removing",i,"because it", if (!i %in% rownames(avail)) "has been removed from CRAN\n" else "no longer uses data.table\n")
system(paste0("rm ",i,"_*.tar.gz"))
}
num_tar.gz = as.integer(system("ls *.tar.gz | wc -l", intern=TRUE))
if (length(deps) != num_tar.gz) stop("num_tar.gz==",num_tar.gz," but length(deps)==",length(deps))
status0 = function(bioc=FALSE) {
deps = deps[grep("bioc", avail[deps,"Repository"], invert=!bioc)]
x = unlist(sapply(deps, function(x) {
fn = paste0("./",x,".Rcheck/00check.log")
if (file.exists(fn)) {
v = suppressWarnings(system(paste0("grep 'Status:' ",fn), intern=TRUE))
if (!length(v)) return("RUNNING")
return(substr(v, 9L, nchar(v)))
}
if (file.exists(paste0("./",x,".Rcheck"))) return("RUNNING")
return("NOT STARTED")
}))
e = grep("ERROR",x)
w = setdiff( grep("WARNING",x), e)
n = setdiff( grep("NOTE",x), c(e,w) )
ok = setdiff( grep("OK",x), c(e,w,n) )
r = grep("RUNNING",x)
ns = grep("NOT STARTED", x)
cat(" ERROR :",sprintf("%4d",length(e)),":",paste(sort(names(x)[e])),"\n",
"WARNING :",sprintf("%4d",length(w)),":",paste(sort(names(x)[w])),"\n",
"NOTE :",sprintf("%4d",length(n)),"\n", #":",paste(sort(names(x)[n])),"\n",
"OK :",sprintf("%4d",length(ok)),"\n",
"TOTAL :",sprintf("%4d",length(e)+length(w)+length(n)+length(ok)),"/",length(deps),"\n",
if (length(r)) paste0("RUNNING : ",paste(sort(names(x)[r]),collapse=" "),"\n"),
if (length(ns)) paste0("NOT STARTED : ",paste(sort(names(x)[head(ns,20)]),collapse=" "), if(length(ns)>20)paste(" +",length(ns)-20,"more"), "\n"),
"\n"
)
assign(if (bioc) ".fail.bioc" else ".fail.cran", c(sort(names(x)[e]), sort(names(x)[w])), envir=.GlobalEnv)
assign(if (bioc) ".running.bioc" else ".running.cran", sort(names(x)[r]), envir=.GlobalEnv)
# if parallel finished then 'running' means killed; we want to see if status on CRAN (using cran()) shows FAIL with a log showing kill signal (or similar) due to taking too long
invisible()
}
status = function(bioc=FALSE) {
cat("\nInstalled data.table to be tested against:",
as.character(packageVersion("data.table")),
format(as.POSIXct(packageDescription("data.table")$Packaged, tz="UTC"), tz=""), # local time
"\n\nCRAN:\n")
status0()
if (bioc) {
cat("BIOC:\n"); status0(bioc=TRUE)
cat("TOTAL :", length(deps), "\n\n")
}
cat("Oldest 00check.log (to check no old stale ones somehow missed):\n")
system("find . -name '00check.log' | xargs ls -lt | tail -1")
cat("\n")
tt = length(system('ps -aux | grep "parallel.*R.* CMD check"', intern=TRUE))>2L
cat("parallel R CMD check is ", if(tt)"" else "not ", "running\n",sep="")
if (file.exists("/tmp/started.flag")) {
# system("ls -lrt /tmp/*.flag")
tt = as.POSIXct(file.info(c("/tmp/started.flag","/tmp/finished.flag"))$ctime)
if (is.na(tt[2])) { tt[2] = Sys.time(); cat("Has been running for "); }
else cat("Ran for ");
cat(round(diff(as.numeric(tt))/60, 1), "mins\n")
}
# Now deal with Suggests that are not available. Could have been removed from CRAN/Bioc, or are not installing for some reason like system library not installed.
tt = system("find . -name '00check.log' -exec grep -zl 'ERROR.Packages* suggested but not available' {} \\;", intern=TRUE)
if (length(tt)) {
tt = sort(substring(tt, 3L, nchar(tt)-nchar(".Rcheck/00check.log")))
installed = installed.packages()
all_sugg_unavail = c()
for (pkg in tt) {
sugg = strsplit(gsub("\n","",avail[pkg,"Suggests"]), split=",")[[1L]]
sugg = gsub("^ ","",sugg)
sugg = gsub(" [(].+[)]","",sugg)
miss = sugg[!sugg %in% rownames(installed)]
cat("\n",pkg,sep="")
if (!length(miss)) {
cat(" 00check.log states that some of its suggests are not installed, but they all appear to be. Inspect and rerun.\n")
next
}
cat(" is missing",paste(miss,collapse=","))
if (any(tt <- miss %in% rownames(avail))) {
cat("; some are available, installing ...\n")
install.packages(miss[which(tt)]) # careful not to ask for unavailable packages here, to avoid the warnings we already know they aren't available
} else {
cat("; all unavailable on CRAN/Bioc\n")
all_sugg_unavail = c(all_sugg_unavail, pkg)
}
}
if (length(all_sugg_unavail)) {
cat('\nPackages for which all their missing suggests are not available (',length(all_sugg_unavail),'): ', paste(all_sugg_unavail, collapse=" "), "\n", sep="")
cat('Rerunning them with R_CHECK_FORCE_SUGGESTS=FALSE ...\n')
run(all_sugg_unavail, R_CHECK_FORCE_SUGGESTS=FALSE, ask=FALSE)
# the main run() ran with TRUE in an attempt to check suggests where possible in case data.table usage is there. It does that as OS level using
# parallel linux command of `R CMD check`. Therefore it would be awkward to rerun with TRUE in that step. Instead we trigger the rerun from
# here in status() afterwards once we know all such packages and can count and log them (the cat() just above). Since run() is
# asynchronous we do have to wait again and run status() again when perfbar shows finished. However, currently there are only
# 37/1315 like this so it only takes a few minutes with my default of 6 at a time.
return(TRUE) # to indicate status() started a run() so that the run() afterwards can be avoided which would otherwise see these as not started and run them simultaneously but with _SUGGESTS=TRUE
}
# Otherwise, inspect manually each result in fail.log written by log()
}
invisible(FALSE)
}
cran = function() # reports CRAN status of the .fail.cran packages
{
x = c(.fail.cran, .running.cran)
if (!length(x)) {
cat("No CRAN revdeps in error, warning or running status\n")
return(invisible())
}
require(data.table)
p = proc.time()
db <<- setDT(tools::CRAN_check_results())
cat("tools::CRAN_check_results() returned",prettyNum(nrow(db), big.mark=","),"rows in",timetaken(p),"\n")
ans = db[Package %chin% x,
.(ERROR=sum(Status=="ERROR", na.rm=TRUE),
WARN =sum(Status=="WARN", na.rm=TRUE),
cran =paste(unique(Version),collapse=";"),
local=as.character(tryCatch(packageVersion(.BY[[1]]), error=function(e)"error"))),
keyby=Package]
ans[local==cran, c("cran","local"):=""]
ans[, "right_click_in_bash":=paste0("https://cran.r-project.org/web/checks/check_results_",Package,".html")]
setkey(ans, Package)[x,]
}
run = function(pkgs=NULL, R_CHECK_FORCE_SUGGESTS=TRUE, choose=NULL, ask=TRUE) {
if (length(pkgs)==1) pkgs = strsplit(pkgs, split="[, ]")[[1]]
if (anyDuplicated(pkgs)) stop("pkgs contains dups")
if (!length(pkgs)) {
opts = c("not.started","cran.fail","bioc.fail","both.fail","rerun.cran","rerun.bioc","rerun.all")
w = if (is.null(choose)) {
cat(paste0(1:length(opts),": ",opts) , sep="\n")
suppressWarnings(as.integer(readline("Enter option: ")))
} else choose
if (is.na(w) || !w %in% seq_along(opts)) stop(w," is invalid")
which = opts[w]
numtgz = as.integer(system("ls -1 *.tar.gz | wc -l", intern=TRUE))
stopifnot(numtgz==length(deps))
if (which=="rerun.all") {
pkgs = "__ALL__"
cmd = "rm -rf *.Rcheck"
cat("WIPE ALL CHECK RESULTS:",cmd,"\n")
cat("Proceed? (ctrl-c or enter)\n")
scan(quiet=TRUE)
system(cmd)
} else if (which=="rerun.cran") {
pkgs = deps[ !grepl("bioconductor", avail[deps,"Repository"]) ]
} else if (which=="rerun.bioc") {
pkgs = deps[ grepl("bioconductor", avail[deps,"Repository"]) ]
} else {
pkgs = NULL
if (which=="not.started") pkgs = deps[!file.exists(paste0("./",deps,".Rcheck"))] # those that haven't run
if (which %in% c("cran.fail","both.fail")) pkgs = union(pkgs, .fail.cran) # .fail.* were written to .GlobalEnv by status0()
if (which %in% c("bioc.fail","both.fail")) pkgs = union(pkgs, .fail.bioc)
}
}
if (length(pkgs)==0) { cat("No packages to run\n"); return(invisible()); }
if (identical(pkgs,"__ALL__")) {
cat("Running all",length(deps),"packages\n")
filter = ""
} else {
cat("Running",length(pkgs),"packages:", paste(pkgs), "\n")
filter = paste0("| grep -E '", paste0(paste0(pkgs,"_"),collapse="|"), "' ")
}
if (ask && is.null(choose)) {
cat("Proceed? (ctrl-c or enter)\n")
scan(quiet=TRUE)
}
if (!identical(pkgs,"_ALL_")) for (i in pkgs) system(paste0("rm -rf ./",i,".Rcheck"))
SUGG = paste0("_R_CHECK_FORCE_SUGGESTS_=",tolower(R_CHECK_FORCE_SUGGESTS))
cmd = paste0("ls -1 *.tar.gz ", filter, "| TZ='UTC' OMP_THREAD_LIMIT=2 ",SUGG," parallel --max-procs 50% --timeout 1200 ",R," CMD check")
# TZ='UTC' because some packages have failed locally for me but not on CRAN or for their maintainer, due to sensitivity of tests to timezone
if (as.integer(system("ps -e | grep perfbar | wc -l", intern=TRUE)) < 1) system("perfbar",wait=FALSE)
system("touch /tmp/started.flag ; rm -f /tmp/finished.flag")
system(paste("((",cmd,">/dev/null 2>&1); touch /tmp/finished.flag)"), wait=FALSE)
}
inst = function() {
last = system("ls -t ~/GitHub/data.table/data.table_*.tar.gz", intern=TRUE)[1L] # latest timestamp should be dev version
cat("Installing",last,"...\n")
system(paste0(R," CMD INSTALL ",last))
}
log = function(bioc=FALSE, fnam="~/fail.log", app="gedit") {
x = c(.fail.cran, if (bioc) .fail.bioc)
cat("Writing 00check.log for",length(x),"packages to",fnam,":\n")
cat(paste(x,collapse=" "), "\n")
require(BiocManager) # to ensure Bioc version is included in attached packages sessionInfo. It includes the minor version this way; e.g. 1.30.4
cat(capture.output(sessionInfo()), "\n", file=fnam, sep="\n")
for (i in x) {
system(paste0("ls | grep '",i,"_.*tar.gz' >> ",fnam))
if (bioc && i %in% .fail.bioc) {
# for Bioconductor only, now include the git commit and date. Although Bioc dev check status online may show OK :
# https://bioconductor.org/checkResults/devel/bioc-LATEST/
# the Bioc package maintainer has to remember to bump the version number otherwise Bioc will not propogate it,
# and BiocManager::install(version="devel") will not update to the latest version, even though the Bioc dev
# status online does update. For packages which fail/warn locally but state OK on Bioc check page, compare the
# output here to the commit hash and date displayed by Bioc-dev check results. If there's a mismatch, contact
# the maintainer and ask them to bump their version number.
M = as.matrix(unclass(packageDescription(i)))
mode(M) = "character"
out = capture.output(print(M[grep("commit|Date|Packaged",rownames(M),ignore.case=TRUE),,drop=FALSE]))
cat(out[-1], sep="\n", file=fnam, append=TRUE)
}
system(paste0("grep -H . ./",i,".Rcheck/00check.log >> ",fnam)) # the fail messages
cat("\n\n", file=fnam, append=TRUE)
}
system(paste(app, fnam), wait=FALSE)
invisible()
}
inst()
if (!status()) run(choose=1) # run pkgs in not-started status; i.e. updates to and new revdeps. Unless status() found any all-suggests-unavail which it then started run() for with _SUGGESTS=FALSE
# Now R prompt is ready to fix any problems with CRAN or Bioconductor updates.
# Then run run(), status() and log() as per section in CRAN_Release.cmd
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.