## R/addNeedsCompilationToSD.R

library(data.table)

## snapshot of the CRAN package database, as a data.table keyed on the package name
cran <- tools::CRAN_package_db()
setDT(cran)
setkeyv(cran, "Package")

## NOTE(review): 'sd' is not created in this script -- presumably a data.frame left in the
## session by an earlier step; confirm before running this file on its own
setDT(sd)
setkeyv(sd, "package")

## look up NeedsCompilation for every package listed in 'sd'
nativeornot <- cran[, list(Package, NeedsCompilation)][sd[, list(package)]]

## packages with no NeedsCompilation value: in effect the 'dead' packages, which show up
## here as a by-product of 'compilation status unknown'
nativeornot[is.na(NeedsCompilation), list(Package)]
setkeyv(nativeornot, "Package")

## local cranberries pool, reduced to the rows flagged as newest (one per package is enough for now)
berries <- readRDS("inst/data/berries.rds")
berries <- berries[newest == TRUE]
setkeyv(berries, "pkg")

## dead packages for which the local cranberries pool has NO file -- now empty on pass two
berries[nativeornot[is.na(NeedsCompilation), list(Package)], list(pkg, file)][is.na(file)]

## dead packages for which the pool DOES have a file
d <- berries[nativeornot[is.na(NeedsCompilation), list(Package)], list(pkg, file)][!is.na(file)]

## Report whether a package tarball contains a directory named 'src'.
##
## The archive's entry names are listed with utils::untar(list = TRUE) and
## searched for "/src/", i.e. a 'src' directory below the top-level folder (at
## any depth, as with the earlier `tar | grep "/src/$"` pipeline). Matching the
## path component rather than only the directory entry also covers archives
## that store the files under src/ without a separate entry for the directory.
##
## Arguments:
##   file  path to a single (optionally compressed) tar archive
## Value:
##   TRUE if at least one entry path contains "/src/", FALSE otherwise;
##   FALSE with a warning when listing the archive raises an error.
hasSourceOrNot <- function(file) {
  file <- as.character(file)
  stopifnot(length(file) == 1L)

  ## untar() quotes the file name itself and leaves no connection behind,
  ## unlike a hand-built shell command read through pipe()
  entries <- tryCatch(
    utils::untar(file, list = TRUE),
    error = function(e) {
      warning("could not list ", file, ": ", conditionMessage(e), call. = FALSE)
      character(0)
    }
  )

  ## any(): several matching entries must still yield a single TRUE
  any(grepl("/src/", entries, fixed = TRUE))
}

## inspect each tarball once to see whether it has a source directory
## (needed for the 90%-ish of the files that lack a NeedsCompilation value)
d[, hasSrc := hasSourceOrNot(file), by = file]

## join onto 'nativeornot' to get a temporary data.table
td <- d[nativeornot, list(pkg, hasSrc, NeedsCompilation)]

## where NeedsCompilation is known, turn its "yes" / other text into a proper boolean
td[!is.na(NeedsCompilation), nc := NeedsCompilation == "yes"]

## at this point 'nc' comes from CRAN_package_db() and is NA for the old packages
## (2475 NAs when this was last run); now carry over the hasSrc boolean for the
## rows where 'nc' is NA -- only 211 NAs were left afterwards
td[is.na(nc), nc := hasSrc]

## join the result onto 'sd', creating 'sd2'
sd2 <- sd[td[, list(pkg, nc)]]

saveRDS(sd2, "inst/data/sd2.rds")



                  
## barryrowlingson/cransurv documentation built on Feb. 6, 2020, 4:41 a.m.