R/berries.R

## this analysis borrows heavily from a short talk I once gave
## entitled 'R for System Administration', which incidentally
## also used this data (to show how to find dupes)

# Build a table of every source tarball in a local CRAN mirror, split each
# file name into package and version, flag the newest version of each
# package, and save the result as an RDS file.
#
# NOTE: this code uses data.table (`data.table()`, `:=`, `.()`, `last()`,
# `setkeyv()`), which is not loaded anywhere in this file; it must already
# be attached when this script runs.

cranberriesDir <- "~/cranberries/sources"       # a near-complete mirror of CRAN

# `pattern` is a regular expression, not a shell glob: escape the dots and
# anchor at the end so only names that really end in ".tar.gz" are matched.
files <- list.files(cranberriesDir, pattern = "\\.tar\\.gz$",
                    full.names = TRUE)

# One row per tarball: its path plus the columns returned by file.info().
berries <- data.table(file = files, file.info(files))

# Decompose "<pkg>_<ver>.tar.gz":
#   baseNM - file name without directory
#   nameNE - file name without the ".tar.gz" extension
#   pkg    - everything before the last underscore
#   ver    - everything after the last underscore
# Each pattern is anchored and can match at most once, so sub() suffices.
berries[, baseNM := basename(file)][
  , nameNE := sub("\\.tar\\.gz$", "", baseNM)][
  , pkg := sub("(.*)_.*$", "\\1", nameNE)][
  , ver := sub(".*_(.*)$", "\\1", nameNE)]

# Sort by package, then by modification time, so that within each package
# the last row is the most recently modified tarball.
setkeyv(berries, c("pkg", "mtime"))

# A row is "newest" when its version equals that of the last (most recently
# modified) tarball of the same package.
berries[, newest := (ver == last(ver)), by = pkg]

# Peek at the first few newest entries; head() avoids the NA-filled rows
# that `[1:10]` would produce when fewer than ten rows exist.
head(berries[newest == TRUE, .(pkg, ver, file)], 10L)

saveRDS(berries, file = "inst/data/berries.rds")
barryrowlingson/cransurv documentation built on Feb. 6, 2020, 4:41 a.m.