## working from the cran sync log
getmeta <- function(cransync = "./Data/cran.csv"){
    ## Read an rsync listing of CRAN's src/contrib tree and split it into three
    ## tables of processed rows.
    ##
    ## The listing is produced with:
    ## rsync -azn --out-format="%f,%l,%M" cran.r-project.org::CRAN/src/contrib . > Data/cran.csv
    ##
    ## cransync: path to that CSV file; its three columns are taken, in order,
    ##           as path, size and time.
    ##
    ## Returns a named list whose elements are the process_cran() results for
    ## the listing rows whose path matches:
    ##   live    - tarballs directly under contrib/
    ##   archive - tarballs under contrib/Archive/<dir>/
    ##   last    - the contrib/Archive/<dir> entries themselves
    cran <- data.table::fread(cransync, stringsAsFactors = FALSE)
    names(cran) <- c("path", "size", "time")
    ## The dots are escaped and the tarball patterns anchored at the end so that
    ## only paths that really end in ".tar.gz" are selected.
    regexps <- list(
        live = "contrib/[A-Za-z][^/]*\\.tar\\.gz$",
        archive = "contrib/Archive/[^/]*/[A-Za-z][^/]*\\.tar\\.gz$",
        last = "contrib/Archive/[^/]*$"
    )
    ## lapply() carries the names of `regexps` over to the result.
    parts <- lapply(regexps, function(regexp){
        process_cran(cran[grep(regexp, cran$path), ])
    })
    ## Some "live" packages are symlinks. No real package should be smaller than
    ## this, and all symlinks should be smaller.
    parts$live <- parts$live[parts$live$size > 120, ]
    parts
}
process_cran <- function(cran){
    ## Convert raw listing rows into package/version columns followed by the
    ## remaining listing columns, with `time` as POSIXct and `path` dropped.
    id_cols <- parse_cran_file(cran$path)
    ## every "-" in the timestamp text becomes a space before conversion
    stamp <- gsub("-", " ", cran$time, fixed = TRUE)
    cran$time <- as.POSIXct(stamp)
    cran$path <- NULL
    cbind(id_cols, cran)
}
parse_cran_file <- function(path){
    ## Split file paths into package name and version.
    ##
    ## path: character vector of paths, e.g. "src/contrib/pkg_1.2-3.tar.gz".
    ##
    ## Returns a data.table with character columns `package` and `version`, one
    ## row per element of `path`. A file name with no "_" in it ends up whole in
    ## both columns (less a trailing ".tar.gz" in `version`).
    filename <- sub(".*/", "", path)
    ## everything before the first "_" is the package name ...
    package <- sub("_.*", "", filename)
    ## ... and everything after the last "_" is the version plus extension
    version <- sub(".*_", "", filename)
    ## strip only a literal ".tar.gz" suffix: the dots are escaped and the
    ## pattern is anchored so nothing inside the version string can match
    version <- sub("\\.tar\\.gz$", "", version)
    data.table::data.table(package = package, version = version, stringsAsFactors = FALSE)
}
tsort <- function(d){
    ## Return the rows of `d` rearranged into ascending order of its `time`
    ## column.
    chronological <- order(d$time)
    d[chronological, ]
}
cranstatus <- function(name, meta, asof=Sys.time()){
    ## Return the full release history of package `name` up to time `asof`.
    ##
    ## name: package name, matched against the `package` column of the tables.
    ## meta: list with tables `live`, `archive` and `last`, each having at
    ##       least `package`, `version` and `time` columns.
    ## asof: time stamped on the closing row of a live package's history.
    ##
    ## Returns list(history, birth, death):
    ##   history - archived releases in time order, followed for a live package
    ##             by its live release, plus one closing row that repeats the
    ##             last release
    ##   birth   - earliest time in `history`
    ##   death   - NA for a live package, otherwise the latest `last` time
    ##
    ## Dead packages return a non-NA $death which is also the time of the
    ## closing row of $history. Live packages return NA $death and have their
    ## current version repeated with the `asof` date.
    ##
    ## The `asof` date should be later than any version dates, typically it
    ## would be "now", as in "what's the state of packages as of now". Hence
    ## the Sys.time() default.
    ##
    ## This could be modified to truncate $history at `asof` to answer
    ## questions like "what was the state of "splancs" as of 2010-01-01".
    ##
    ## Stops with an error when `name` has neither live nor archived releases.
    live <- tsort(meta$live[meta$live$package == name, ])
    archive <- tsort(meta$archive[meta$archive$package == name, ])
    last <- tsort(meta$last[meta$last$package == name, ])
    ## With no release at all there is nothing to build a history from; fail
    ## loudly instead of returning an empty history with infinite birth/death.
    if(nrow(live) == 0 && nrow(archive) == 0){
        stop("no CRAN releases found for package '", name, "'", call. = FALSE)
    }
    if(nrow(live) == 0){
        ## dead: repeat the final archived release, stamped with the death time
        death <- max(last$time)
        history <- rbind(archive, archive[nrow(archive), ])
        history$time[nrow(history)] <- death
    }else{
        ## live: repeat the current release, stamped with `asof`
        history <- rbind(archive, live)
        history <- rbind(history, history[nrow(history), ])
        history$time[nrow(history)] <- asof
        death <- NA
    }
    list(history = history, birth = min(history$time), death = death)
}
as_sd <- function(package, meta, now){
    ## Build a one-row data frame for `package` with columns package, birth,
    ## death, life and event, using birth/death as reported by cranstatus().
    ##
    ## When death is NA, life runs from birth to `now` and event is 0;
    ## otherwise life runs from birth to death and event is 1. Life is the
    ## difference of the two times after as.numeric() conversion.
    status <- cranstatus(package, meta)
    record <- data.frame(package = package, birth = status$birth, death = status$death)
    still_alive <- is.na(status$death)
    end_time <- if(still_alive) now else record$death
    record$life <- as.numeric(end_time) - as.numeric(record$birth)
    record$event <- if(still_alive) 0 else 1
    record
}
meansize <- function(package, meta){
    ## Summarise the size of a package over its release history.
    ##
    ## package: package name; meta: listing tables, handed on to cranstatus().
    ##
    ## Returns the result of meanZ() applied to the `time` and `size` columns
    ## of the history from cranstatus().
    ## NOTE(review): meanZ is defined elsewhere - presumably a time-weighted
    ## mean; confirm.
    status <- cranstatus(package, meta)
    releases <- status$history
    ## Use the full column name `time`; an abbreviation would only work through
    ## `$` partial matching.
    meanZ(releases$time, releases$size)
}
meandeps <- function(package, meta, now){
    ## Summarise the Depends / Suggests / Imports columns of deptab() over the
    ## history of `package`, returning a one-row data.table with one meanZ()
    ## result per dependency type.
    ##
    ## The history from cran_package_history() gets its last row repeated; that
    ## closing row is timed at `now` when cranstatus() reports no death, and at
    ## the latest time in the cranstatus() history otherwise.
    status <- cranstatus(package, meta)
    releases <- cran_package_history(package)
    releases <- rbind(releases, releases[nrow(releases),])
    n_search <- nrow(releases)
    n_listing <- nrow(status$history)
    ## both tables carry one closing row, hence the -1 in the reported counts
    if(n_search != n_listing){
        warning("Row mismatch in CRAN listing (",n_listing-1,") and pkgsearch history (",n_search-1,") for ",package)
    }
    deps <- deptab(releases)
    deps$time <- as.POSIXct(releases$crandb_file_date)
    closing <- length(deps$time)
    deps$time[closing] <- if(is.na(status$death)) now else max(status$history$time)
    data.table::data.table(
        Depends = meanZ(deps$time, deps$Depends),
        Suggests = meanZ(deps$time, deps$Suggests),
        Imports = meanZ(deps$time, deps$Imports)
    )
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.