### cran interaction code
### available.packages() tells us the current live packages
### https://cran.r-project.org/src/contrib/Archive/
### gets all packages
### AP = set of packages from available.packages()
### ARC = set of packages from https://cran.r-project.org/src/contrib/Archive/
### then
### LIVE = AP
### DEAD = setdiff(ARC, AP)
LIVE <- function(){
ap = available.packages(filter=list("duplicates","CRAN"))
return(rownames(ap))
}
ARC <- function(url = "https://cran.r-project.org/src/contrib/Archive/"){
html = xml2::read_html(url)
tab = rvest::html_table(html)
tab = tab[[1]][,c("Name","Last modified")]
tab = tab[grepl("^[A-Za-z]",tab$Name),]
tab = tab[nchar(tab$`Last modified`)>10,]
tab$date = as.Date(tab$`Last modified`)
tab$`Last modified` = NULL
tab$Name = gsub("/","",tab$Name)
tab
}
DEAD <- function(live=LIVE(), archive=ARC()){
deadnames = setdiff(archive$Name, live)
archive[archive$Name %in% deadnames,]
}
dead_info <- function(deadname, dead){
data.frame(
name = deadname,
born = birth(deadname),
died = dead[dead$Name==deadname,]$date
)
}
birth <- function(name){
history = try(pkgsearch::cran_package_history(name))
if(inherits(history,"try-error")){
return(NA)
}
born = min(as.Date(history$date))
born
}
live_info <- function(livename){
born = birth(livename)
data.frame(
name = livename,
born = born,
died = NA
)
}
survival_data <- function(livenames, dead){
alive = do.call(
rbind,
lapply(livenames,
function(n){
message("live: ",n)
live_info(n)
})
)
deadp = do.call(
rbind,
lapply(dead$Name,
function(n){
message("dead: ",n)
dead_info(n, dead)
}
)
)
combo = rbind(deadp, alive)
combo
}
survpack <- function(bd, now=as.Date(Sys.time())){
## if not dead yet, set died to now and event to 0
## remove if NA in born
fail = is.na(bd$born)
bd = bd[!fail,]
bd$event = 1-as.numeric(is.na(bd$died))
bd$died[is.na(bd$died)] = now
bd = bd[!(bd$born == bd$died),]
bd$start = as.numeric(bd$born)
bd$end = as.numeric(bd$died)
bd
}
survival_covariates <- function(packagenames, verbose=FALSE){
n = length(packagenames)
i = 1
do.call(rbind,lapply(packagenames, function(packagename){
vmessage = message
if(!verbose){
vmessage = function(...){}
}
vmessage("Getting ",packagename, ", ", i, " of ",n)
i <<- i + 1
history = cran_package_history(packagename)
history$D = as.Date(history$crandb_file_date)
nh = nrow(history)
deps = history$dependencies[nh][[1]]$type
if(length(deps)==0){
deps="None"
}
dtab = deptab(history)
dtab$D = as.Date(history$crandb_file_date)
meanDepends = meanZ(dtab$D, dtab$Depends)
meanImports = meanZ(dtab$D, dtab$Imports)
meanSuggests = meanZ(dtab$D, dtab$Suggests)
data.frame(
Name = packagename,
Depends = sum(deps=="Depends"),
Imports = sum(deps=="Imports"),
Suggests = sum(deps=="Suggests"),
meanDepends = meanDepends,
meanImports = meanImports,
meanSuggests = meanSuggests
)
}))
}
integral = function(x,y){
sum(diff(x) * (head(y,-1)+tail(y,-1)))/2
}
meanZ <- function(t,N){
t = as.numeric(t)
# package time use is a stairstep
if(length(t)==1){return(N)}
sum(N[-length(N)] * diff(t))/(max(t)-min(t))
}
meanZint = function(t,N){
# this is trapezoidal integration - joining the dots
if(length(t)==1){return(N)}
integral(as.numeric(t),N)/as.numeric(diff(range(as.numeric(t))))
}
deptab = function(his){
dtab = data.frame(
do.call(
rbind,
lapply(
his$dependencies,
function(D){
table(
factor(D$type,levels=c("Depends","Suggests","Imports"))
)}
)
))
dtab
}
cranurl = "https://cran.r-project.org/src/contrib/"
archive = paste0(cranurl, "00Archive/")
pkgarc = function(package){
paste0(archive,package)
}
size_history <- function(package, keep_dupe_dates=FALSE, ap){
html = xml2::read_html(pkgarc(package))
cells = rvest::html_nodes(html, "td")
celltext = rvest::html_text(cells)
releases = which(grepl(".tar.gz$",celltext))
sh = data.frame(
rel = celltext[releases],
date = celltext[releases+1],
size = celltext[releases+2],
stringsAsFactors=FALSE
)
sh$date = as.Date(sh$date)
sh = sh[order(sh$date),]
if(!keep_dupe_dates){
sh = sh[!duplicated(sh$date),]
}
sh$bytes = parse_size(sh$size)
message("need to add latest if present in top level archive, ie live")
if(package %in% rownames(ap)){
}
sh
}
parse_size <- function(sizes){
## approximate file size from web listing. "1.4K" and "234M" format.
mult = substr(sizes, nchar(sizes),nchar(sizes))
mnum = c("K"=2^10,"M"=2^20)
num = as.numeric(substr(sizes, 1, nchar(sizes)-1))
num * mnum[mult]
}
size_currents <- function(){
index = xml2::read_html(cranurl)
texts = html_text(html_nodes(html,"td"))
meta = data.frame(tarball=texts[ipkg], date=as.Date(texts[ipkg+1]), size=texts[ipkg+2],
stringsAsFactors=FALSE)
meta$name = gsub("_.*","",meta$tarball)
meta$bytes = parse_size(meta$size)
meta
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.