# Global knitr chunk options: show the code of every chunk (echo = TRUE) and
# keep warnings and messages out of the rendered output.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

Intro

This is the replication script for 'archivist: An R Package for Managing, Recording and Restoring Data Analysis Results' (Przemyslaw Biecek, Marcin Kosinski) submitted to JSS.

First, make sure that archivist is installed.

# Install archivist on first use, then attach it.
# requireNamespace() only tests whether the package is available, so the
# attach happens in exactly one place; library() stops with an error if the
# installation did not succeed instead of silently continuing.
if (!requireNamespace("archivist", quietly = TRUE)) {
  install.packages("archivist")
}
library(archivist)

Section 2. Motivation

Reading artifacts from GitHub

# Read a single artifact from GitHub. The address names the user, the
# repository, the subdirectory holding the archivist repo, and the md5hash.
archivist::aread("pbiecek/Eseje/arepo/ba7f58fafe7373420e3ddce039558140")

Reading artifacts from package

library("archivist")
# Fetch every artifact tagged "class:lm" from the pbiecek/graphGallery
# repository.
models <- asearch("pbiecek/graphGallery", patterns = "class:lm")
# One BIC value per model. vapply() guarantees a numeric vector even when the
# search returns nothing, whereas sapply() would yield an empty list and make
# sort() fail.
modelsBIC <- vapply(models, BIC, numeric(1))
sort(modelsBIC)

Reading artifacts from Shiny

# The application is hosted on shinyapps.io, where the container may need to
# be woken up first; requesting its page does that. Other Shiny deployments
# do not need this step.
invisible(xml2::read_html("https://cogito.shinyapps.io/archivistShiny/"))

# Read the artifact exposed by the Shiny application.
archivist::aread(
  "https://cogito.shinyapps.io/archivistShiny/arepo/ca680b829abd8f0a4bd2347dcf9fe534"
)

Section 3. Functionality

Section 3.1 Repository management

Creation of a new empty repository

# Directory that holds the local repository used in the examples below.
repo <- "arepo"
# Create the repository in that directory; default = TRUE requests that it
# also becomes the default local repository.
createLocalRepo(repoDir = repo, default = TRUE)

Deletion of an existing repository

# Remove the local repository stored in the "arepo" directory.
repo <- "arepo"
deleteLocalRepo(repoDir = repo)

Copying artifacts from other repositories

# Re-create the local repository in "arepo" (registered as the default) and
# copy one artifact, identified by its md5hash, from the GitHub repository
# pbiecek/graphGallery into it.
repo <- "arepo"
createLocalRepo(repoDir = repo, default = TRUE)
copyRemoteRepo(
  repoTo = repo,
  md5hashes = "7f3453331910e3f321ef97d87adb5bad",
  user = "pbiecek",
  repo = "graphGallery",
  repoType = "github"
)

Showing repository statistics

# Tags stored in the repository that `repo` points to.
showLocalRepo(repoDir = repo, method = "tags")
# Summary of the graphGallery repository located inside the installed
# archivist package.
summaryLocalRepo(
  repoDir = system.file("graphGallery", package = "archivist")
)

Setting a default repository

# Default remote repository: pbiecek/graphGallery on GitHub.
setRemoteRepo(user = "pbiecek", repo = "graphGallery", repoType = "github")
# Default local repository: the graphGallery directory inside the installed
# archivist package.
setLocalRepo(repoDir = system.file("graphGallery", package = "archivist"))

Saving to the default local repository

# Point the default local repository back at `repo`, so the save below needs
# no explicit repoDir argument.
setLocalRepo(repoDir = repo)
data(iris)
saveToLocalRepo(iris)
# Set the archivist option "repoType" to "github".
aoptions("repoType", "github")

Section 3.2 Artifact management

Saving an R object into a repository

library("ggplot2")
repo <- "arepo"
# Scatter plot of petal length against sepal length for the iris data,
# stored as an artifact in the repository.
pl <- qplot(Sepal.Length, Petal.Length, data = iris)
saveToRepo(pl, repoDir = repo)

# List the tags now present in the repository.
showLocalRepo(repoDir = repo, method = "tags")

Session info for this object

# Session information for the artifact with this md5hash, looked up without
# an explicit repository (presumably the default one; confirm).
asession("11127cc6ce69a89d11d0e30865a33c13")

Serialization of an object creation event into repository

library("archivist")
createLocalRepo("arepo", default = TRUE)
library("dplyr")

# Build the model summary step by step, chaining the calls with archivist's
# %a% operator, and keep the final result in `tmp`.
tmp <- iris %a%
  filter(Sepal.Length < 6) %a%
  lm(Petal.Length ~ Species, data = .) %a%
  summary()

# Show the history of `tmp`, first from the object itself and then by md5hash.
ahistory(tmp)
ahistory(md5hash = "050e41ec3bc40b3004bc6bdd356acae7")

Loading an object from a repository

Remote, local or in a package

# Load one artifact from the remote repository pbiecek/graphGallery ...
loadFromRemoteRepo(
  "7f3453331910e3f321ef97d87adb5bad",
  repo = "graphGallery",
  user = "pbiecek",
  value = TRUE
)
# ... and from the graphGallery repository inside the installed package,
# this time using an abbreviated hash.
loadFromLocalRepo(
  "7f3453",
  system.file("graphGallery", package = "archivist"),
  value = TRUE
)

# aread() takes the repository and the hash as one address.
archivist::aread("pbiecek/graphGallery/7f3453331910e3f321ef97d87adb5bad")

library("archivist")
# With a default local repository set, the hash alone is passed to aread().
setLocalRepo(system.file("graphGallery", package = "archivist"))
# loadFromLocalRepo("7f3453", value=TRUE)
archivist::aread("7f3453")
setLocalRepo(system.file("graphGallery", package = "archivist"))
model <- aread("2a6e492cb6982f230e48cf46023e2e4f")
summary(model)
# Hash of the restored object, printed for comparison with the requested one.
digest::digest(model)

Removal of an object from repository

# Remove the artifact with this md5hash from the repository in `repo`.
rmFromLocalRepo("7f3453331910e3f321ef97d87adb5bad", repoDir = repo)

Remove all older than 30 days

# Search for artifacts dated between 2010-01-01 and 30 days before today,
# then print what was found.
obj2rm <- searchInLocalRepo(
  list(dateFrom = "2010-01-01", dateTo = Sys.Date() - 30),
  repoDir = repo
)
obj2rm

# many = TRUE lets a whole vector of hashes be removed in one call.
rmFromLocalRepo(obj2rm, repoDir = repo, many = TRUE)

Section 3.3 Search for an artifact

Search in a local/GitHub repository

# Search by a single tag.
searchInLocalRepo(
  pattern = "class:gg",
  repoDir = system.file("graphGallery", package = "archivist")
)

# Search by a date range.
searchInLocalRepo(
  pattern = list(dateFrom = "2016-01-01", dateTo = "2016-02-07"),
  repoDir = system.file("graphGallery", package = "archivist")
)

# Search with a vector of two tags.
searchInLocalRepo(
  pattern = c("class:gg", "labelx:Sepal.Length"),
  repoDir = system.file("graphGallery", package = "archivist")
)

Retrieval of a list of R objects with given tags

# Search the default local repository (the graphGallery copy inside the
# package) for linear models tagged with a Sepal.Length coefficient.
setLocalRepo(system.file("graphGallery", package = "archivist"))
models <- asearch(patterns = c("class:lm", "coefname:Sepal.Length"))

# The same query against the pbiecek/graphGallery repository; the result
# replaces `models`.
models <- asearch(
  "pbiecek/graphGallery",
  patterns = c("class:lm", "coefname:Sepal.Length")
)
lapply(models, coef)

# Plots tagged with Sepal.Length as their x label.
plots <- asearch(patterns = c("class:gg", "labelx:Sepal.Length"))
length(plots)

# Pass all retrieved plots to grid.arrange() at once.
library("gridExtra")
do.call(grid.arrange, plots)

Interactive search

# Path to the graphGallery repository inside the installed archivist package.
arepo <- system.file("graphGallery", package = "archivist")
# The interactive search is left commented out; uncomment it to run it in a
# live session.
#shinySearchInLocalRepo(arepo)

Section 3.4 Extensions

Archivisation of all results of a specific function

library("archivist")
# Dedicated repository that receives the traced models; it is also made the
# default so that the search below needs no repository argument.
createLocalRepo("allModels", default = TRUE)
atrace("lm", "z")
# NOTE(review): presumably the tracing of lm() stays active after this
# section; confirm whether it should be switched off before the repository
# is deleted.

# The article shows only one call to lm(); several are made here.
lm(Sepal.Length~Sepal.Width, data=iris)
lm(Sepal.Length~Petal.Length, data=iris)
lm(Sepal.Length~Petal.Length, data=iris)

# One BIC value per archived model. vapply() pins the result to a numeric
# vector, whereas sapply() would return an empty list if nothing was found.
vapply(asearch("class:lm"), BIC, numeric(1))

deleteLocalRepo("allModels")

Integration with the knitr package

Requires a knitr report to work

# addHooksToPrint(class=c("ggplot", "data.frame"),
#     repoDir = "arepo",
#     repo = "Eseje", user = "pbiecek", subdir = "arepo")

Gallery of artifacts in the repository

Requires a GitHub repository to work

# createMDGallery("arepo/readme.md",
#     repo="Eseje", user = "pbiecek", subdir = "arepo",
#     addMiniature = TRUE, addTags = TRUE)

Restoring older versions of packages

# Session information for the artifact at this remote address.
asession("pbiecek/graphGallery/arepo/600bda83cb840947976bd1ce3a11879d")

# Be warned: the restoreLibs() line below will install a lot of packages in
# old versions, which is why both lines are left commented out.
# restoreLibs("pbiecek/graphGallery/arepo/600bda83cb840947976bd1ce3a11879d")
# aread("pbiecek/graphGallery/arepo/600bda83cb840947976bd1ce3a11879d")

R Session

# Print the R version, platform and loaded packages used for this run.
sessionInfo()


pbiecek/archivist documentation built on May 25, 2021, 11:36 p.m.