#' Generates various reports concerning the CRAN metadata
#'
#' Downloads the CRAN package database with `tools::CRAN_package_db()`, writes a
#' report of the packages whose license restricts use (plus their direct
#' reverse depends / reverse imports) as xlsx when the `xlsx` package is
#' installed and as csv otherwise, tries to open that report, then prints
#' summary statistics and draws plots of per-package author and dependency
#' counts.
#'
#' @param reports.directory character, directory the report is written to; when
#'   `NULL` or not an existing directory, `.choose_directory()` is called to
#'   pick one
#' @param include.code.review boolean, include code review will install all
#'   packages to this machine, this can take a long time on first run
#' @return the deciles of the log of the per-package author count (the value of
#'   the last expression). NOTE(review): earlier documentation promised a
#'   data.frame of CRAN metadata and code review information, but the code
#'   never returned one; confirm the intended return value.
#' @importFrom magrittr "%>%"
#' @export
cran.meta.generate.reports <- function(reports.directory = NULL, include.code.review = FALSE) {
  if (include.code.review) {
    cran.meta.install.all.packages()
  }

  if (is.null(reports.directory) || !dir.exists(reports.directory)) {
    reports.directory <- .choose_directory()
  }

  cranData <- tools::CRAN_package_db()
  # Fix broken column names (duplicates, and invalid strings)
  names(cranData) <- make.names(names(cranData), unique = TRUE)

  # ---------------------------------------------------------------------------
  # Packages whose license restricts use, plus their direct reverse
  # depends / reverse imports.
  # [TODO] recurse until all reverse depends and reverse imports are found;
  # currently only one level is resolved.
  # ---------------------------------------------------------------------------
  package.license.restricts.use <- dplyr::filter(
    cranData,
    cranData$License_restricts_use == "yes"
  )
  descriptive.cols <- c(
    "Package", "License", "Title", "Reverse.depends", "Reverse.imports"
  )

  reverse.deps <- union(
    package.license.restricts.use[["Reverse.depends"]],
    package.license.restricts.use[["Reverse.imports"]]
  )
  reverse.deps <- reverse.deps[!is.na(reverse.deps)]
  reverse.deps <- unlist(strsplit(reverse.deps, ","))
  # Strip every whitespace character; a character class is required here, a
  # plain string pattern would only match that exact sequence of characters.
  reverse.deps <- unique(gsub("[[:space:]]+", "", reverse.deps))

  pack.restricts.use.RDRI <- data.frame(
    Package = reverse.deps,
    stringsAsFactors = FALSE
  )
  pack.restricts.use.RDRI <- dplyr::inner_join(
    pack.restricts.use.RDRI,
    cranData[, descriptive.cols, drop = FALSE],
    by = "Package"
  )
  pack.restricts.use.RDRI <- dplyr::union(
    pack.restricts.use.RDRI,
    package.license.restricts.use[, descriptive.cols, drop = FALSE]
  )

  # ---------------------------------------------------------------------------
  # Export: a list of packages that the enterprise may not want to use as a
  # general rule. xlsx is optional, fall back to csv when it is not installed.
  # ---------------------------------------------------------------------------
  if (requireNamespace("xlsx", quietly = TRUE)) {
    report.filepath <- file.path(
      reports.directory,
      "package.license.restricts.use.xlsx"
    )
    xlsx::write.xlsx(
      pack.restricts.use.RDRI,
      file = report.filepath,
      sheetName = "license_restricts_use",
      row.names = FALSE
    )
  } else {
    report.filepath <- file.path(
      reports.directory,
      "package.license.restricts.use.csv"
    )
    utils::write.csv(
      pack.restricts.use.RDRI,
      file = report.filepath,
      row.names = FALSE
    )
  }

  # Open the report for the user.
  if (.Platform$OS.type == "unix") {
    # Only when LibreOffice is on the PATH; Sys.which() returns "" otherwise.
    soffice <- unname(Sys.which("soffice"))
    if (nzchar(soffice)) {
      # wait = FALSE detaches the viewer so the rest of the report still runs;
      # the path is quoted so directories containing spaces work.
      system2(soffice, c("-o", shQuote(report.filepath)), wait = FALSE)
    }
  } else {
    shell.exec(normalizePath(report.filepath))
  }

  # ---------------------------------------------------------------------------
  # Per-package counts, after
  # https://rviews.rstudio.com/2018/03/08/cran-package-metadata
  # Columns are picked by name because the column positions of the CRAN
  # database are not stable.
  # ---------------------------------------------------------------------------
  meta.cols <- c(
    Package = "Package", Dep = "Depends", Imp = "Imports",
    Aut = "Author", RD = "Reverse.depends", RI = "Reverse.imports"
  )
  meta_data <- cranData[, meta.cols, drop = FALSE]
  names(meta_data) <- names(meta.cols)

  # Drop "[role]" annotations and line breaks from the author field.
  authors <- gsub("\\[[^]]*]", "", meta_data$Aut)
  authors <- gsub("[\r\n]", "", authors)

  # Number of comma separated entries per element, 0 for NA.
  count_items <- function(x) {
    ifelse(is.na(x), 0, lengths(stringr::str_split(x, ",")))
  }
  # Number of non-NA entries after combining two comma separated fields.
  count_union <- function(x, y) {
    parts <- unlist(union(strsplit(x, ","), strsplit(y, ",")))
    sum(!is.na(parts))
  }
  count_union_cols <- function(a, b) {
    vapply(
      seq_along(a),
      function(i) count_union(a[i], b[i]),
      integer(1)
    )
  }

  c_dat <- data.frame(
    Package = meta_data$Package,
    Aut = count_items(authors),
    Dep = count_items(meta_data$Dep),
    Imp = count_items(meta_data$Imp),
    RD = count_items(meta_data$RD),
    RI = count_items(meta_data$RI),
    DepImp = count_union_cols(meta_data$Dep, meta_data$Imp),
    RDRI = count_union_cols(meta_data$RD, meta_data$RI),
    stringsAsFactors = FALSE
  )
  c_dat <- unique(c_dat)
  # Most depended-upon packages first.
  c_dat <- c_dat[order(-c_dat$RDRI), , drop = FALSE]

  # ---------------------------------------------------------------------------
  # Summaries. Inside a function bare expressions are not auto-printed, so
  # each summary is printed explicitly.
  # ---------------------------------------------------------------------------
  feature.cols <- c_dat[-1]
  res <- data.frame(
    Features = names(feature.cols),
    mean = round(vapply(feature.cols, mean, numeric(1)), digits = 2),
    sd = round(vapply(feature.cols, stats::sd, numeric(1)), digits = 2),
    median = vapply(feature.cols, stats::median, numeric(1)),
    row.names = NULL,
    stringsAsFactors = FALSE
  )
  deciles <- seq(0, 1, .1)

  print(res)
  print(stats::quantile(c_dat$RDRI))
  print(utils::head(c_dat[, c("Package", "Aut", "DepImp", "RDRI")], 15))
  print(stats::quantile(c_dat$Aut, probs = deciles))
  print(stats::quantile(c_dat$DepImp, probs = deciles))

  # Collaboration: author count per package.
  # [TODO] save plots
  plot(c_dat$Aut)
  plot(x = c_dat$Aut, y = (c_dat$RDRI))

  # Long tail, so look at the author count on a log scale.
  stats::quantile(log(c_dat$Aut), probs = deciles)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.