#' Recursively list the files of a GitHub repository
#'
#' \code{git.recursive} scrapes the GitHub web listing of a repository (or of
#' one folder inside a repository) and walks every sub-folder it finds,
#' returning one row per file.
#'
#' The listing is read from the HTML of the GitHub page using the CSS selector
#' \code{.css-truncate-target .js-navigation-open}, so the function depends on
#' GitHub's page markup and requires network access. Entries whose link
#' contains \code{/blob/} are reported as files; every other entry is treated
#' as a folder and crawled in turn. Each URL is fetched at most once per call.
#'
#' The function calls \pkg{xml2}, \pkg{rvest} and \pkg{stringi} through their
#' namespaces; it does not attach any package and does not change global
#' options.
#'
#' @param x A single string: either a \code{"username/repo"} combination, a
#'   path below \code{https://github.com} (with or without a leading slash),
#'   or the full URL of a repository folder.
#' @param filter_pat Optional regular expression. When supplied, only rows
#'   whose \code{name} matches the pattern are returned. \code{NULL} (the
#'   default) returns every file.
#'
#' @return A data frame with one row per file and the following character
#'   columns:
#' \describe{
#'   \item{\strong{file_link}}{Full URL of the file page on github.com.}
#'   \item{\strong{name}}{The \code{title} attribute of the listing entry.}
#'   \item{\strong{type_of}}{Entry type; always \code{"file"} in the returned
#'     rows because folders are expanded rather than returned.}
#'   \item{\strong{file_type}}{File extension including the leading dot, or
#'     \code{NA} when the name has no extension.}
#'   \item{\strong{file_name}}{The part of \code{name} after the last slash.}
#'   \item{\strong{folder}}{Path of the folder that contains the file,
#'     relative to the repository root (\code{""} for top-level files).}
#'   \item{\strong{raw}}{URL of the raw file on raw.githubusercontent.com.}
#' }
#' A listing with no files yields a data frame with zero rows and the same
#' columns.
#'
#' @examples
#' \dontrun{
#' had_xml2 <- git.recursive('hadley/xml2')
#' dim(had_xml2)
#'
#' # get the first R file from the Ropensci Magick repo
#' r_files <- git.recursive('ropensci/magick', filter_pat = "\\.R$")
#' r_files$raw[1]
#' }
#'
#' @export
git.recursive <- function(x, filter_pat = NULL) {
  stopifnot(is.character(x), length(x) == 1L, !is.na(x))

  # CSS selector for the file/folder links of a GitHub directory listing.
  link_selector <- ".css-truncate-target .js-navigation-open"

  # Turn "user/repo", "/user/repo/..." or a full URL into an absolute URL.
  to_url <- function(path) {
    is_absolute <- grepl("^http", path)
    needs_slash <- !is_absolute & !grepl("^/", path)
    path[needs_slash] <- paste0("/", path[needs_slash])
    path[!is_absolute] <- paste0("https://github.com", path[!is_absolute])
    path
  }

  # Build the result rows from parallel vectors of hrefs and titles.
  # Always returns the same seven columns, even for zero entries.
  describe_entries <- function(href, name) {
    link <- to_url(href)

    # Classify by link rather than by file extension: extension-less files
    # (LICENSE, Makefile) are still files, and dotted folders (.github) are
    # still folders.
    is_file <- grepl("/blob/", link, fixed = TRUE)
    type_of <- rep("dir", length(link))
    type_of[is_file] <- "file"

    # Path below "<user>/<repo>/blob/<ref>/". The ref is assumed to be a
    # single path segment (a branch name containing "/" cannot be told apart
    # from a folder here).
    rel_path <- sub("^https://github\\.com/[^/]+/[^/]+/blob/[^/]+/", "", link)

    data.frame(
      file_link = link,
      name = name,
      type_of = type_of,
      file_type = stringi::stri_extract_first_regex(name, "\\.[[:alnum:]]+$"),
      file_name = sub("^.*/", "", name),
      # Drop the last path segment; top-level files end up with "".
      folder = sub("/?[^/]*$", "", rel_path),
      # Only the "/blob/" that follows "<user>/<repo>" is removed, so file or
      # folder names that merely start with "blob" are left intact.
      raw = sub(
        "^https://github\\.com/([^/]+)/([^/]+)/blob/",
        "https://raw.githubusercontent.com/\\1/\\2/",
        link
      ),
      stringsAsFactors = FALSE
    )
  }

  # Download one listing page and describe its entries.
  list_entries <- function(url) {
    nodes <- rvest::html_nodes(xml2::read_html(url), link_selector)
    href <- rvest::html_attr(nodes, "href")
    name <- rvest::html_attr(nodes, "title")
    has_href <- !is.na(href) # entries without a link cannot be followed
    describe_entries(href[has_href], name[has_href])
  }

  # URLs already fetched during this call; guards against crawling in circles.
  seen <- new.env(parent = emptyenv())

  # Return the file rows found at `url` and, recursively, below it.
  crawl <- function(url) {
    if (exists(url, envir = seen, inherits = FALSE)) {
      return(describe_entries(character(0), character(0)))
    }
    assign(url, TRUE, envir = seen)

    entries <- list_entries(url)
    is_file <- entries$type_of == "file"
    nested <- lapply(entries$file_link[!is_file], crawl)
    do.call(rbind, c(list(entries[is_file, , drop = FALSE]), nested))
  }

  out <- crawl(to_url(x))
  rownames(out) <- NULL

  if (!is.null(filter_pat)) {
    keep <- stringi::stri_detect_regex(out$name, filter_pat)
    # A missing title yields NA; treat it as "no match" instead of letting it
    # create an all-NA row.
    out <- out[!is.na(keep) & keep, , drop = FALSE]
  }

  out
}
# Short alias: `f.git` is bound to the same function object as `git.recursive`.
f.git <- git.recursive
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.