#
# 1. Source the script to load the functions defined below
#
# Provide random paths ---------------------------------------------------------
if (FALSE)
{
set.seed(4)
paths <- kwb.pathdict::random_paths(
#max_depth = 10,
max_depth = 5,
max_elements = 5, depth_to_leaf_weight = function(d) 0.4
)
}
# Provide real paths -----------------------------------------------------------
if (FALSE)
{
# Define the path to a file containing a big path list
file <- "~/pathana-db/path-info_2019-12-01_hauke.csv"
# Read the big path list
all_paths <- fakin.path.app:::read_file_paths(file)$path
# Remove paths of which the encoding changed during pathlist-conversion
paths <- all_paths[- which_changed(all_paths)]
kwb.fakin:::store(paths, "test_splitting_paths")
}
# Convert paths to one matrix of subdirectory names ----------------------------
if (FALSE)
{
length(paths)
system.time(subdirs_1 <- kwb.file::to_subdir_matrix(paths, method = 1))
system.time(subdirs_2 <- kwb.file::to_subdir_matrix(paths, method = 2))
system.time(subdirs_3 <- kwb.file::to_subdir_matrix(paths, method = 3))
identical(subdirs_1, subdirs_2)
identical(subdirs_1, subdirs_3)
View(subdirs_1)
}
# Convert paths to a list of matrices of subdirectory names --------------------
if (FALSE)
{
# Create list of subdirectory matrices, one per depth level
subdirs_in_depth <- kwb.file::to_subdir_matrix(paths, result_type = "list")
# Number of paths in the different depths must sum up to number of all paths
stopifnot(identical(sum(sapply(subdirs_in_depth, nrow)), length(paths)))
# Restore the paths from the list of subdirectory matrices
system.time(backpaths_1 <- restore_paths(subdirs_in_depth))
# Compare with original paths
identical(backpaths_1, paths)
# Create a pathlist object
pl <- pathlist::pathlist(paths)
# Amount of empty space in the subdirectory matrix
mean(! nzchar(pl@folders))
# Restore the paths from the pathlist object
system.time(backpaths_2 <- as.character(pl))
# Compare with original paths
identical(backpaths_2, paths)
# Compare the sizes of both data structures
kwb.utils::percentage(size_mb(subdirs_in_depth), size_mb(pl@folders))
# Define depth of which to select paths
depth <- 4
# Select paths in a certain depth from the path object
tmp_paths_1 <- pl[which(pl@depths + lengths(strsplit(pl@root, "/")) == depth)]
# Select paths in a certain depth from the list structure
tmp_paths_2 <- restore_paths(subdirs_in_depth, keys = as.character(depth))
# Compare the selected path sets
identical(as.character(tmp_paths_1), tmp_paths_2)
# Look into the function definition of a class method and a package function
getMethod("as.character", "pathlist")
pathlist:::paste_segments
}
# Create node information from paths -------------------------------------------
if (FALSE)
{
# Split original paths at slashes
system.time(slashes <- gregexpr("/", paths))
# Compare performance with kwb.file::to_subdir_matrix()
system.time(subdirs <- kwb.file::to_subdir_matrix(paths))
# Calculate path depths
depths <- lengths(slashes) + 1
nodes <- lapply(seq_len(max(depths)), function(depth) {
print(depth)
#depth <- 3
indices <- which(depths >= depth)
x <- sapply(indices, function(i) {
if (depth < depths[i]) {
substr(paths[i], 1, slashes[[i]][depth] - 1)
} else {
paths[i]
}
})
unique(x)
})
}
# Create node information from list of subdirectory matrices -------------------
if (FALSE)
{
leaves <- lapply(subdirs_in_depth, function(x) x[, ncol(x)])
depths <- as.integer(names(subdirs_in_depth))[-1]
for (depth in depths) {
#depth <- depths[1]
print(depth)
keys <- list(me = as.character(depth), parent = as.character(depth - 1L))
subdirs <- subdirs_in_depth[[keys$me]]
parent_paths <- rowwise_paths(subdirs[, -depth, drop = FALSE])
parent_indices <- match(parent_paths, leaves[[keys$parent]])
attr(leaves[[keys$me]], "parent") <- parent_indices
}
str(leaves)
}
# which_changed ----------------------------------------------------------------
which_changed <- function(paths)
{
pl <- pathlist:::pathlist(paths)
reconstructed_paths <- as.character(pl)
has_changed <- reconstructed_paths != paths
if (! any(has_changed)) {
return(integer())
}
which_changed <- which(has_changed)
message(
"These ", sum(has_changed), " elements differ:\n",
paste0("[", which_changed, "]: ", paths[has_changed], collapse = "\n")
)
which_changed
}
# restore_paths ----------------------------------------------------------------
restore_paths <- function(subdirs_in_depth, keys = names(subdirs_in_depth))
{
stopifnot(all(keys %in% names(subdirs_in_depth)))
rows <- kwb.utils::getAttribute(subdirs_in_depth, "original_rows")
x <- lapply(subdirs_in_depth[keys], function(m) do.call(paste, c(
kwb.utils::asColumnList(m), sep = "/"
)))
unlist(x, use.names = FALSE)[order(unlist(rows[keys]))]
}
# rowwise_paths ----------------------------------------------------------------
rowwise_paths <- function(m)
{
#m <- subdirs_in_depth$`1`
paths <- do.call(paste, c(kwb.utils::asColumnList(m), sep = "/"))
stats::setNames(paths, rownames(m))
}
# size_mb ----------------------------------------------------------------------
size_mb <- function(x)
{
as.integer(object.size(x)) / 2^20
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.