Nothing
## ----setup, include=FALSE-----------------------------------------------------
use_cached_data = TRUE
can_run = require(kdtools) &&
require(ggplot2) &&
require(tidytext) &&
require(printr) &&
require(scales) &&
kdtools::has_cxx17()
bench_ntuples = 1e7
bench_ntrials = 21
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
eval = can_run
)
if (can_run) theme_set(theme_classic())
## ----eval=!can_run, echo=FALSE------------------------------------------------
# if (has_cxx17()) {
# message("Required packages missing -- code will not be evaluated")
# } else {
# message("kdtools needs C++17 for full functionality, code will not be evaluated")
# }
## ----echo=FALSE, fig.width=6, fig.height=6------------------------------------
nr = 5e3
x = matrix(runif(nr), nc = 2)
y = kd_sort(matrix_to_tuples(x))
z = as.data.frame(tuples_to_matrix(y))
z$i = 1:nrow(z)
names(z) = c("x", "y", "i")
ggplot(z, aes(x, y)) +
geom_path(color = "darkgrey") +
geom_point(size = 0.25) +
geom_point(aes(color = i), alpha = 0.15, size = 5) +
scale_color_gradientn(colors = rainbow(100, s = 0.8, v = 0.8)) +
guides(color = "none") +
coord_fixed()
## ----include=FALSE------------------------------------------------------------
cache_file = "../inst/extdata/sort_benchmark_data"
if (!file.exists(cache_file))
cache_file = system.file("extdata/sort_benchmark_data", package = "kdtools")
if (use_cached_data && file.exists(cache_file)) {
load(cache_file)
} else {
reps = bench_ntrials
ndim = seq(1, 9, 2)
ntuples = round(seq(100, bench_ntuples, length.out = 5))
what = factor(c("kd_sort", "kd_sort_threaded", "lex_sort"))
res = expand.grid(ndim = ndim, ntuples = ntuples, what = what, time = 0)
for (i in 1:nrow(res))
{
times = numeric(reps)
for (j in seq_len(reps))
{
x = matrix(runif(prod(res[i, 1:2])), ncol = res[i, "ndim"])
y = matrix_to_tuples(x)
switch(as.character(res[i, "what"]),
kd_sort =
{
start_timing()
kd_sort(y, inplace = TRUE, parallel = FALSE)
end_timing()
},
kd_sort_threaded =
{
start_timing()
kd_sort(y, inplace = TRUE, parallel = TRUE)
end_timing()
},
lex_sort =
{
start_timing()
lex_sort(y, inplace = TRUE)
end_timing()
})
times[j] = get_timing()
}
res[i, "time"] = median(times)
}
save(res, file = cache_file)
}
## ----echo=FALSE, fig.width=7--------------------------------------------------
ggplot(res) +
geom_line(aes(x = ntuples, y = time, color = as.factor(ndim))) +
scale_x_continuous("Number of Tuples",
labels = trans_format("identity", scientific_format(digits = 1))) +
facet_wrap(~what) +
theme(strip.background = element_blank(),
legend.position = "bottom") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(color = "Number of dimensions",
x = "Number of tuples",
y = "Time (seconds)")
## -----------------------------------------------------------------------------
j = cbind(sample(1:9))
k = cbind(sample(1:9))
t(kd_sort(j))
t(kd_sort(cbind(j, k)))
t(kd_sort(cbind(0, j, k)))
t(kd_sort(cbind(j, k, 0)))
t(kd_sort(cbind(0, j, 0)))
## ----fig.width=5, fig.height=5, echo=FALSE------------------------------------
xy = matrix(runif(1e3), nc = 2)
p = c(0.33, 0.33, 0.66, 0.66)
xy = kd_sort(xy)
lb = kd_lower_bound(xy, p[1:2])
ub = kd_upper_bound(xy, p[3:4])
rq = kd_range_query(xy, p[1:2], p[3:4])
rq = kd_sort(rq)
i1 = 1:nrow(xy) >= lb
i2 = 1:nrow(xy) < ub
i3 = i1 & i2
i4 = sapply(1:nrow(xy), function(i) kd_binary_search(rq, xy[i, ]))
df = rbind(data.frame(x = xy[, 1], y = xy[, 2], i = i1, j = ">= lower bound"),
data.frame(x = xy[, 1], y = xy[, 2], i = i2, j = "< upper bound"),
data.frame(x = xy[, 1], y = xy[, 2], i = i3, j = ">= lower b. and < upper b."),
data.frame(x = xy[, 1], y = xy[, 2], i = i4, j = "within(lower, upper)"))
ggplot(df, aes(x, y)) +
geom_path(color = "darkgrey") +
geom_point(size = 0.25) +
geom_point(aes(color = i), alpha = 0.15, size = 3) +
scale_color_manual(values = c("steelblue", "aquamarine")) +
scale_x_continuous(breaks = c(0.33, 0.66)) +
scale_y_continuous(breaks = c(0.33, 0.66)) +
geom_vline(xintercept = p[1], color = "red") +
geom_hline(yintercept = p[2], color = "red") +
geom_vline(xintercept = p[3], color = "red") +
geom_hline(yintercept = p[4], color = "red") +
guides(color = "none") +
coord_fixed() +
facet_wrap(~j) +
theme(strip.background = element_blank())
## ---- echo=FALSE--------------------------------------------------------------
cache_file = "../inst/extdata/query_benchmark_data"
if (!file.exists(cache_file))
cache_file = system.file("extdata/query_benchmark_data", package = "kdtools")
if (use_cached_data && file.exists(cache_file)) {
load(cache_file)
} else {
build = as.data.frame(time_build(bench_ntuples, bench_ntrials))
query = as.data.frame(time_query(bench_ntuples, bench_ntrials))
build$Time = signif(build$Time, 3)
query$Time = signif(query$Time, 3)
build$Ratio = signif(build$Time / build$Time[1], 2)
query$Ratio = signif(query$Time / query$Time[1], 2)
save(build, query, file = cache_file)
}
build
query
## ---- echo=FALSE--------------------------------------------------------------
cache_file = "../inst/extdata/nn_benchmark_data"
if (!file.exists(cache_file))
cache_file = system.file("extdata/nn_benchmark_data", package = "kdtools")
if (use_cached_data && file.exists(cache_file)) {
load(cache_file)
} else {
res = as.data.frame(time_nn(bench_ntuples, bench_ntrials))
res$Time = signif(res$Time, 3)
res1 = res[1:4,]
res2 = res[5:8,]
rownames(res2) = NULL
res1$Ratio = signif(res1$Time / res1$Time[1], 2)
res2$Ratio = signif(res2$Time / res2$Time[1], 2)
save(res1, res2, file = cache_file)
}
res1
res2
## ----include=FALSE------------------------------------------------------------
header_code = '
#include <Rcpp.h>
using Rcpp::CharacterVector;
using Rcpp::NumericVector;
using Rcpp::Rcout;
using Rcpp::stop;
#include <chrono>
#include <utility>
using std::begin;
using std::end;
#include <tuple>
#include <vector>
using std::vector;
#include <string>
using std::string;
#define NO_TUPLEMAPR
#include <kdtools.h>
using namespace keittlab;
using kdtools::kd_sort;
using kdtools::kd_range_query;
using kdtools::kd_nearest_neighbors;
using key_type = std::tuple<double, string>;
using range_type = vector<key_type>;
using pointers_type = vector<key_type*>;
#define N 3
void print_range(const range_type& x)
{
int n = x.size();
if (n < 2 * N) stop("Not enough rows");
for (int i = 0; i != N; ++i)
Rcout << i << " "
<< std::get<0>(x[i]) << " "
<< std::get<1>(x[i]) << std::endl;
Rcout << "..." << std::endl;
for (int i = n - N; i != n; ++i)
Rcout << i << " "
<< std::get<0>(x[i]) << " "
<< std::get<1>(x[i]) << std::endl;
}
void print_range(const pointers_type& x)
{
int n = x.size();
if (n < 2 * N) stop("Not enough rows");
for (int i = 0; i != N; ++i)
Rcout << i << " "
<< std::get<0>(*x[i]) << " "
<< std::get<1>(*x[i]) << std::endl;
Rcout << "..." << std::endl;
for (int i = n - N; i != n; ++i)
Rcout << i << " "
<< std::get<0>(*x[i]) << " "
<< std::get<1>(*x[i]) << std::endl;
}
template <typename T>
std::chrono::duration<double> time_it(T x)
{
auto start = std::chrono::high_resolution_clock::now();
x();
auto finish = std::chrono::high_resolution_clock::now();
return finish - start;
}
struct make_key
{
key_type operator()(const double a, const char* b){ return key_type(a, b); }
};
struct greater_key
{
bool operator()(const double& lhs, const double& rhs)
{
return lhs > rhs;
}
bool operator()(const string& lhs, const string& rhs)
{
return lhs > rhs;
}
};
template<class InputIt1, class InputIt2>
double set_similarity(InputIt1 first1, InputIt1 last1,
InputIt2 first2, InputIt2 last2)
{
double num = 0, denom = 0;
while (first1 != last1 && first2 != last2) {
if (*first1 < *first2) {
++first1; ++denom;
} else {
if (!(*first2 < *first1)) {
++first1; ++num;
}
++first2; ++denom;
}
}
denom += std::distance(first1, last1) + std::distance(first2, last2);
return num / denom;
}
namespace keittlab {
namespace kdtools {
template <>
double scalar_dist(const std::string& lhs, const std::string& rhs)
{
std::string a(lhs), b(rhs);
std::sort(begin(a), end(a));
std::sort(begin(b), end(b));
return 1 - set_similarity(begin(a), end(a),
begin(b), end(b));
}
} // namespace kdtools
} // namespace keittlab
'
header_file = "mixed_query.h"
cat(header_code, file = file.path(tempdir(), header_file))
pkg_cppflags = Sys.getenv("PKG_CPPFLAGS")
pkg_cppflags = paste(pkg_cppflags, paste0("-I\"", tempdir(), "\""))
Sys.setenv(PKG_CPPFLAGS = pkg_cppflags)
## -----------------------------------------------------------------------------
data("parts_of_speech")
numbers = signif(runif(nrow(parts_of_speech)), 4)
strings = sample(tolower(parts_of_speech[[1]]))
mixed_query(numbers, strings)
## -----------------------------------------------------------------------------
sort_pointers(numbers, strings)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.