
# Generated by `rjournal_pdf_article()` using `knitr::purl()`: do not edit by hand
# Please edit kcov.Rmd to modify this file

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk[['set']](echo = FALSE, warning = FALSE, message = FALSE)
# Try adding \usepackage{float} with chunk option fig.pos='h' if floating figures are driving you crazy.

# TODO: publish package and replace this with normal load call
library(devtools)
load_all()

# spatial libraries
library(sp)
library(sf)
library(terra)

# load helpers and prepared datasets
source('helpers.R')
meuse_list = get_meuse()

# copy the log zinc points data
pts = meuse_list[['soils']]['log_zinc']

# save default graphical parameters
.par_default = par(no.readonly=TRUE)

# set up a common color palette (this is the default in blitzkrig)
.pal = function(n) { hcl.colors(n, 'Spectral', rev=TRUE) }

library(smoothr)
library(ggplot2)



## ----cfun-prep----------------------------------------------------------------
# define column names and a caption, shared by the two kable calls below (one for HTML output, one for LaTeX)
cfun_colnames = c('code', 'name', 'alias', '$c\\left( \\Delta \\right)$')
cfun_caption = 'A list of one-dimensional correlation functions available in blitzkrig. Kronecker covariances are constructed from the product of a pair of these functions, one receiving the $x$ separation distance as its argument, and the other the $y$ distance. Normalization constants are omitted for brevity, and $K_p$ denotes the order-$p$ modified Bessel function of the second kind (where $p$ is a shape parameter).'

# define the table
cfun_df = data.frame(

  code = c('exp', 'gau', 'gxp', 'mat', 'sph'),
  name = c('Exponential', 'Gaussian', 'Gamma-Exponential', "Mat\\'ern", 'Spherical'),
  alias = c('-', 'Squared-Exponential, or Stable Kernel', 'Power-Exponential', "Whittle-Mat\\'ern", '-'),
  fun = c('$\\exp\\left( -\\Delta \\right)$',
          '$\\exp\\left( -\\Delta^2 \\right)$',
          '$\\exp\\left( -\\Delta^p \\right)$',
          '$\\Delta^p K_p\\left( \\Delta \\right)$',
          '$1 - (3/2)\\Delta + (1/2)\\Delta^3$')
)
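# Illustrative sketch (base R only, not part of the article pipeline): the caption above describes
# a Kronecker covariance as the product of two 1-d correlation functions, one taking x separation
# distances and the other y distances. For a small demo grid with unit spacing, the covariance
# matrix of the column-major vectorized grid is the Kronecker product of the two 1-d correlation
# matrices. The kernel choices and range parameters below are arbitrary examples.
demo_ny = 4
demo_nx = 5
demo_dy = abs(outer(seq(demo_ny), seq(demo_ny), '-')) # y separation distances
demo_dx = abs(outer(seq(demo_nx), seq(demo_nx), '-')) # x separation distances
demo_cy = exp(-(demo_dy/2)^2)                         # Gaussian ('gau') correlations along y
demo_cx = exp(-demo_dx/3)                             # exponential ('exp') correlations along x
demo_V = kronecker(demo_cx, demo_cy)                  # 20 x 20 separable covariance (unit variance)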


## ----cfun-table, eval = knitr::is_html_output()-------------------------------
#> knitr::kable(cfun_df, format='html', col.names=cfun_colnames, caption=cfun_caption, escape=FALSE)


## ----cfun-table-latex, eval = knitr::is_latex_output()------------------------
kableExtra::kable_styling(knitr::kable(cfun_df, format='latex', col.names=cfun_colnames, booktabs=TRUE, escape=FALSE, caption=cfun_caption), font_size=9)


## ----meuse-setup, include=TRUE, echo=TRUE-------------------------------------
# snap log zinc data to grid of specified resolution
g = pkern_snap(pts, g=list(gres=c(y=50, x=50)))
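# optional sanity check, relying only on grid-list fields already used in this script: each
# snapped point occupies one non-NA cell of g[['gval']], so the count of non-NA cells cannot
# exceed the number of source points (it can be smaller when points share a cell)
n_snapped = sum(!is.na(g[['gval']]))
stopifnot(n_snapped <= nrow(pts))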


## ----meuse-png, fig.show='hold', out.width='50%', fig.dim=c(5,5), fig.cap='Zinc concentrations (in log parts per billion) from the Meuse dataset, a soil survey on the floodplain of the Meuse River (left). These data are snapped to a grid for use with blitzkrig (right)', fig.alt='In the left pane, a diagram showing the winding Meuse river surrounded by about a hundred colored points indicating zinc levels at sample sites. In the right pane, a black-and-white grid of intersecting lines, with a small number of colored cells, one for each point on the left pane.'----
# plot source data using sf package, restoring default graphical parameters afterwards
plot(pts, pch=16, reset=FALSE, pal=.pal, main=NA, key.pos=1)
plot(st_geometry(pts), pch=1, add=TRUE)
plot(meuse_list[['river_poly']], col='lightblue', border=NA, add=TRUE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)

# plot gridded version using the blitzkrig package
pkern_plot(g, col_grid='lightgrey', reset=FALSE, zlab='log(ppb)')
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)


## ----meuse-ok, include=TRUE, echo=TRUE----------------------------------------
# fit the covariance model and mean
fit_result_ok = pkern_fit(g, quiet=TRUE)


## ----meuse-ok-pred, echo=TRUE-------------------------------------------------
# compute conditional mean and variance 
z_ok = pkern_cmean(g_obs=g, pars=fit_result_ok[['pars']])
z_ok_var = pkern_cmean(g_obs=g, pars=fit_result_ok[['pars']], out='v', quiet=TRUE)
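# note: both outputs are numeric vectors aligned with g[['gval']] (one value per grid cell),
# which is why they can be masked element-wise and re-attached to copies of g via modifyList() below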


## ----meuse-ok-pred-png, results='hide', fig.show='hold', out.width='50%', fig.dim=c(5,5), fig.cap='Ordinary kriging prediction and variance heatmaps generated by blitzkrig for the Meuse example. Predictions are generated for the entire grid, but are masked here to show detail in areas nearest the observed points.', fig.alt='Two heatmap images are shown, both masked to the same region, covering the convex hull of the observed points, and displaying a range of smoothly varying colors'----
# make a masking polygon based on variances
is_var_high = z_ok_var > quantile(z_ok_var, 0.35, na.rm=TRUE)
mask_rast = pkern_export(modifyList(g, list(gval=is_var_high)))
mask_poly = st_as_sf(as.polygons(mask_rast))[1,] |> smoothr::smooth(method='ksmooth', smoothness=2)

# convert to raster via terra
mask_g = pkern_grid(terra::rasterize(as(st_geometry(mask_poly), 'SpatVector'), pkern_export(g)))
is_ok_masked = is.na(mask_g[['gval']])
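# the round trip above exports the high-variance indicator to a terra raster, polygonizes and
# smooths it, then rasterizes the retained polygon back onto the same grid; cells falling
# outside the polygon come back as NA, and these define the mask applied to the plots below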

# create pkern grid objects from the masked output vectors
z_ok_plot = z_ok
z_ok_plot[is_ok_masked] = NA
g_ok = modifyList(g, list(gval=z_ok_plot))

z_ok_var_plot = z_ok_var
z_ok_var_plot[is_ok_masked] = NA
g_ok_var = modifyList(g, list(gval=z_ok_var_plot))


# plot predictor on log scale with river line 
pkern_plot(g_ok, axes=FALSE, zlab='log(ppb)', xlab='', ylab='', main='ordinary kriging predictor', reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)

# plot variance  
pkern_plot(g_ok_var, axes=FALSE, zlab='V(x,y)', xlab='', ylab='', main='ordinary kriging variance', pal='Inferno', reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)


## ----meuse-uk-prep, echo=TRUE-------------------------------------------------
# measure distances for every point in the grid
river_dist = sf::st_distance(pkern_coords(g, out='sf'), meuse_list[['river_line']])

# include both distance and its square root
river_dist = units::drop_units(river_dist)
X = scale(cbind(river_dist, sqrt(river_dist)))

# find the subset of predictors at observed response locations
is_obs = !is.na(g[['gval']])
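# hedged consistency check: the indexing X[is_obs,] used below assumes pkern_coords() returns
# one point per grid cell, in the same order as g[['gval']]
stopifnot(nrow(X) == length(is_obs))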


## ----meuse-uk, echo=TRUE------------------------------------------------------
# fit the covariance model again with X
fit_result_uk = pkern_fit(g_obs=g, X=X[is_obs,], quiet=TRUE)

# compute conditional mean and variance (supply all distances in X this time)
z_uk = pkern_cmean(g, fit_result_uk[['pars']], X=X)
z_uk_var = pkern_cmean(g, fit_result_uk[['pars']], X=X, out='v', quiet=TRUE)


## ----meuse-lm, echo=TRUE------------------------------------------------------
# use GLS to estimate the spatially varying trend 
z_lm = pkern_GLS(g, fit_result_uk[['pars']], X=X, out='z')
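# for reference, the GLS coefficients solve b = (X'V^-1 X)^-1 X'V^-1 z, with V the fitted
# covariance matrix of the observed cells; z_lm is used below as the fitted trend surface
# (X b) evaluated at every grid cell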


## ----meuse-uk-pred-png, results='hide', fig.show='hold', out.width='33%', fig.dim=c(6,8), fig.cap='Universal kriging predictions and variance generated by blitzkrig for the Meuse example. During model fitting, the response variable (log zinc concentration) is de-trended using a linear predictor (left) based on distance to the river and its square root, resulting in more detail in the kriging predictions (middle) and a decrease in kriging variance (right).', fig.alt='Three heatmap images are shown: The first displays a winding river from above, with colours becoming brighter near the river; The second and third cover the same region, showing smoothly varying colors in two different palettes'----

g_lm_plot = modifyList(g, list(gval=z_lm))

z_uk_plot = z_uk
z_uk_plot[is_ok_masked] = NA
g_uk_plot = modifyList(g, list(gval=z_uk_plot))

z_uk_var_plot = z_uk_var
z_uk_var_plot[is_ok_masked] = NA
g_uk_var_plot = modifyList(g, list(gval=z_uk_var_plot))

# plot linear predictor on log scale with river line 
pkern_plot(g_lm_plot, axes=FALSE, zlab='log(ppb)', xlab='', ylab='', main='covariates', reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)

# plot kriging predictor on log scale with river line 
pkern_plot(g_uk_plot, axes=FALSE, zlab='log(ppb)', xlab='', ylab='', main='kriging prediction', reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)

# omit high variance pixels (95th percentile) to better show interior detail
zlim = c(min(z_uk_var_plot, na.rm=TRUE), quantile(z_uk_var_plot, p=0.95, na.rm=TRUE))

# plot kriging variance on log scale with river line 
pkern_plot(g_uk_var_plot, axes=FALSE, zlab='log(ppb)', xlab='', ylab='', main='kriging variance', pal='Inferno', zlim=zlim, reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)



## ----meuse-ok-vg, results='hide'----------------------------------------------
# compute sample semivariogram for the OK model
vg_ok = pkern_sample_vg(g)

# recompute variogram with trend removed
g_residual = g
g_residual[['gval']][is_obs] = g_residual[['gval']][is_obs] - z_lm[is_obs]
vg_UK = pkern_sample_vg(g_residual)
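# note: the UK variogram is estimated from residuals (observed values minus the GLS trend),
# so it reflects the covariance structure of the de-trended field used in universal kriging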


## ----meuse-vg-png, fig.show='hold', out.width='50%', fig.dim=c(5, 3), fig.pos='!htb', fig.cap='Fitted covariance models from kriging on the Meuse dataset, visualized in two ways: On the left, estimated semi-variogram values (circles) are plotted next to model predictions (blue curves). On the right, a heatmap displays covariances with respect to the central point. The top two plots show the fitted OK model. The bottom two plots show the UK model, where removing a linear trend from the response data has resulted in lower variance and a smaller range.', fig.alt='A four panel plot: On the top row, the left panel is a scatterplot of points in the shape of a hill, with a smoothly varying blue line running through the point cloud, and the right panel is a raster image resembling a blurry photo of a purple ball occupying the middle third of the image. The bottom row is similar but the blue line plateaus sooner and the fuzzy ball is much smaller.'----

# plot the sample semi-variogram with theoretical curve in blue for OK model fit
par(mar=c(5.1, 5.1, 4.1, 2.1))
pkern_plot_semi(vg_ok, fit_result_ok[['pars']], main='OK model semi-variogram')
par(.par_default)

# plot correlation heatmap for OK model fit
g_plot = modifyList(g, list(gdim=rep(min(g[['gdim']]), 2), gyx=NULL, gval=NULL))
pkern_plot_pars(fit_result_ok[['pars']], g_plot, ij=T, main='')

# plot the sample semi-variogram with theoretical curve in blue
par(mar=c(5.1, 5.1, 4.1, 2.1))
pkern_plot_semi(vg_UK, fit_result_uk[['pars']], main='UK model semi-variogram')
par(.par_default)

# plot correlation heatmap
g_plot = modifyList(g, list(gdim=rep(min(g[['gdim']]), 2), gyx=NULL, gval=NULL))
pkern_plot_pars(fit_result_uk[['pars']], g_plot, ij=T, main='')




## ----meuse-uk-aniso, echo=TRUE------------------------------------------------
# fit a 2 + 2 parameter Gaussian covariance with anisotropy
fit_result_uk_gau = pkern_fit(g_obs=g, X=X[is_obs,], iso=FALSE, quiet=TRUE)

# fit a 2 + 4 parameter product Matern
fit_result_uk_mat = pkern_fit(g_obs=g, X=X[is_obs,], pars='mat', iso=FALSE, quiet=TRUE)


## ----meuse-alt-fit, fig.show='hold', out.width='19%', fig.dim=c(5, 5), fig.pos='!htb', fig.cap='Five examples of anisotropic covariance structures fitted to the Meuse data. As in the previous figure, darker pixels indicate stronger correlation with the central point. From left to right, these are the Kronecker covariances formed by setting both $c_x$ and $c_y$ equal to "gau", "mat", "gxp", "sph", and "exp" models, respectively.'----

# fit the other kernels
fit_result_uk_gxp = pkern_fit(g, pars='gxp', X=X[is_obs,], iso=FALSE, quiet=TRUE)
fit_result_uk_sph = pkern_fit(g, pars='sph', X=X[is_obs,], iso=FALSE, quiet=TRUE)
fit_result_uk_exp = pkern_fit(g, pars='exp', X=X[is_obs,], iso=FALSE, quiet=TRUE)

# plot everything in minimal style
g_plot = modifyList(g, list(gdim=rep(min(g[['gdim']]), 2), gyx=NULL, gval=NULL))
pkern_plot_pars(fit_result_uk_gau$pars, g_plot, minimal=TRUE, main='', xlab='', ylab='', pal='Inferno')
pkern_plot_pars(fit_result_uk_mat$pars, g_plot, minimal=TRUE, main='', xlab='', ylab='', pal='Inferno')
pkern_plot_pars(fit_result_uk_gxp$pars, g_plot, minimal=TRUE, main='', xlab='', ylab='', pal='Inferno')
pkern_plot_pars(fit_result_uk_sph$pars, g_plot, minimal=TRUE, main='', xlab='', ylab='', pal='Inferno')
pkern_plot_pars(fit_result_uk_exp$pars, g_plot, minimal=TRUE, main='', xlab='', ylab='', pal='Inferno')
par(.par_default)


## ----meuse-alt-pred, results='hide'-------------------------------------------
# extract the GLS estimates of the linear predictor (trend)
pts_lm_mat = pkern_GLS(g, fit_result_uk_mat[['pars']], X=X, out='z')

# recompute variogram with trend removed
g_residual_mat = g
g_residual_mat[['gval']][is_obs] = g_residual_mat[['gval']][is_obs] - pts_lm_mat[is_obs]
vg_UK_mat = pkern_sample_vg(g_residual_mat)

# compute conditional mean and variance
z_uk_mat = pkern_cmean(g, fit_result_uk_mat[['pars']], X=X)
z_uk_mat_var = pkern_cmean(g, fit_result_uk_mat[['pars']], X=X, out='v', quiet=TRUE)


## ----meuse-alt-pred-png, results='hide', fig.show='hold', out.width='33%', fig.dim=c(5,6), fig.pos='!bht', fig.cap='Universal kriging results for the Meuse example using a product Matérn covariance function. The semi-variogram (left) is now a ribbon plot, showing a range of values at a given distance due to anisotropy. The middle and right panes show the model predictions and their variance.', fig.alt='Three panels: on the left, a semi-variogram plot with sample points and blue curves; in the middle and right, two heatmaps masked to the same region, showing smoothly varying colors in two different palettes'----

# copy data and apply mask
z_uk_mat_plot = z_uk_mat
z_uk_mat_plot[is_ok_masked] = NA
g_uk_mat_plot = modifyList(g, list(gval=z_uk_mat_plot))

z_uk_mat_var_plot = z_uk_mat_var
z_uk_mat_var_plot[is_ok_masked] = NA
g_uk_mat_var_plot = modifyList(g, list(gval=z_uk_mat_var_plot))

# plot the sample semi-variogram with theoretical curve in blue for anisotropic model fit
par(mar=c(5.1, 5.1, 4.1, 2.1))
pkern_plot_semi(vg_UK_mat, fit_result_uk_mat[['pars']], main='Matern model semi-variogram')
par(.par_default)

# plot kriging predictor on log scale with river line 
pkern_plot(g_uk_mat_plot, axes=FALSE, zlab='log(ppb)', xlab='', ylab='', main='kriging prediction', reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)

# omit high variance pixels (98th percentile) to better show interior detail
zlim = c(min(z_uk_mat_var_plot, na.rm=TRUE), quantile(z_uk_mat_var_plot, p=0.98, na.rm=TRUE))

# plot kriging variance on log scale with river line 
pkern_plot(g_uk_mat_var_plot, axes=FALSE, zlab='log(ppb)', xlab='', ylab='', main='kriging variance', pal='Inferno', zlim=zlim, reset=FALSE)
plot(meuse_list[['river_line']], col='black', lwd=1, add=TRUE)
par(.par_default)



## ----meuse-cv-prep------------------------------------------------------------
# load pre-computed results
cv_results_path = 'D:/pkern/vignettes/comparisons/data/cv_results.csv'
cv_results_all = read.csv(cv_results_path, row.names=NULL)
cv_results = cv_results_all[c('name', 'covariance', 'isotropic', 'covariates', 'parameters', 'rMSPE', 'rMSPEb')]
#cv_results['covariates'] = as.logical(cv_results['covariates'])

# this helper function call runs the CV workflow (uncomment to run again)
#cv_results = run_cv(g, X, n_fold=5, n_rep=25)
#write.csv(cv_results, cv_results_path)

# define caption text here
caption_cv = 'Estimates of the root mean squared prediction error on the Meuse dataset for log zinc (rMSPE) and its back-transformed values (rMSPEb), from a 5-fold cross-validation (CV) experiment repeated 25 times. Results are reported for the four kriging models presented earlier, illustrating a progression of improvement as we add covariates and refine the covariance model.'


## ----cv-table, eval = knitr::is_html_output()---------------------------------
#> knitr::kable(cv_results, format='html', caption=caption_cv)


## ----cv-table-latex, eval = knitr::is_latex_output()--------------------------
kableExtra::kable_styling(knitr::kable(cv_results, format='latex', booktabs=TRUE, caption=caption_cv), font_size=9)


## ----treed-png, results='hide', fig.dim=c(5,4), fig.show='hold', out.width='33%', fig.pos='htb', fig.cap='A 1021 x 1349 raster of forest density in central BC, Canada (left) is up-scaled to produce a much coarser-resolution version with dimensions 32 x 43 (middle). blitzkrig can rapidly downscale rasters like these. In a UK model adjusting for elevation, the predictions at right (at original resolution) were generated in less than a second.'----

example_input_path = 'D:/pkern/vignettes/comparisons/data/inputs/treed_1376_1377329.rds'

# load the training data
temp_list = readRDS(example_input_path)
treed_g_train = temp_list[['training_grid']]
treed_pts = temp_list[['training_pts']]
rm(temp_list)

start_fit = Sys.time()

# extract covariate data from DEM
dem_path = 'D:/pkern/vignettes/comparisons/data/treed_dem.tif'
dem_g_src = pkern_grid(rast(dem_path))

# load the full resolution treed data and snap training points to it
treed_path = 'D:/pkern/vignettes/comparisons/data/treed.tif'
treed_g_src = pkern_grid(rast(treed_path))
treed_g_snap = pkern_snap(treed_pts, treed_g_src)
is_treed_obs = !is.na(treed_g_snap[['gval']])

# use elevation and its square root as covariates
treed_X = cbind(dem_g_src[['gval']], sqrt(dem_g_src[['gval']]))
# the Meuse covariate matrix X is not used again below, so it is overwritten here
X = NA

# fit the model
treed_fit_result = pkern_fit(treed_g_train, X=treed_X[is_treed_obs,], iso=FALSE, pars='gau', quiet=TRUE)
pars = treed_fit_result[['pars']]

end_fit = Sys.time()
start_pred = Sys.time()
  
# make predictions
treed_z = pkern_cmean(g_obs=treed_g_snap, X=treed_X, pars=pars)
treed_g_pred = modifyList(treed_g_snap, list(gval=treed_z))

# report both times
end_pred = Sys.time()
print(end_fit-start_fit)
print(end_pred-start_pred)
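# note: the fitting time reported above also includes reading the DEM and the full-resolution
# treed rasters from disk, since the timer starts before those load calls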


# find a common range for colorbar
zlim = range(c(treed_g_pred$gval, treed_g_train$gval, treed_g_src$gval), na.rm=TRUE)
pal_nm = 'YlGn'
pal = function(n) { hcl.colors(n, pal_nm, rev=TRUE) }

# draw the three plot panes
pkern_plot(treed_g_src, zlim=zlim, main='treed data (original scale)', minimal=TRUE, pal=pal_nm, rev=TRUE, col_box='black')
pkern_plot(treed_g_train, zlim=zlim, main='treed data (up-scaled)', minimal=TRUE, col_grid='white', pal=pal_nm, rev=TRUE)
pkern_plot(treed_g_pred, zlim=zlim, zlab='density', main='kriging predictions', minimal=TRUE, pal=pal_nm, rev=TRUE)



## ----bench-results------------------------------------------------------------
# load the benchmarking results
inputs_csv_path = 'D:/pkern/vignettes/comparisons/data/inputs.csv'
result_csv_path = 'D:/pkern/vignettes/comparisons/data/outputs.csv'
inputs_df = read.csv(inputs_csv_path, row.names=NULL)
results_df = read.csv(result_csv_path, row.names=NULL)
results_names = unique(results_df[['name_in']]) 

# TODO: run benchmarking script again with a higher max output points and save error messages (if any) to the data frame,
# then copy some of this code to the end of that script and save the csv. The factor reordering probably has to stay here
# also include gridded and gx, gy columns

# set variance eval time to 0 for packages that include the variance in their kriging prediction output
results_df[['var_s']][ results_df[['pkg']] %in% c('gstat', 'geoR') ] = 0 

# create variance + prediction eval time column
results_df[['pred_and_var_s']] = results_df[['pred_s']] + results_df[['var_s']]
results_df[['with_var']] = 'yes' 

# add new rows to represent prediction time only
results_only_pred = results_df[results_df[['pkg']] %in% c('fields', 'pkern', 'RandomFields'),]
results_only_pred[['pred_and_var_s']] = results_only_pred[['pred_s']]
results_only_pred[['with_var']] = 'no'
results_df = rbind(results_df, results_only_pred)

# re-order the packages factor to the order we want them listed in the legend
results_df[['pkg']] = factor(results_df[['pkg']], levels=c('pkern', 'fields', 'RandomFields', 'gstat', 'geoR'))

# indicate if the example dataset is a complete grid
results_df[['is_complete']] = c('no', 'yes')[ 1 + as.integer(!is.na(results_df[['gdim_x']])) ]
results_df[['is_complete']] = factor(results_df[['is_complete']], levels=c('yes', 'no'))
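# (the recode above maps NA in gdim_x to 'no' and anything else to 'yes', e.g.
# gdim_x = c(100, NA) becomes is_complete = c('yes', 'no'))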


## ----bench-fit-png, fig.dim=c(5,2.5), fig.pos='!htb', fig.cap='Evaluation times for likelihood-based OK model fitting to example data with a range of sample sizes ($n_o$). Cases where the observed data form a complete regular grid are indicated by dashed lines, whereas incomplete cases are indicated with solid lines. The likelihood function in blitzkrig is optimized for the complete case, leading to faster performance. Two additional up-scaled treed datasets (with $n_o$ 5440 and 21632) were tested to show large-$n_o$ behaviour in blitzkrig.'----

# plot time to fit the model against number of observed points
ggplot(subset(results_df, !(pkg == 'gstat'))) +
  aes(x=n_train, y=fit_s, color=pkg, lty=is_complete) +
  geom_point(size=1, pch=1) +
  geom_line(lwd=0.5) +
  scale_linetype_manual(values=c(no='solid', yes='11')) +
  xlab('number of observed points') +
  ylab('fitting time (seconds)') +
  labs(color='R package',
       lty='complete grid') +
  scale_x_log10(
    breaks = scales::trans_breaks('log10', function(x) 10^x),
    labels = scales::trans_format('log10', scales::math_format(10^.x))
  ) +
  scale_y_log10(
    breaks = scales::trans_breaks('log10', function(x) 10^x),
    labels = scales::trans_format('log10', scales::math_format(10^.x))
  ) +
  theme_bw() +
  theme(text=element_text(size=8),
        strip.text.x=element_text(face='bold'),
        strip.text.y=element_text(face='bold'))


## ----bench-pred-png, fig.dim=c(6,4), fig.pos='!tb', fig.cap='A comparison of computational performance in kriging as a function of the number of prediction points, $n_p$. Dashed lines indicate prediction time on its own, and solid lines indicate time to compute both predictions and variance. Panels separate results from six example datasets, where the top row examples are incomplete (grids containing NAs) and the bottom row examples are complete. As $n_o$ grows large, blitzkrig shows relatively fast performance, particularly on complete datasets.'----
# names of the two groups of example datasets (pts = incomplete, rast = complete)
pts_names = c('ozone', 'meuse', 'treed')
rast_names = paste0('treed_', c(88, 352, 1376))

# reorder data to reflect the panel order we want in the plot
results_plot_df = results_df[ results_df[['name_in']] %in% c(pts_names, rast_names),]
results_plot_df = results_plot_df[order( match(results_plot_df[['name_in']], c(pts_names, rast_names)) ),]

# create titles with sample size and order by input size
results_plot_df[['n_plot']] = paste0('(n_o=', results_plot_df[['n_train']], ')')
results_plot_df[['name_plot']] = apply(results_plot_df[, c('name_in', 'n_plot')], 1, paste, collapse=' ')
results_plot_df[['name_plot']] = factor(results_plot_df[['name_plot']], levels=unique(results_plot_df[['name_plot']]))

# make the plot
ggplot(results_plot_df) +
  aes(x=n_out, y=pred_and_var_s, color=pkg, lty=with_var) +
  geom_point(size=1, pch=1) +
  geom_line(lwd=0.5) +
  scale_linetype_manual(values = c(no='11', yes='solid')) +
  xlab('number of points predicted') + 
  ylab('prediction time (seconds)') +
  labs(color='R package',
       lty='with variance') +
  scale_x_log10(
   breaks = scales::trans_breaks('log10', function(x) 10^x),
   labels = scales::trans_format('log10', scales::math_format(10^.x))
  ) +
  scale_y_log10(
   breaks = scales::trans_breaks('log10', function(x) 10^x),
   labels = scales::trans_format('log10', scales::math_format(10^.x))
  ) +
  facet_wrap(vars(name_plot)) +
  theme_bw() +
  theme(text=element_text(size=8),
        strip.text.x=element_text(face='bold'),
        strip.text.y=element_text(face='bold'))