IntroductionSLGP.R
In SLGP: Spatial Logistic Gaussian Process for Field Density Estimation

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)

## ----loadHousing, warning=FALSE-----------------------------------------------
library(dplyr)
# Load the dataset (available in MASS package)
require(MASS)
data("Boston", package = "MASS")
df <- Boston %>%
  mutate(age_bin = cut(age, breaks = seq(0, 100, by = 10), include.lowest = FALSE)) %>%
  group_by(age_bin) %>%
  mutate(age_bin = paste0(age_bin, "\nn=", n()))%>%
  ungroup()%>%
  mutate(age_bin = factor(age_bin, 
                          levels = sort(unique(age_bin), decreasing = FALSE))) %>%
  data.frame()

range_response <- c(0, 50) # Can use range(df$medv), or user defined range as we do here
range_x <- c(0, 100) # Can use range(df$age), or user defined range as we do here

## ----figureHousing, fig.cap = "A visual representation of the dependency of the median value of owner-occupied homes on proportion of owner-occupied units constructed before 1940 in the Boston Housing dataset.", fig.fullwidth=TRUE, fig.height=4, fig.width=10, fig.align='center',fig.pos="H"----
library(ggplot2)
library(ggpubr)
library(viridis)
# Scatterplot: med vs. age
scatter_plot <- ggplot(df, aes(x = age, y = medv)) +
  geom_point(alpha = 0.5, color = "navy") +
  labs(x = "Proportion of owner-occupied units\nbuilt prior to 1940 [AGE, %]", 
       y = "Median value of owner-occupied homes [MEDV, k$]",
       title = "Median value vs. age of homes") +
  theme_bw()+
  coord_cartesian(xlim=range_x,
                  ylim=range_response)

# Histogram: Distribution of med by age bin
hist_plot <- ggplot(df, aes(x = medv)) +
  geom_histogram(mapping=aes(y=after_stat(density)),
                 position = "identity", breaks = seq(0, 50, 2.5),
                 fill="darkgrey", col="grey50", lwd=0.2, alpha=0.7) +
  geom_rug(sides = "b", color = "navy", alpha = 0.5)+
  facet_wrap(~ age_bin, scales = "free_y", nrow=2) +
  labs(x = "Median value of owner-occupied homes [MEDV, k$]", 
       y = "Probability density", 
       title = "Histogram of median housing values by AGE group") +
  theme_bw()+
  coord_cartesian(xlim=range_response,
                  ylim=c(0, 0.25))
ggarrange(scatter_plot, hist_plot, ncol = 2, nrow = 1,
          widths = c(0.3, 0.7))
# ggsave("./Figures/scatter.pdf", width=10, height=3.5)

## ----SLGPfitting00------------------------------------------------------------
library(SLGP)
modelMAP <- slgp(medv~age, # Use a formula to specify predictors VS response
                 # Can use medv~. for all variables,
                 # Or medv ~ age + var2 + var3 for more variables
                 data=df,
                 method="MAP", #Maximum a posteriori estimation scheme
                 basisFunctionsUsed = "RFF",
                 interpolateBasisFun="WNN", # Accelerate inference
                 hyperparams = list(lengthscale=c(0.15, 0.15), 
                                    # Applied to normalised data
                                    # So 0.15 is 15% of the range of values
                                    sigma2=1), 
                 # Will be re-selected with sigmaEstimationMethod
                 sigmaEstimationMethod = "heuristic", 
                 # Set to heuristic for numerical stability                 
                 predictorsLower= c(range_x[1]),
                 predictorsUpper= c(range_x[2]),
                 responseRange= range_response,
                 opts_BasisFun = list(nFreq=100,
                                      MatParam=5/2),
                 seed=1)

## ----SLGPplotting2, fig.cap = "Predictive probability density of medv at age, seen over slices.", fig.fullwidth=TRUE, fig.height=5, fig.width=10, fig.align='center',fig.pos="H"----


library(SLGP)
library(viridis)
dfGrid <- data.frame(expand.grid(seq(range_x[1], range_x[2], 5), 
                                 seq(range_response[1], range_response[2],, 101)))
colnames(dfGrid) <- c("age", "medv")
pred <- predictSLGP_newNode(SLGPmodel=modelMAP,
                            newNodes = dfGrid)

scale_factor <- 100
ggplot()  +
  labs(y = "Proportion of owner-occupied units\nbuilt prior to 1940 [%]", 
       x = "Median value of owner-occupied homes [k$]",
       title = "Median value vs. age of homes") +
  theme_bw()+
  geom_ribbon(data=pred,
              mapping=aes(x=medv, ymax=scale_factor*pdf_1+age, 
                          ymin=age, group=-age, fill=age),
              col="grey", alpha=0.9)+
  geom_point(data=df,
             mapping=aes(x = medv, y = age), alpha = 0.5, color = "navy")+
  scale_fill_viridis(option = "plasma",
                     guide = guide_colorbar(nrow = 1,
                                            title = "Indexing variable: Proportion of owner-occupied units built prior to 1940",
                                            barheight = unit(2, units = "mm"),
                                            barwidth = unit(55, units = "mm"),
                                            title.position = 'top',
                                            label.position = "bottom",
                                            title.hjust = 0.5))+
  theme(legend.position = "bottom")+
  coord_flip()


## ----SLGPfitting2-------------------------------------------------------------
modelLaplace <- retrainSLGP(SLGPmodel=modelMAP, 
                            newdata = df, 
                            method="Laplace")

## ----SLGPfitting22, eval=FALSE------------------------------------------------
#  # Or equivalent, more explicit in the re-using of the elements
#  # From the SLGP prior
#  
#  modelLaplace <- slgp(medv~age,
#                   data=df,
#                   method="MAP", #Maximum a posteriori estimation scheme
#                   basisFunctionsUsed = "RFF",
#                   interpolateBasisFun="WNN", # Accelerate inference
#                   hyperparams = modelMAP@hyperparams,
#                   sigmaEstimationMethod = "none",# Already selected in the prior
#                   predictorsLower= c(range_x[1]),
#                   predictorsUpper= c(range_x[2]),
#                   responseRange= range_response,
#                   opts_BasisFun = modelMAP@opts_BasisFun,
#                   BasisFunParam = modelMAP@BasisFunParam,
#                   seed=1)

## ----SLGPLaplaceplot, fig.cap = "Predictive probability density (and draws from a Laplace approximation) of medv at age, seen over 3 slices.", fig.fullwidth=TRUE, fig.height=4, fig.width=8, fig.align='center',fig.pos="H"----
# Define the three selected values
selected_values <- c(20, 50, 95)
gap <- 2.5

dfGrid <- data.frame(expand.grid(seq(3), 
                                 seq(range_response[1], range_response[2],, 101)))
colnames(dfGrid) <- c("ID", "medv")
dfGrid$age <- selected_values[dfGrid$ID]
pred <- predictSLGP_newNode(SLGPmodel=modelLaplace,
                            newNodes = dfGrid)
pred$meanpdf <- rowMeans(pred[, -c(1:3)])

library(tidyr)
# Filter the data: keep values within ±5 of the selected ones
df_filtered <- df %>%
  mutate(interval=findInterval(age, c(0, 
                                      selected_values[1]-gap, 
                                      selected_values[1]+gap, 
                                      selected_values[2]-gap, 
                                      selected_values[2]+gap, 
                                      selected_values[3]-gap, 
                                      selected_values[3]+gap)))%>%
  filter(interval %in% c(2, 4, 6))%>%
  group_by(interval)%>%
  mutate(category = paste0("Age close to ", c("", selected_values[1],
                                              "", selected_values[2],
                                              "", selected_values[3])[interval], 
                           "\nn=", n()))
names <- sort(unique(df_filtered$category))
pred$category <- names[pred$ID]

set.seed(1)
selected_cols <- sample(seq(1000), size=10, replace=FALSE)
df_plot <- pred %>%
  dplyr::select(c("age", "medv", "category", 
                  paste0("pdf_", selected_cols)))%>%
  pivot_longer(-c("age", "medv", "category"))


ggplot(mapping=aes(x = medv)) +
  geom_histogram(df_filtered,
                 mapping=aes(y=after_stat(density)),
                 position = "identity", breaks = seq(0, 50, 2.5),
                 fill="darkgrey", col="grey50", lwd=0.2, alpha=0.7) +
  geom_rug(data=df_filtered, sides = "b", color = "navy", alpha = 0.5)+
  geom_line(data=df_plot, mapping=aes(y=value, group=name), 
            color = "black", lwd=0.1, alpha=0.5)+
  geom_line(data=pred, mapping=aes(y=meanpdf, group=category), color = "red")+
  facet_wrap(~ category, scales = "free_y", nrow=1) +
  labs(x = "Median value of owner-occupied homes [k$]", 
       y = "Probability density", 
       title = "Histogram of median value at bins centered at several 'age' values, with width 5\nversus SLGP draws from a Laplace approximation (black curves) and average (red curve)") +
  theme_bw()+
  coord_cartesian(xlim=range_response,
                  ylim=c(0, 0.25))


## ----SLGPfitting3, eval=FALSE-------------------------------------------------
#  modelMCMC <- slgp(medv~age, # Use a formula to specify predictors VS response
#                    # Can use medv~. for all variables,
#                    # Or medv ~ age + var2 + var3 for more variables
#                    data=df,
#                    method="MCMC", #MCMC
#                    basisFunctionsUsed = "RFF",
#                    interpolateBasisFun="WNN", # Accelerate inference
#                    hyperparams = list(lengthscale=c(0.15, 0.15),
#                                       # Applied to normalised data
#                                       # So 0.15 is 15% of the range of values
#                                       sigma2=1),
#                    # Will be re-selected with sigmaEstimationMethod
#                    sigmaEstimationMethod = "heuristic", # Set to heuristic for numerical stability
#                    predictorsLower= c(range_x[1]),
#                    predictorsUpper= c(range_x[2]),
#                    responseRange= range_response,
#                    opts_BasisFun = list(nFreq=100,
#                                         MatParam=5/2),
#                    opts = list(stan_chains=2, stan_iter=1000))

## ----SLGPMCMCplotMoments, fig.cap = "Simultaneous prediction of the fields moments (and associated uncertainty) using a SLGP model", fig.fullwidth=TRUE, fig.height=4, fig.width=10, fig.align='center',fig.pos="H"----
dfX <- data.frame(age=seq(range_x[1], range_x[2], 2))

predMean <- predictSLGP_moments(SLGPmodel=modelLaplace,
                                newNodes = dfX, 
                                power=c(1),
                                centered=FALSE) # Uncentered moments
# For the mean
predVar <- predictSLGP_moments(SLGPmodel=modelLaplace,
                               newNodes = dfX, 
                               power=c(2), 
                               centered=TRUE) # Centered moments
# For the variance, Kurtosis and Skewness

pred <- rbind(predMean, predVar)
pred <- pred %>%
  pivot_longer(-c("age", "power"))%>%
  mutate(value=ifelse(power==2, sqrt(value), value))%>% # Define std
  data.frame()

pred$power <- factor(c("Expected value", 
                       "Standard deviation")[as.numeric(pred$power)],
                     levels=c("Expected value", "Standard deviation"))
df_plot <- pred %>%
  group_by(age, power)%>%
  summarise(q10 = quantile(value, probs=c(0.1)),
            q50 = quantile(value, probs=c(0.5)),
            q90 = quantile(value, probs=c(0.9)),
            mean = mean(value), .groups="keep")%>%
  ungroup() # summarise uncertainty


ggplot(df_plot, mapping=aes(x = age)) +
  geom_ribbon(mapping = aes(ymin=q10, ymax=q90),
              alpha = 0.25, lty=2, col="black", fill="cornflowerblue")+
  geom_line(mapping=aes(y=q50))+
  facet_grid(.~power)+
  labs(x = "Proportion of owner-occupied units built prior to 1940 [AGE, %]", 
       y = "Moment value") +
  theme_bw()+
  coord_cartesian(xlim=range_x,
                  ylim=range_response)


## ----SLGPMCMCplotQuantiles, fig.cap = "Simultaneous quantile prediction using a SLGP model, with estimation performed by MAP", fig.fullwidth=TRUE, fig.height=5, fig.width=10, fig.align='center',fig.pos="H"----
probsL <- c(5, 25, 50, 75, 95)/100
dfX <- data.frame(age=seq(range_x[1], range_x[2], 2))
pred <- predictSLGP_quantiles(SLGPmodel=modelLaplace,
                              newNodes = dfX, 
                              probs=probsL)

df_plot <- pred %>%
  pivot_longer(-c("age", "probs"))%>%
  group_by(age, probs)%>%
  summarise(q10 = quantile(value, probs=c(0.1)),
            q50 = quantile(value, probs=c(0.5)),
            q90 = quantile(value, probs=c(0.9)),
            mean = mean(value), .groups="keep")%>%
  ungroup()%>%
  mutate(probs=factor(paste0("Quantile: ", 100*probs, "%"),
                      levels=paste0("Quantile: ", 100*probsL, "%")))


plot1 <- ggplot(df_plot, mapping=aes(x = age)) +
  geom_ribbon(mapping = aes(ymin=q10, ymax=q90, 
                            col=probs, fill=probs, group=probs),
              alpha = 0.25, lty=3)+
  geom_line(mapping=aes(y=q50, lty="SLGP", 
                        col=probs,group=probs), lwd=0.75)+
  labs(x = "Proportion of owner-occupied units built prior to 1940 [AGE, %]", 
       y = "Median value of owner-occupied homes [MEDV, k$]",
       fill = "Quantile levels",
       col = "Quantile levels", 
       lty="Quantile estimation method") +
  theme_bw()+
  coord_cartesian(xlim=range_x,
                  ylim=range_response)+
  scale_linetype_manual(values=c(2, 1))

plot2 <- ggplot(df, mapping=aes(x=age))+
  geom_histogram(col="navy", fill="grey", bins = 30, alpha = 0.3,
                 breaks=seq(0, 100, 5))+
  theme_minimal()+
  labs(x = NULL, 
       y = "Sample count") 

library(ggpubr)
p_with_marginal <- ggarrange(
  plot2, plot1,
  ncol = 1, heights = c(1, 3),  # adjust height ratio
  align = "v"
)

print(p_with_marginal)
Any scripts or data that you put into this service are public.
SLGP documentation built on Sept. 9, 2025, 5:25 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
SLGP
Spatial Logistic Gaussian Process for Field Density Estimation

inst/doc/IntroductionSLGP.R
In SLGP: Spatial Logistic Gaussian Process for Field Density Estimation

Try the SLGP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SLGP Spatial Logistic Gaussian Process for Field Density Estimation

inst/doc/IntroductionSLGP.R In SLGP: Spatial Logistic Gaussian Process for Field Density Estimation

Try the SLGP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SLGP
Spatial Logistic Gaussian Process for Field Density Estimation

inst/doc/IntroductionSLGP.R
In SLGP: Spatial Logistic Gaussian Process for Field Density Estimation