simulation/splines.r

library(tidyverse)
library(caret)
theme_set(theme_classic())
library(splines)
library(mgcv)

# Load the data
data("Boston", package = "MASS")
# Split the data into training and test set
set.seed(123)
training.samples <- Boston$medv %>%
  createDataPartition(p = 0.8, list = FALSE)
train.data  <- Boston[training.samples, ]
test.data <- Boston[-training.samples, ]

ggplot(train.data, aes(lstat, medv) ) +
  geom_point() +
  stat_smooth()

knots <- quantile(train.data$lstat, p = c(0.25, 0.5, 0.75))
# Build the model
knots <- quantile(train.data$lstat, p = c(0.25, 0.5, 0.75))
model <- lm (medv ~ bs(lstat, knots = knots), data = train.data)
# Make predictions
predictions <- model %>% predict(test.data)
# Model performance
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)
ggplot(train.data, aes(lstat, medv) ) +
  geom_point() +
  stat_smooth(method = lm, formula = y ~ splines::bs(x, df = 3))

# The term s(lstat) tells the gam() function to find the “best” knots for a spline term.
# Build the model
model <- gam(medv ~ s(lstat), data = train.data,family = "gaussian")
model$coefficients
# Make predictions
predictions <- model %>% predict(test.data)
# Model performance
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)

ggplot(train.data, aes(lstat, medv) ) +
  geom_point() +
  stat_smooth(method = gam, formula = y ~ s(x))

use.data <- res
model <- gam(L2 ~ s(Loc_RP50), data = use.data, family = "gaussian")
model$coefficients

model <- gam(L2_W ~ s(Loc_RP50), data = use.data, family = "gaussian")
model$coefficients

ggplot(use.data, aes(Loc_RP50, L2) ) +
  geom_point() +
  stat_smooth(method = gam, formula = y ~ s(x))

ggplot(use.data, aes(Loc_RP50, L2_W) ) +
  geom_point() +
  stat_smooth(method = gam, formula = y ~ s(x))


ggplot(use.data, aes(Loc_RP50, L3) ) +
  geom_point() +
  stat_smooth(method = gam, formula = y ~ s(x))

ggplot(use.data, aes(Loc_RP50, L3_W) ) +
  geom_point() +
  stat_smooth(method = gam, formula = y ~ s(x))


###################
library(SplinesUtils)

## a toy dataset
set.seed(0)
x <- 1:100 + runif(100, -0.1, 0.1)
y <- poly(x, 9) %*% rnorm(9)
y <- y + rnorm(length(y), 0, 0.2 * sd(y))

## fit a smoothing spline
x <- res$Loc_RP50
y <- res$L2_W

sm <- smooth.spline(x, y)

## coerce "smooth.spline" object to "PiecePoly" object
oo <- SmoothSplineAsPiecePoly(sm)

## plot the spline
plot(oo)

sm_points <- dim(use.data)[1]/10
ggplot(use.data, aes(Loc_RP50, L2) ) +
  geom_point() +
  stat_smooth(method = 'gam',
              # method.args = list(family = "binomial"),
              formula = y ~ splines::ns(x, 60))

## find all stationary / saddle points
xs <- solve(oo, deriv = 1)

ys <- predict(oo, xs)

points(xs, ys, pch = 19)


a1 <- tibble(
  Loc_RP50 = rnorm(100, 100, 10),
  L2 = round(runif(100, 0, 300), 0),
  Weight = runif(100, 10, 50),
  L2_W =  Weight * -0.1 +  Weight^2 * 0.01
)

model <- gam(L2 ~ s(Loc_RP50), data = a1, family = "gaussian")
model$coefficients
summary(model)

predict(model, type = "response")

b <- predict(model)
plot(b)
model <- gam(L2 ~ splines::ns(Loc_RP50, 3), data = a1, family = "gaussian")
sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.