inst/doc/SRPCAMS.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE, 
  warning = FALSE, 
  fig.dim = c(7, 4.5),
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(ssMRCD)
library(dplyr)
library(ggplot2)
library(robustbase)
library(tidyr)
library(ggridges)

## ----eval = FALSE-------------------------------------------------------------
# ? weatherHoheWarte

## ----load data----------------------------------------------------------------
data("weatherHoheWarte")
head(weatherHoheWarte)

## ----data preparation---------------------------------------------------------
varnames_short= c("cl", "rad", "vp", "wmax", "ap", "hum", "prec", "sight", "sun", 
               "tmax", "tmin", "t", "w")

# filter data
weather =  weatherHoheWarte %>% 
  dplyr::filter(year > 2000)

# select variables
X_data = weather %>% 
  select(cloud_cover:wind_velocity)  %>%
  as.matrix
colnames(X_data) = varnames_short

# get number of variables
p = dim(X_data)[2]

## ----scale data---------------------------------------------------------------
X_data = scale(X_data, center = robustbase::colMedians(X_data), scale = apply(X = X_data, MARGIN = 2, FUN = mad))

## ----groups and weights-------------------------------------------------------
N_groups = as.numeric(as.factor(weather$year))
N = max(N_groups)
W = time_weights(max(N_groups), c(5:1))

## ----hyperparameter tuning, eval = FALSE--------------------------------------
# set.seed(1234)
# 
# lambda = seq(0, 1, by = 0.05)
# opt_lambda = ssMRCD(X = X_data,
#                     groups = N_groups,
#                     weights = W,
#                     lambda = lambda,
#                     tuning = list(method = "residuals", plot = TRUE),
#                     alpha = 0.75)

## -----------------------------------------------------------------------------
lambda_grid = seq(0, 1, by = 0.05)
residual_values = c(3.199500,3.188801,3.182297,3.175784,3.171462,3.167695,3.165151,3.162758,
                    3.161896,3.161929,3.162562, 3.163831,3.165651,3.168228,3.171867,3.175790,
                    3.180868,3.186437,3.192634,3.200292,3.208845)
lambda_opt = 0.4

ggplot() + 
  geom_line(aes(x = lambda_grid,
                y = residual_values)) +
  geom_point(aes(x = lambda_grid,
                y = residual_values)) +
  geom_point(aes(x = lambda_opt,
                y = residual_values[lambda_grid == lambda_opt]),
             col = "red") +
  labs(x = expression(lambda),
       y = "Residual Value",
       title = "Optimal Smoothing Based on Residuals") +
  theme_minimal()

## ----ssMRCD-------------------------------------------------------------------
# get optimal smoothing parameter and estimator
set.seed(123)
ssMRCD_weather = ssMRCD(X = X_data,
                        groups = weather$year,
                        weights = W,
                        lambda = 0.4,
                        alpha = 0.75)

# collect covariance matrices
COVS = ssMRCD_weather$MRCDcov

plot(ssMRCD_weather, type = c("ellipses", "convergence"), variables = c("hum", "sun"))

## ----dynamic means------------------------------------------------------------
# collect means
mu_timeseries = do.call(cbind, ssMRCD_weather$MRCDmu)
colnames(mu_timeseries) = 2001:2023
rownames(mu_timeseries) = colnames(X_data)

# plot means dependent on year
ggplot(mu_timeseries %>%
         data.frame %>%
         mutate(Var1 = rownames(.)) %>%
         tidyr::pivot_longer(cols = X2001:X2023) %>% 
         group_by(Var1) %>% 
         mutate(value = (value-min(value))/(max(value) - min(value)),
                name = as.numeric(gsub("X", "", name)))) +
  geom_line(aes(x = name, y = value, group = Var1)) + 
  geom_smooth(aes(x = name, y = value, group = Var1), se = F) + 
  facet_wrap(vars(Var1)) + 
  theme_minimal() +
  labs(x = "", y = "")

## ----pca tuning, eval = TRUE--------------------------------------------------
set.seed(12345)
pca = msPCA(eta = seq(0, 3, 0.25), gamma = 0.5, COVS = COVS, k = p)
pca$plot_tpo

## -----------------------------------------------------------------------------
summary(pca, k = 3)

## ----scree plot---------------------------------------------------------------
# plot scree plot
screeplot(x = pca, 
                 text = TRUE,
                 size = 3, 
                 gnames = 2001:2023,
                 cutoff = 0.8)

## ----optimal k----------------------------------------------------------------
# optimal number of components
optimal_k = 3

## ----align loadings-----------------------------------------------------------
?align
pca$PC[, 1:optimal_k, ] = align(PC = pca$PC[, 1:optimal_k, ],type = "mean")

## ----plot loadings------------------------------------------------------------
# plot aligned loadings
plot(x = pca, 
     type = "loadings",
     gnames = 2001:2023, 
     k = 1) 

plot(x = pca, 
     type = "loadings",
     gnames = 2001:2023, 
     k = 2) 

plot(x = pca, 
     type = "loadings", 
     gnames = 2001:2023, 
     k = 3) 

## ----biplot-------------------------------------------------------------------
biplot(pca)

## ----scores-------------------------------------------------------------------
sc = scores(X = X_data, groups = weather$year, PC = pca$PC, mu = ssMRCD_weather$MRCDmu, Sigma = ssMRCD_weather$MRCDcov)
sc2 = scores(PC = pca$PC, ssMRCD = ssMRCD_weather)

## -----------------------------------------------------------------------------
plot(x = pca, ssMRCD = ssMRCD_weather, type = "scores")

## ----distance distribution----------------------------------------------------
# distances
score_sd = sc$SD
score_od = sc$OD

dat = data.frame(time = weather$time,
                 year = weather$year,
                 OD = score_od,
                 SD = score_sd,
                 groups = weather$year)
dat_melt = dat %>% tidyr::pivot_longer(cols = c("OD", "SD"))

ggplot(dat_melt %>% 
         dplyr::filter(name %in% c( "SD", "OD"))) +
  ggridges::geom_density_ridges_gradient(aes(x = value, 
                                   y = groups, 
                                   fill = as.factor(groups), 
                                   group = groups),
                               scale = 10,
                               rel_min_height = 0.01, 
                               gradient_lwd = 0.5) +
  scale_x_continuous(expand = c(0.03, 0)) +
  scale_y_discrete(expand = c(0.0, 0, 0.1, 0)) +
  scale_fill_discrete(name = "") +
  ggridges::theme_ridges(font_size = 13, grid = TRUE) + 
  theme(axis.title.y = element_blank(),
        legend.position = "right",
        panel.grid.major.y = element_line()
        ) + 
  labs(x = "") +
  facet_grid(cols = vars(name), scales = "free") 

## ----distance distance plot---------------------------------------------------
plot(pca, type = "score_distances", ssMRCD = ssMRCD_weather, k = 3)

## ----gif, eval = FALSE--------------------------------------------------------
# # use function saveGIF
# library(animation)
# param = 2001:2023
# 
# saveGIF(interval = 0.1,
#         movie.name = "PCA_scoreDist_bivariate.gif",
#         expr = {
#           for (i in param ){
#            g = ggplot(dat %>% dplyr::filter (year %in% seq(i-5, i+5, 1))) +
#                  geom_density_2d_filled(aes(x = SD,
#                      y = OD),
#                      alpha = 1) +
#                   scale_x_sqrt(limits = c(0,40)) +
#                   ylim(c(0, 6.2)) +
#                   labs(title = i)
# 
#             print(g)
#           }}
#         )

Try the ssMRCD package in your browser

Any scripts or data that you put into this service are public.

ssMRCD documentation built on Nov. 5, 2025, 7:44 p.m.