Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
warning = FALSE,
fig.dim = c(7, 4.5),
comment = "#>"
)
## -----------------------------------------------------------------------------
library(ssMRCD)
library(dplyr)
library(ggplot2)
library(robustbase)
library(tidyr)
library(ggridges)
## ----eval = FALSE-------------------------------------------------------------
# ? weatherHoheWarte
## ----load data----------------------------------------------------------------
data("weatherHoheWarte")
head(weatherHoheWarte)
## ----data preparation---------------------------------------------------------
varnames_short= c("cl", "rad", "vp", "wmax", "ap", "hum", "prec", "sight", "sun",
"tmax", "tmin", "t", "w")
# filter data
weather = weatherHoheWarte %>%
dplyr::filter(year > 2000)
# select variables
X_data = weather %>%
select(cloud_cover:wind_velocity) %>%
as.matrix
colnames(X_data) = varnames_short
# get number of variables
p = dim(X_data)[2]
## ----scale data---------------------------------------------------------------
X_data = scale(X_data, center = robustbase::colMedians(X_data), scale = apply(X = X_data, MARGIN = 2, FUN = mad))
## ----groups and weights-------------------------------------------------------
N_groups = as.numeric(as.factor(weather$year))
N = max(N_groups)
W = time_weights(max(N_groups), c(5:1))
## ----hyperparameter tuning, eval = FALSE--------------------------------------
# set.seed(1234)
#
# lambda = seq(0, 1, by = 0.05)
# opt_lambda = ssMRCD(X = X_data,
# groups = N_groups,
# weights = W,
# lambda = lambda,
# tuning = list(method = "residuals", plot = TRUE),
# alpha = 0.75)
## -----------------------------------------------------------------------------
lambda_grid = seq(0, 1, by = 0.05)
residual_values = c(3.199500,3.188801,3.182297,3.175784,3.171462,3.167695,3.165151,3.162758,
3.161896,3.161929,3.162562, 3.163831,3.165651,3.168228,3.171867,3.175790,
3.180868,3.186437,3.192634,3.200292,3.208845)
lambda_opt = 0.4
ggplot() +
geom_line(aes(x = lambda_grid,
y = residual_values)) +
geom_point(aes(x = lambda_grid,
y = residual_values)) +
geom_point(aes(x = lambda_opt,
y = residual_values[lambda_grid == lambda_opt]),
col = "red") +
labs(x = expression(lambda),
y = "Residual Value",
title = "Optimal Smoothing Based on Residuals") +
theme_minimal()
## ----ssMRCD-------------------------------------------------------------------
# get optimal smoothing parameter and estimator
set.seed(123)
ssMRCD_weather = ssMRCD(X = X_data,
groups = weather$year,
weights = W,
lambda = 0.4,
alpha = 0.75)
# collect covariance matrices
COVS = ssMRCD_weather$MRCDcov
plot(ssMRCD_weather, type = c("ellipses", "convergence"), variables = c("hum", "sun"))
## ----dynamic means------------------------------------------------------------
# collect means
mu_timeseries = do.call(cbind, ssMRCD_weather$MRCDmu)
colnames(mu_timeseries) = 2001:2023
rownames(mu_timeseries) = colnames(X_data)
# plot means dependent on year
ggplot(mu_timeseries %>%
data.frame %>%
mutate(Var1 = rownames(.)) %>%
tidyr::pivot_longer(cols = X2001:X2023) %>%
group_by(Var1) %>%
mutate(value = (value-min(value))/(max(value) - min(value)),
name = as.numeric(gsub("X", "", name)))) +
geom_line(aes(x = name, y = value, group = Var1)) +
geom_smooth(aes(x = name, y = value, group = Var1), se = F) +
facet_wrap(vars(Var1)) +
theme_minimal() +
labs(x = "", y = "")
## ----pca tuning, eval = TRUE--------------------------------------------------
set.seed(12345)
pca = msPCA(eta = seq(0, 3, 0.25), gamma = 0.5, COVS = COVS, k = p)
pca$plot_tpo
## -----------------------------------------------------------------------------
summary(pca, k = 3)
## ----scree plot---------------------------------------------------------------
# plot scree plot
screeplot(x = pca,
text = TRUE,
size = 3,
gnames = 2001:2023,
cutoff = 0.8)
## ----optimal k----------------------------------------------------------------
# optimal number of components
optimal_k = 3
## ----align loadings-----------------------------------------------------------
?align
pca$PC[, 1:optimal_k, ] = align(PC = pca$PC[, 1:optimal_k, ],type = "mean")
## ----plot loadings------------------------------------------------------------
# plot aligned loadings
plot(x = pca,
type = "loadings",
gnames = 2001:2023,
k = 1)
plot(x = pca,
type = "loadings",
gnames = 2001:2023,
k = 2)
plot(x = pca,
type = "loadings",
gnames = 2001:2023,
k = 3)
## ----biplot-------------------------------------------------------------------
biplot(pca)
## ----scores-------------------------------------------------------------------
sc = scores(X = X_data, groups = weather$year, PC = pca$PC, mu = ssMRCD_weather$MRCDmu, Sigma = ssMRCD_weather$MRCDcov)
sc2 = scores(PC = pca$PC, ssMRCD = ssMRCD_weather)
## -----------------------------------------------------------------------------
plot(x = pca, ssMRCD = ssMRCD_weather, type = "scores")
## ----distance distribution----------------------------------------------------
# distances
score_sd = sc$SD
score_od = sc$OD
dat = data.frame(time = weather$time,
year = weather$year,
OD = score_od,
SD = score_sd,
groups = weather$year)
dat_melt = dat %>% tidyr::pivot_longer(cols = c("OD", "SD"))
ggplot(dat_melt %>%
dplyr::filter(name %in% c( "SD", "OD"))) +
ggridges::geom_density_ridges_gradient(aes(x = value,
y = groups,
fill = as.factor(groups),
group = groups),
scale = 10,
rel_min_height = 0.01,
gradient_lwd = 0.5) +
scale_x_continuous(expand = c(0.03, 0)) +
scale_y_discrete(expand = c(0.0, 0, 0.1, 0)) +
scale_fill_discrete(name = "") +
ggridges::theme_ridges(font_size = 13, grid = TRUE) +
theme(axis.title.y = element_blank(),
legend.position = "right",
panel.grid.major.y = element_line()
) +
labs(x = "") +
facet_grid(cols = vars(name), scales = "free")
## ----distance distance plot---------------------------------------------------
plot(pca, type = "score_distances", ssMRCD = ssMRCD_weather, k = 3)
## ----gif, eval = FALSE--------------------------------------------------------
# # use function saveGIF
# library(animation)
# param = 2001:2023
#
# saveGIF(interval = 0.1,
# movie.name = "PCA_scoreDist_bivariate.gif",
# expr = {
# for (i in param ){
# g = ggplot(dat %>% dplyr::filter (year %in% seq(i-5, i+5, 1))) +
# geom_density_2d_filled(aes(x = SD,
# y = OD),
# alpha = 1) +
# scale_x_sqrt(limits = c(0,40)) +
# ylim(c(0, 6.2)) +
# labs(title = i)
#
# print(g)
# }}
# )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.