The goal of outfit is to detect outliers in multivariate functional data.
You can install the released version of outfit from CRAN with:
# Install the released version of outfit from CRAN.
install.packages("outfit")
And the development version from GitHub with:
# Install the development version of outfit from the GitHub repository
# "pridiltal/outfit". The call below needs the devtools package; uncomment
# the next line to install devtools first if it is not available.
# install.packages("devtools")
devtools::install_github("pridiltal/outfit")
utilization_curves dataset
library(outfit)
library(tidyverse)
## Basic example: inspect the first rows of the bundled data set.
head(utilization_curves)
#> basisid id cpu y subspaceid
#> 1 1 1 0.001007227 129 1 constant_scale_shape
#> 2 1 1 0.002784417 131 1 constant_scale_shape
#> 3 1 1 0.003465359 105 1 constant_scale_shape
#> 4 1 1 0.005992975 96 1 constant_scale_shape
#> 5 1 1 0.010085913 97 1 constant_scale_shape
#> 6 1 1 0.013900140 113 1 constant_scale_shape

# Plot y against cpu with one line per `id`, coloured by `subspaceid`.
# Each `subspaceid` gets its own panel (two rows of panels) with an
# independent y axis so that panels on different scales stay readable.
p <- ggplot(
  utilization_curves,
  aes(x = cpu, y = y, group = id, color = subspaceid)
) +
  geom_line() +
  facet_wrap(vars(subspaceid), scales = "free_y", nrow = 2) +
  scale_color_viridis_d() +
  theme(
    text = element_text(size = 10),
    legend.position = "bottom"
  )
print(p)
# Count how many distinct curve ids fall under each `subspaceid` label.
# The data has many rows per id, so reduce to one row per
# (id, subspaceid) pair before tallying.
frq <- utilization_curves %>%
  dplyr::distinct(id, subspaceid) %>%
  dplyr::group_by(subspaceid) %>%
  dplyr::tally()
print(frq)
#> # A tibble: 8 x 2
#> subspaceid n
#> <chr> <int>
#> 1 0 anomaly_location_shift 1
#> 2 0 anomaly_rw 1
#> 3 0 anomaly_scale_shift 2
#> 4 0 anomaly_shape_shift 1
#> 5 1 constant_scale_shape 75
#> 6 2 constant_scale_shape 75
#> 7 3 constant_scale_shape 75
#> 8 4 constant_scale_shape 75
# Distinct `subspaceid` labels present in the data.
types <- unique(utilization_curves[["subspaceid"]])

# Keep only the rows whose label is one of the four anomaly categories.
out_data <- dplyr::filter(
  utilization_curves,
  subspaceid %in% c(
    "0 anomaly_location_shift", "0 anomaly_scale_shift",
    "0 anomaly_shape_shift", "0 anomaly_rw"
  )
)

# Plot the anomalous curves, one panel per (subspaceid, id) combination,
# laid out in two rows with independent y axes.
p <- ggplot(
  out_data,
  aes(x = cpu, y = y, group = subspaceid, color = subspaceid)
) +
  geom_line() +
  facet_wrap(vars(subspaceid, id), scales = "free_y", nrow = 2) +
  scale_color_viridis_d() +
  theme(
    text = element_text(size = 10),
    legend.position = "bottom"
  )
p
library(GGally)
# Feature extraction works on the curve id and the response only.
data <- dplyr::select(utilization_curves, id, y)
features <- outfit::get_features(data = data, family = "NO")

# Lookup table with one row per (id, subspaceid) pair, used to attach the
# label to each row of the feature table.
d <- unique(dplyr::select(utilization_curves, id, subspaceid))
features <- dplyr::full_join(features, d, by = "id")

# Parallel-coordinate plot of the features. After the join the last column
# is the `subspaceid` label (used for grouping/colour); the first column is
# skipped as well (presumably `id` -- verify against get_features() output).
p <- GGally::ggparcoord(
  features,
  columns = 2:(ncol(features) - 1),
  groupColumn = ncol(features),
  order = "Outlying",
  scale = "uniminmax",
  showPoints = TRUE,
  alphaLines = 0.4
) +
  # Alternative palette: scale_color_brewer(palette = "RdYlGn")
  scale_color_viridis_d() +
  theme(legend.position = "bottom") +
  labs(x = "Features", y = "Values")
# For an interactive version: plotly::ggplotly(p)
print(p)
# Run the stray outlier detector on the feature columns only: the first
# column and the last column (`subspaceid`, added by the join) are dropped.
out <- stray::find_HDoutliers(
  features[, 2:(ncol(features) - 1)],
  alpha = 0.01
)

# `out$outliers` holds row positions within `features`, not curve ids, so
# translate them through `features$id` before filtering. This gives the same
# result as filtering on the positions directly when ids happen to be 1..n
# in row order, and stays correct when they are not.
outlier_ids <- features$id[out$outliers]

# List each flagged curve once together with its label.
utilization_curves %>%
  dplyr::filter(id %in% outlier_ids) %>%
  dplyr::select(id, subspaceid) %>%
  unique()
#> id subspaceid
#> 1 305 0 anomaly_rw
# Pair every outlier score with the id of the curve it was computed for.
# The scores are in the row order of `features`, so take the ids from
# `features$id` rather than assuming they are the sequence 1..n.
score <- data.frame(id = features$id, score = out$out_scores)

# One row per (id, subspaceid) pair, used to label the scores.
d <- utilization_curves %>%
  dplyr::select(id, subspaceid) %>%
  unique()
data_score <- dplyr::full_join(d, score, by = "id")

# Show the ten curves with the highest outlier scores.
data_score %>%
  dplyr::arrange(dplyr::desc(score)) %>%
  head(10)
#> id subspaceid score
#> 1 305 0 anomaly_rw 1.5584558
#> 2 281 4 constant_scale_shape 0.8457432
#> 3 301 0 anomaly_location_shift 0.6165525
#> 4 202 1 constant_scale_shape 0.4346108
#> 5 204 1 constant_scale_shape 0.4205720
#> 6 212 1 constant_scale_shape 0.4158072
#> 7 214 1 constant_scale_shape 0.3613625
#> 8 279 4 constant_scale_shape 0.3202449
#> 9 296 4 constant_scale_shape 0.2727066
#> 10 234 2 constant_scale_shape 0.2691825
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.