knitr::opts_chunk$set(echo = TRUE)

library(ggplot2)
library(patchwork)
library(dplyr)

localFDA

License Travis build status

Overview

Software companion for the paper "Localization processes for functional data analysis" by Elías, Antonio, Jiménez, Raúl, and Yukich, Joe, (2020) . It provides the code for computing localization processes and localization distances and their application to classification and outlier detection problems.

Installation

#install the package
devtools::install_github("aefdz/localFDA")
#load the package
library(localFDA)

Test usage

Load the example data and plot it.

X <- exampleData
n <- ncol(X)
p <- nrow(X)
t <- as.numeric(rownames(X))

#plot the data set
df_functions <- data.frame(ids = rep(colnames(X), each = p),
                           y = c(X),
                           x = rep(t, n)
                           )

functions_plot <- ggplot(df_functions) + 
                  geom_line(aes(x = x, y = y, group = ids, color = ids), 
                            color = "black", alpha = 0.25) + 
                  xlab("t") + theme(legend.position = "none")


functions_plot

Compute kth empirical localization processes

Empirical version of Equation (1) of the paper. For one focal,

focal <- "1"

localizarionProcesses_focal <- localizationProcesses(X, focal)$lc

Plot localization processes of order $1, 50, 100$ and $200$:

df_lc <- data.frame(k = rep(colnames(localizarionProcesses_focal), each = p),
                           y = c(localizarionProcesses_focal),
                           x = rep(t, n-1)
                           )

lc_plots <- list()
ks <- c(1, 50, 100, 200)

for(i in 1:4){
  lc_plots[[i]] <- functions_plot + 
                   geom_line(data = filter(df_lc, k == paste0("k=", ks[i])), 
                             aes(x = x, y = y, group = k), 
                             color = "blue", size = 1) +
                   geom_line(data = filter(df_functions, ids == focal), 
                             aes(x = x, y = y, group = ids), 
                             color = "red", linetype = "dashed", size = 1)+
                   ggtitle(paste("k = ", ks[i]))
}

wrap_plots(lc_plots)

Compute kth empirical localization distances

Equation (18) of the paper. For one focal,

localizationDistances_focal <- localizationDistances(X, focal)

head(localizationDistances_focal)

Plot the localization distances:

df_ld <- data.frame(k = names(localizationDistances_focal),
                           y = localizationDistances_focal,
                           x = 1:c(n-1)
                           )


ldistances_plot <- ggplot(df_ld, aes(x = x, y = y)) + 
                   geom_point() + 
                   ggtitle("Localization distances for one focal") + 
                   xlab("kth") + ylab("L")

ldistances_plot

Sample $\mu$ and $\sigma$

localizationStatistics_full <- localizationStatistics(X, robustify = TRUE)

#See the mean and sd estimations for k = 1, 100, 200, 400, 600

localizationStatistics_full$trim_mean[c(1, 100, 200, 400, 600)]
localizationStatistics_full$trim_sd[c(1, 100, 200, 400, 600)]

Classification

X <- classificationData

ids_training <- sample(colnames(X), 90)
ids_testing <- setdiff(colnames(X), ids_training)

trainingSample <- X[,ids_training]
testSample <- X[,ids_testing]; colnames(testSample) <- NULL #blind 
classNames <- c("G1", "G2")

classification_results <- localizationClassifier(trainingSample, testSample, classNames, k_opt = 3)

checking <- data.frame(real_classs = ids_testing, 
                      predicted_class =classification_results$test$predicted_class)

checking

Outlier detection

X <- outlierData

outliers <- outlierLocalizationDistance(X, localrule = 0.95, whiskerrule = 1.5)

outliers$outliers_ld_rule

Plot results,

df_functions <- data.frame(ids = rep(colnames(X), each = nrow(X)),
                           y = c(X),
                           x = rep(seq(from = 0, to = 1, length.out = nrow(X)), ncol(X)))


functions_plot <- ggplot(df_functions) + 
                  geom_line(aes(x = x, y = y, group = ids), 
                            color = "black") + 
                  xlab("t") + 
  theme(legend.position = "bottom")+
                  geom_line(data = df_functions[df_functions$ids %in% outliers$outliers_ld_rule,], aes(x = x, y = y, group = ids, color = ids), size = 1) +
  guides(color = guide_legend(title="Detected outliers"))

functions_plot 

References

Elías, Antonio, Jiménez, Raúl and Yukich, Joe (2020). Localization processes for functional data analysis [https://arxiv.org/abs/2007.16059]https://arxiv.org/abs/2007.16059.



aefdz/localFDA documentation built on Jan. 9, 2021, 6:42 p.m.