# Global knitr chunk defaults: draw figures with the "cairo_pdf" device and
# keep messages and warnings out of the rendered slides.
# `message` is a logical chunk option; "hide" is only a valid value for
# `results` / `fig.show`, so FALSE is used here to suppress messages.
knitr::opts_chunk$set(dev = "cairo_pdf", message = FALSE, warning = FALSE)
library(VariSel)
library(tidyverse)
library(extrafont)
library(ggsci)
require(tidymodels)
require(furrr)
require(rlang)
require(patchwork)
loadfonts()
 # Default ggplot2 theme for every figure: black-and-white theme, white facet
 # strips, and bold 16pt "LM Roman 10" text.
 theme_set(
   theme_bw() +
     theme(
       strip.background = element_rect(fill = "white"),
       text = element_text(face = "bold", family = "LM Roman 10", size = 16)
     )
 )

# Replace ggplot2's default discrete colour/fill scales with the UChicago
# palettes from ggsci. Arguments are forwarded so that calls such as
# scale_colour_discrete(name = "...") are honoured instead of silently dropped.
scale_colour_discrete <- function(...) scale_color_uchicago(...)
scale_fill_discrete <- function(...) scale_fill_uchicago(...)

A simple vision of the immune system:

\framesubtitle{Or how Astérix and Obélix can kick out the Romans}

```{tikz, tikz-ex2, fig.cap = "DC Th dialogue", fig.ext = 'pdf', echo =FALSE}
% Colours available to the figure.
\definecolor{vert}{RGB}{ 109,205,89}
\definecolor{bleu}{RGB}{93,139,172}
\definecolor{marron}{RGB}{172,126,93}
\definecolor{marronfonce}{RGB}{120,88,65}
\definecolor{i1}{RGB}{122,181,168}
\definecolor{i2}{RGB}{122,165,181}
\definecolor{i3}{RGB}{122,181,139}
\definecolor{dc}{RGB}{206,184,181}
\definecolor{dc2}{RGB}{123, 87, 229}
\definecolor{lt}{RGB}{181,203,206}
% `line` is the arrow style used by every \path below.
% `block`, `block3` and `inp` are defined but not used in this chunk.
\tikzstyle{line} = [draw, -latex, color = marron, line width=1pt]
\tikzstyle{block} = [rectangle, draw, fill=vert!20, draw= vert!80, text = marronfonce,
  text width=3.5em, text centered, rounded corners, minimum height=2em, line width=1pt]
\tikzstyle{block3} = [rectangle, draw, fill=vert!10, draw= vert!90, text = marronfonce,
  text width=3em, text centered, rounded corners, minimum height=1.1em, line width=0.7pt]
\tikzstyle{inp} = [rectangle, draw, text = marronfonce,
  text width=4.4em, text centered, rounded corners, minimum height=1em, line width=1pt]
\begin{tikzpicture}[node distance = 2cm, auto]
  % Picture nodes, left to right: Pert -> DC -> TL -> L.
  % The node names (Pert, TL, L) are required by the \path commands below.
  \node(DC){\includegraphics[width= 0.22\textwidth]{cellule_dend}};
  \node[left of = DC, node distance = 3 cm, color = gray](Pert){\includegraphics[width= 0.17\textwidth]{romain.png}};
  \node[right of = DC, node distance = 4 cm](TL){\includegraphics[width= 0.17\textwidth]{ast_ob}};
  \node[right of = TL, node distance = 3 cm](L){\includegraphics[width= 0.17\textwidth]{paf-asterix}};

  % Captions below the pictures and signal labels above the arrows.
  \node[below of = DC, node distance =2.2 cm, text width= 0.17\textwidth, text centered, color =dc!60!black]{ Dendritic cell};
  \node[below of = Pert, node distance =2.2 cm, text width= 0.22\textwidth, text centered, color =dc!60!black]{ Perturbators};
  \node[below of = TL, node distance =2.2 cm, text width= 0.22\textwidth, text centered, color =lt!60!black]{ Naive Th lymphocytes};
  \node[above of = TL, node distance =1.5 cm, text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!70!black]{ Th signals};
  \node[above of = DC, node distance =1.5 cm, text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!70!black]{ DC signals};

  % Five parallel arrows from DC to TL.
  \path [line, color = dc] ([yshift=-0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=-0.8 cm]TL.west);
  \path [line, color = dc] ([yshift=-0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=-0.4 cm]TL.west);
  \path [line, color = dc] ([xshift=- 0.1 cm]DC.east) -- (TL.west);
  \path [line, color = dc] ([yshift=0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.4 cm]TL.west);
  \path [line, color = dc] ([yshift=0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.8 cm]TL.west);

  % One arrow from Pert to DC.
  \path [line, color = gray] (Pert) -- (DC.west);

  % Five parallel arrows from TL to L.
  \path [line, color = lt] ([xshift= -0.4cm, yshift = -0.7cm]TL.east) -- ([yshift = -0.7cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = -0.35cm]TL.east) -- ([yshift = -0.35cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm]TL.east) -- (L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = 0.35cm]TL.east) -- ([yshift = 0.35cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = 0.7cm]TL.east) -- ([yshift = 0.7cm]L.west);
\end{tikzpicture}
```

# Experimental set up :
\framesubtitle{Ordralphabetix and Cétautomatix are two!}


```{tikz, tikz-ex2b, fig.cap = "DC Th dialogue", fig.ext = 'pdf', echo =FALSE, fig.num=1}
% Colours available to the figure.
\definecolor{vert}{RGB}{ 109,205,89}
\definecolor{bleu}{RGB}{93,139,172}
\definecolor{marron}{RGB}{172,126,93}
\definecolor{marronfonce}{RGB}{120,88,65}
\definecolor{i1}{RGB}{122,181,168}
\definecolor{i2}{RGB}{122,165,181}
\definecolor{i3}{RGB}{122,181,139}
\definecolor{dc}{RGB}{206,184,181}
\definecolor{dc2}{RGB}{123, 87, 229}
\definecolor{lt}{RGB}{181,203,206}
% `line` is the arrow style used by every \path below.
% `block`, `block3` and `inp` are defined but not used in this chunk.
\tikzstyle{line} = [draw, -latex, color = marron, line width=1pt]
\tikzstyle{block} = [rectangle, draw, fill=vert!20, draw= vert!80, text = marronfonce,
  text width=3.5em, text centered, rounded corners, minimum height=2em, line width=1pt]
\tikzstyle{block3} = [rectangle, draw, fill=vert!10, draw= vert!90, text = marronfonce,
  text width=3em, text centered, rounded corners, minimum height=1.1em, line width=0.7pt]
\tikzstyle{inp} = [rectangle, draw, text = marronfonce,
  text width=4.4em, text centered, rounded corners, minimum height=1em, line width=1pt]

% First picture: same Pert -> DC -> TL -> L chain, with the DC image
% `cellule_dend1` captioned "Ordra".
\begin{tikzpicture}[node distance = 2cm, auto]
  % nodes
  \node(DC){\includegraphics[width= 0.19\textwidth]{cellule_dend1}};
  \node[left of = DC, node distance = 3 cm, color = gray](Pert){\includegraphics[width= 0.19\textwidth]{romain.png}};
  \node[right of = DC, node distance = 4 cm](TL){\includegraphics[width= 0.17\textwidth]{ast_ob}};
  \node[right of = TL, node distance = 3 cm](L){\includegraphics[width= 0.17\textwidth]{paf-asterix}};

  % Captions below the pictures and signal labels above the arrows.
  \node[below of = DC, node distance =2.2 cm, text width= 0.17\textwidth, text centered, color =dc!60!black]{Ordra };
  \node[below of = Pert, node distance =2.2 cm, text width= 0.22\textwidth, text centered, color =dc!60!black]{ Perturbators};
  \node[below of = TL, node distance =2.2 cm, text width= 0.22\textwidth, text centered, color =lt!60!black]{ Naive Th lymphocytes};
  \node[above of = TL, node distance =1.5 cm, text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!70!black]{ Th signals};
  \node[above of = DC, node distance =1.5 cm, text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!70!black]{ DC signals};

  % Five parallel arrows from DC to TL.
  \path [line, color = dc] ([yshift=-0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=-0.8 cm]TL.west);
  \path [line, color = dc] ([yshift=-0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=-0.4 cm]TL.west);
  \path [line, color = dc] ([xshift=- 0.1 cm]DC.east) -- (TL.west);
  \path [line, color = dc] ([yshift=0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.4 cm]TL.west);
  \path [line, color = dc] ([yshift=0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.8 cm]TL.west);

  % One arrow from Pert to DC.
  \path [line, color = gray] (Pert) -- (DC.west);

  % Five parallel arrows from TL to L.
  \path [line, color = lt] ([xshift= -0.4cm, yshift = -0.7cm]TL.east) -- ([yshift = -0.7cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = -0.35cm]TL.east) -- ([yshift = -0.35cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm]TL.east) -- (L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = 0.35cm]TL.east) -- ([yshift = 0.35cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = 0.7cm]TL.east) -- ([yshift = 0.7cm]L.west);
\end{tikzpicture}

% Second picture: identical layout, with the DC image `cellule_dend2`
% captioned "Cetau".
\begin{tikzpicture}[node distance = 2cm, auto]
  % nodes
  \node(DC){\includegraphics[width= 0.19\textwidth]{cellule_dend2}};
  \node[left of = DC, node distance = 3 cm, color = gray](Pert){\includegraphics[width= 0.19\textwidth]{romain.png}};
  \node[right of = DC, node distance = 4 cm](TL){\includegraphics[width= 0.17\textwidth]{ast_ob}};
  \node[right of = TL, node distance = 3 cm](L){\includegraphics[width= 0.17\textwidth]{paf-asterix}};

  % Captions below the pictures and signal labels above the arrows.
  \node[below of = DC, node distance =2.2 cm, text width= 0.17\textwidth, text centered, color =dc!60!black]{ Cetau};
  \node[below of = Pert, node distance =2.2 cm, text width= 0.22\textwidth, text centered, color =dc!60!black]{ Perturbators};
  \node[below of = TL, node distance =2.2 cm, text width= 0.22\textwidth, text centered, color =lt!60!black]{ Naive Th lymphocytes};
  \node[above of = TL, node distance =1.5 cm, text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!70!black]{ Th signals};
  \node[above of = DC, node distance =1.5 cm, text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!70!black]{ DC signals};

  % Five parallel arrows from DC to TL.
  \path [line, color = dc] ([yshift=-0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=-0.8 cm]TL.west);
  \path [line, color = dc] ([yshift=-0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=-0.4 cm]TL.west);
  \path [line, color = dc] ([xshift=- 0.1 cm]DC.east) -- (TL.west);
  \path [line, color = dc] ([yshift=0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.4 cm]TL.west);
  \path [line, color = dc] ([yshift=0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.8 cm]TL.west);

  % One arrow from Pert to DC.
  \path [line, color = gray] (Pert) -- (DC.west);

  % Five parallel arrows from TL to L.
  \path [line, color = lt] ([xshift= -0.4cm, yshift = -0.7cm]TL.east) -- ([yshift = -0.7cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = -0.35cm]TL.east) -- ([yshift = -0.35cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm]TL.east) -- (L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = 0.35cm]TL.east) -- ([yshift = 0.35cm]L.west);
  \path [line, color = lt] ([xshift= -0.4cm, yshift = 0.7cm]TL.east) -- ([yshift = 0.7cm]L.west);
\end{tikzpicture}
```

Statistical modelling

\framesubtitle{Understand the gallic language}

\begin{itemize} \item \textbf{Dataset description:}

\begin{itemize} \item $\boldsymbol{X}$: $n\times p$ design matrix \includegraphics[width =0.12\textwidth]{cellule_dend} \item $\boldsymbol{Y}$: $n\times q$ response matrix \includegraphics[width =0.12\textwidth]{ast_ob} \end{itemize}

\item \textbf{Question:} Which variables influence the responses?

\item \textbf{Approach:} \begin{itemize} \item Variable selection in $$ \boldsymbol{Y}=\boldsymbol{XB}+\boldsymbol{E}, $$ where \begin{itemize} \item $\boldsymbol{B}$: $p\times q$ \textbf{sparse} coefficient matrix \item $\boldsymbol{E}$: $n\times q$ error matrix with $$\forall i\in\{1,\dots, n\}, \; (E_{i,1},\dots,E_{i,q})\stackrel{iid}{\sim}~\mathcal{N}(0,\boldsymbol{\Sigma}_q)$$

\end{itemize} \item We take the dependence into account by estimating $\boldsymbol{\Sigma}_q$. \end{itemize} \end{itemize}

Statistical modelling

\framesubtitle{Understand the gallic language}

\begin{itemize} \item \textbf{Dataset description:}

\begin{itemize} \item $\boldsymbol{X}$: $n\times p$ design matrix \includegraphics[width =0.12\textwidth]{cellule_dend} \item $\boldsymbol{Y}$: $n\times q$ response matrix \includegraphics[width =0.12\textwidth]{ast_ob} \end{itemize}

\item \textbf{Question:} Which variables influence the responses?

\item \textbf{Approach:} \begin{itemize} \item Variable selection in $$ \boldsymbol{Y}=\boldsymbol{XB}+\boldsymbol{E}, $$ where \begin{itemize} \item $\boldsymbol{B}$: $p\times q$ \textbf{sparse} coefficient matrix \item $\boldsymbol{E}$: $n\times q$ error matrix with $$\forall i\in\{1,\dots, n\}, \; (E_{i,1},\dots,E_{i,q})\stackrel{iid}{\sim}~\mathcal{N}(0,\boldsymbol{\Sigma}_q)$$

\end{itemize} \item We take the dependence into account by estimating $\boldsymbol{\Sigma}_q$. \end{itemize} \end{itemize}

Statistical modelling

\begin{table} \centering \begin{tabular}{l:c:rrr} \hline & \textcolor{outcome}{\textbf{dc}} & \multicolumn{3}{c}{\textcolor{outcome}{\textbf{DC\_sign}}} \\ \hline & & IL12p70 & TNFa & IL10 \\ \hline 1 & Ordra & 32.11 & 859.27 & 10.1 \\ 2 & Ordra & 10.20 & 661.92 & 8.40 \\ \vdots & \vdots & \vdots & \vdots & \vdots \\ 428 & Cétau & 9.90 & 711.26 & 22.54 \\ \hline \end{tabular} \end{table} \vspace{-0.5cm} $$\Downarrow$$

\vspace{-0.5cm} \begin{table} \centering \begin{tabular}{l:rrrrrr} \hline & \multicolumn{6}{c}{\textcolor{outcome}{\textbf{X}}} \\ \hline & IL12p70 & TNFa & IL10 & IL12p70 & TNFa & IL10 \\ & in Ordra & in Ordra & in Ordra & in Cétau & in Cétau & in Cétau \\ \hline 1 & 32.11 & 859.27 & 10.1 & 0 & 0 & 0 \\ 2 & 10.20 & 661.92 & 8.40 & 0 & 0 & 0 \\ \vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\ 428 & 0 & 0 & 0 & 9.90 & 711.26 & 22.54 \\ \hline \end{tabular} \end{table}

Different penalties for different points of view

\begin{itemize} \item \textcolor{outcome}{\textbf{Lasso}} : \textit{select variables without taking into account potential links.} \begin{equation} \label{lasso} \widehat{b}_L = \Am{b} \left\lbrace||y-\bX b||_2^2 + \lambda ||b||_1 \right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Group-Lasso}} : \textit{select a group of variables.} \begin{equation} \label{grouplasso} \widehat{b}_G = \Am{b_{(1)}, \dots, b_{(L)}} \left\lbrace||y-\sum_{1 \leq l \leq L}\bX_{(l)} b_{(l)}||_2^2 + \lambda\sum_{1 \leq l \leq L}\sqrt{p_l} ||b_{(l)}||_2\right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Fused-Lasso}} : \textit{influence a group of variables to have the same coefficient.} \begin{equation}\label{fuselasso} \widehat{b}_F =\Am{b}\left\lbrace||y-\bX b||_2^2 + \lambda_1\sum_{(i,j) \in \mathcal{G}} |b_i - b_j|+ \lambda_2 ||b||_1\right\rbrace, \end{equation} \end{itemize}

Different penalties for different points of view

\begin{itemize} \item \textcolor{outcome}{\textbf{Lasso}} : \textit{select variables without taking into account potential links.} \begin{equation} \label{lasso} \widehat{b}_L = \Am{b} \left\lbrace||y-\bX b||_2^2 + \lambda ||b||_1 \right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Group-Lasso}} : \textit{select a group of variables.} \begin{equation} \label{grouplasso} \widehat{b}_G = \Am{b_{(1)}, \dots, b_{(L)}} \left\lbrace||y-\sum_{1 \leq l \leq L}\bX_{(l)} b_{(l)}||_2^2 + \lambda\sum_{1 \leq l \leq L}\sqrt{p_l} ||b_{(l)}||_2\right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Fused-Lasso}} : \textit{influence a group of variables to have the same coefficient.} \begin{equation}\label{fuselasso} \widehat{b}_F =\Am{b}\left\lbrace||y-\bX b||_2^2 + \lambda_1\sum_{(i,j) \in \mathcal{G}} |b_i - b_j|+ \lambda_2 ||b||_1\right\rbrace, \end{equation} \end{itemize}

VariSel for one model type

# Fix the RNG seed, then load the data objects used below
# (Xbrt, X_bxcx, Y_bxcx, pert_dc and dc are presumably provided by these
# .RData files — TODO confirm).
set.seed(4)
load("raw_data.RData")
load("data_Tmod_sspdc.RData")

# Means of the numeric columns of Xbrt for each (pert_dc, dc) pair.
# NOTE(review): this `X` is overwritten by the model matrix further down
# before it is read anywhere in this chunk — confirm it is still needed.
X <- Xbrt %>%
  as.data.frame() %>%
  mutate(pert_dc = pert_dc, dc = dc) %>%
  group_by(pert_dc, dc) %>%
  summarize_if(is.numeric, mean) %>%
  ungroup() %>%
  as.data.frame()
#
# Y <- Y_bxcx %>% as.data.frame() %>%
#   mutate(pert_dc= pert_dc, dc= dc) %>%
#   group_by(pert_dc, dc) %>%
#   summarize_if(is.numeric, mean) %>%
#   ungroup() %>% as.data.frame()

# Regressors: columns 36, 37 and 40 of X_bxcx; responses: columns 2 and 10
# of Y_bxcx.
DC_sign <- X_bxcx[, c(36, 37, 40)]
T_sign <- Y_bxcx[, c(2, 10)]
# DC_sign <- X %>% select(-dc,-pert_dc) %>%
#   as.matrix() %>% scale()
#
# T_sign <- Y %>% select(IFNg,  IL3) %>%
#   as.matrix() %>% scale()
# dc <- Y %>% pull(dc)

# Relabel the DC types with the names used on the slides.
# Work on a character vector so that new labels can be assigned even if `dc`
# is a factor. Entries equal to " bDC" become "Ordra" and every other entry
# becomes "Cetau"; testing `dc == " bDC"` a second time after the first
# rename can never match, so the comparison is evaluated only once.
# Assumes `dc` holds exactly two DC types — TODO confirm.
dc <- as.character(dc)
dc <- ifelse(dc == " bDC", "Ordra", "Cetau")

# Design matrix with one DC_sign column per DC type, no intercept.
X <- model.matrix(~ DC_sign:dc - 1)

# Fit from the pre-built design matrix; ":" separates regressor and group in
# the column names of X.
mod <- train_VariSel(
  Y = T_sign,
  X = X,
  sepx = ":",
  type = "group_multi_regr",
  type_S12_inv = "emp"
)
# Fit from the regressors and the grouping vector.
# NOTE: this assignment replaces the `mod` fitted just above.
mod <- train_VariSel(
  Y = T_sign,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_regr"
)

VariSel for one model type

# Fix the RNG seed, then load the data objects used below
# (X_bxcx, Y_bxcx and dc are presumably provided by these .RData files —
# TODO confirm).
set.seed(4)
load("raw_data.RData")
load("data_Tmod_sspdc.RData")
# X <- X_bxcx %>% as.data.frame() %>%
#   mutate(pert_dc= pert_dc, dc= dc) %>%
#   group_by(pert_dc, dc) %>%
#   summarize_if(is.numeric, mean) %>% ungroup()  %>%
#   as.data.frame()
#
# Y <- Y_bxcx %>% as.data.frame() %>%
#   mutate(pert_dc= pert_dc, dc= dc) %>%
#   group_by(pert_dc, dc) %>%
#   summarize_if(is.numeric, mean) %>%
#   ungroup() %>% as.data.frame()

# Regressors: columns 36, 37 and 40 of X_bxcx; responses: columns 2 and 10
# of Y_bxcx.
DC_sign <- X_bxcx[, c(36, 37, 40)]
T_sign <- Y_bxcx[, c(2, 10)]
# DC_sign <- X %>% select(-dc,-pert_dc) %>%
#   as.matrix() %>% scale()
#
# T_sign <- Y %>% select(IFNg,  IL3) %>%
#   as.matrix() %>% scale()
# dc <- Y %>% pull(dc)

# Relabel the DC types with the names used on the slides.
# Entries equal to " bDC" become "Ordra" and every other entry becomes
# "Cetau"; testing `dc == " bDC"` a second time after the first rename can
# never match, so the comparison is evaluated only once.
# Assumes `dc` holds exactly two DC types — TODO confirm.
dc <- as.character(dc)
dc <- ifelse(dc == " bDC", "Ordra", "Cetau")

# Design matrix with one DC_sign column per DC type, no intercept.
X <- model.matrix(~ DC_sign:dc - 1)

# Fit from the pre-built design matrix; ":" separates regressor and group in
# the column names of X.
mod <- train_VariSel(
  Y = T_sign,
  X = X,
  sepx = ":",
  type = "group_multi_regr",
  type_S12_inv = "emp"
)
# Fit from the regressors and the grouping vector.
# NOTE: this assignment replaces the `mod` fitted just above.
mod <- train_VariSel(
  Y = T_sign,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_regr"
)

VariSel for one model type : Outcome

# Plot the fitted model `mod` (the object returned by train_VariSel()).
plot(mod)

Comparison of different modelling strategies

# Six colours taken from the UChicago palette.
col <- pal_uchicago()(6)

# All fits below use the same responses, regressors and grouping vector;
# only the model `type` differs.
fit_type <- function(type) {
  train_VariSel(Y = T_sign, regressors = DC_sign, group = dc, type = type)
}

m2 <- fit_type("group_multi_both")
m3 <- fit_type("fused_multi_both")
m4 <- fit_type("fused_multi_regr")
m5 <- fit_type("lasso_multi")

# Compare the paths of the first four fits.
# NOTE(review): `m5` is fitted but not passed to compar_path() — confirm
# whether that is intended.
compar_path(mods = list(mod, m2, m3, m4))

Model selection

# Model types to compare on the same data.
candidate_types <- c(
  "group_multi_regr",
  "group_multi_both",
  "fused_multi_regr",
  "fused_multi_both",
  "lasso_multi"
)

# Run the comparison with `times = 10`, then plot it with an empty title.
ct <- compar_type(
  Y = T_sign,
  regressors = DC_sign,
  group = dc,
  types = candidate_types,
  times = 10
)
plot_ct(ct) + labs(title = "")

Best models representation

# Extract the best models from the comparison `ct` using the "MSE_boot"
# criterion, then plot them.
bm <- get_best_models(
  ct,
  criterion = "MSE_boot"
)
plot_md(bm)

Conclusion

This is an R package to perform variable selection in multivariate linear models. It can \begin{itemize} \item Associate explanatory variables \item Associate responses \item Associate both explanatory variables and responses \item Leave all variables 'free', without associating any of them \end{itemize}

Come and see the vignette! \textcolor{outcome}{https://github.com/Marie-PerrotDockes/VariSel}



Marie-PerrotDockes/VariSel documentation built on May 7, 2020, 1:09 a.m.