# Global knitr chunk defaults: render figures with the cairo_pdf device,
# drop messages and warnings from the output, and cache chunk results.
# `message` is a logical option; 'hide' is a value of the `results` option,
# so FALSE is used here to suppress messages.
knitr::opts_chunk$set(
  dev = "cairo_pdf",
  message = FALSE,
  warning = FALSE,
  cache = TRUE
)
library(VariSel)
library(tidyverse)
library(extrafont)
library(ggsci)
require(tidymodels)
require(furrr)
require(rlang)
require(patchwork)
loadfonts()
 # Default ggplot2 theme for every figure: black-and-white base theme with
 # white facet strips and bold 16 pt "LM Roman 10" text.
 # theme_bw() is a complete theme, so it only needs to be added once.
 theme_set(
   theme_bw() +
     theme(
       strip.background = element_rect(fill = "white"),
       text = element_text(face = "bold", family = "LM Roman 10", size = 16)
     )
 )

# Mask the default discrete colour and fill scales with the uchicago
# palette scales. Arguments received through `...` are accepted but not
# forwarded to the palette functions.
scale_colour_discrete <- function(...) {
  scale_color_uchicago()
}

scale_fill_discrete <- function(...) {
  scale_fill_uchicago()
}

A simple vision of the immune system :

\framesubtitle{Or how Astérix and Obélix can kick-off the Romans}

```{tikz, tikz-ex2, fig.cap = "DC Th dialogue", fig.ext = 'pdf', echo =FALSE}
% Colour palette used by the styles and by the picture below.
\definecolor{vert}{RGB}{ 109,205,89}
\definecolor{bleu}{RGB}{93,139,172}
\definecolor{marron}{RGB}{172,126,93}
\definecolor{marronfonce}{RGB}{120,88,65}
\definecolor{i1}{RGB}{122,181,168}
\definecolor{i2}{RGB}{122,165,181}
\definecolor{i3}{RGB}{122,181,139}
\definecolor{dc}{RGB}{173,220,153}
\definecolor{dc2}{RGB}{123, 87, 229}
\definecolor{lt}{RGB}{153,173,220}
\definecolor{pert}{RGB}{220,153,173}
% `line` is the arrow style used by every \path below.
\tikzstyle{line} = [draw, -latex, color = marron, line width=1pt]
\tikzstyle{block} = [rectangle, draw, fill=vert!20, draw= vert!80, text = marronfonce,
text width=3.5em, text centered, rounded corners, minimum height=2em,line width=1pt]
\tikzstyle{block3} = [rectangle, draw, fill=vert!10, draw= vert!90, text = marronfonce,
text width=3em, text centered, rounded corners, minimum height=1.1em,line width=0.7pt]
\tikzstyle{inp} = [rectangle, draw, text = marronfonce,
text width=4.4em, text centered, rounded corners, minimum height=1em,line width=1pt]
\begin{tikzpicture}[node distance = 2cm, auto]
  % nodes
  % Image nodes, left to right: Pert, DC, TL, L.
  \node(DC){\includegraphics[width= 0.22\textwidth]{cellule_dend}};
  \node[left of = DC, node distance = 3 cm, color = pert](Pert){\includegraphics[width= 0.17\textwidth]{romain.png}};
  \node[right of = DC, node distance = 4 cm](TL){\includegraphics[width= 0.17\textwidth]{ast_ob}};
  \node[right of = TL, node distance = 3 cm](L){\includegraphics[width= 0.17\textwidth]{paf-asterix}};
  % Text labels placed below and above the image nodes.
  \node[below of = DC, node distance =2.2 cm,text width= 0.17\textwidth,text centered,color =dc!70!black]{ Dendritic cells};
  \node[below of = Pert, node distance =2.2 cm,text width= 0.22\textwidth,text centered,color =pert!70!black]{ Perturbators};
  \node[below of = TL, node distance =2.2 cm,text width= 0.22\textwidth, text centered,color =lt!70!black]{ \;\;\;\; Th \;\; \;\; lymphocytes};
  \node[above of = TL, node distance =1.5 cm,text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!90!black]{ Th responses};
  \node[above of =DC, node distance =1.5 cm,text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!90!black]{ DC signals};

  % Five parallel arrows from DC to TL.
  \path [line,color = dc] ([yshift=-0.8 cm,xshift=- 0.1 cm]DC.east) -- ([yshift=-0.8 cm]TL.west);
  \path [line,color = dc] ([yshift=-0.4 cm,xshift=- 0.1 cm]DC.east) -- ([yshift=-0.4 cm]TL.west);
  \path [line,color = dc] ([xshift=- 0.1 cm]DC.east) --  (TL.west);
  \path [line,color = dc] ([yshift=0.4 cm, xshift=- 0.1 cm]DC.east) --  ([yshift=0.4 cm]TL.west);
  \path [line,color = dc] ([yshift=0.8 cm, xshift=- 0.1 cm]DC.east) --  ([yshift=0.8 cm]TL.west);

  % One gray arrow from Pert to DC.
  \path [line, color = gray](Pert) -- (DC.west);

  % Five parallel arrows from TL to L.
  \path [line,color = lt]([xshift= -0.4cm, yshift = -0.7cm]TL.east) -- ([yshift = -0.7cm]L.west);
  \path [line,color = lt]([xshift= -0.4cm, yshift = -0.35cm]TL.east) -- ([yshift = -0.35cm]L.west);
  \path [line,color = lt] ([xshift= -0.4cm]TL.east) -- (L.west);
  \path [line,color = lt] ([xshift= -0.4cm, yshift = 0.35cm]TL.east) -- ([yshift = 0.35cm]L.west);
  \path [line,color = lt] ([xshift= -0.4cm, yshift = 0.7cm]TL.east) -- ([yshift = 0.7cm]L.west);
\end{tikzpicture}
```

\tiny{ Grandclaudon, M., Perrot-Dockès, M, Trichot, C et al. \textit{ A Quantitative Multivariate Model of Human Dendritic Cell-T Helper Cell Communication (March 15, 2019)}. Available at \url{http://dx.doi.org/10.2139/ssrn.3353217} }
<!--   \node[below of = DC, node distance =3.2 cm,text width= 0.17\textwidth,text centered,color =dc!70!black]{Sentinnelles};    -->
<!--  \node[below of = Pert, node distance =3.2 cm,text width= 0.22\textwidth,text centered,color =pert!70!black]{ Perturbation}; -->
<!--       \node[below of = TL, node distance =3.2 cm,text width= 0.22\textwidth, text centered,color =lt!70!black]{ Warriors}; -->
<!--       \node[below of = TL, node distance =3.2 cm,text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!90!black]{Actions}; -->
<!--       \node[above of =DC, node distance =3.2 cm,text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!90!black]{Warnings}; -->

# Experimental set up :
\framesubtitle{Ordralphabetix and Cétautomatix are two!}


```{tikz, tikz-ex2b, fig.cap = "DC Th dialogue", fig.ext = 'pdf', echo =FALSE, fig.num=1}
% Colour palette. The pictures in this chunk reference dc, lt and pert
% directly; marron and marronfonce are used by the styles defined below.
\definecolor{vert}{RGB}{ 109,205,89}
\definecolor{bleu}{RGB}{93,139,172}
\definecolor{marron}{RGB}{172,126,93}
\definecolor{marronfonce}{RGB}{120,88,65}
\definecolor{i1}{RGB}{122,181,168}
\definecolor{i2}{RGB}{122,165,181}
\definecolor{i3}{RGB}{122,181,139}
\definecolor{dc}{RGB}{173,220,153}
\definecolor{dc2}{RGB}{123, 87, 229}
\definecolor{lt}{RGB}{153,173,220}
\definecolor{pert}{RGB}{220,153,173}
% Arrow style applied to every \path in the pictures of this chunk.
\tikzstyle{line} = [draw, -latex, color = marron, line width=1pt]
% Box styles; the pictures visible in this chunk do not reference them.
\tikzstyle{block} = [rectangle, draw, fill=vert!20, draw= vert!80, text = marronfonce,
text width=3.5em, text centered, rounded corners, minimum height=2em,line width=1pt]
\tikzstyle{block3} = [rectangle, draw, fill=vert!10, draw= vert!90, text = marronfonce,
text width=3em, text centered, rounded corners, minimum height=1.1em,line width=0.7pt]
\tikzstyle{inp} = [rectangle, draw,  text = marronfonce,
text width=4.4em, text centered, rounded corners, minimum height=1em,line width=1pt]
  % First panel: the Pert -> DC -> TL -> L chain, with the DC node labelled "Ordra".
  \begin{tikzpicture}[node distance = 2cm, auto]
    % Image nodes, positioned relative to DC.
    \node (DC) {\includegraphics[width= 0.19\textwidth]{cellule_dend1}};
    \node [left of = DC, node distance = 3 cm,  color = pert] (Pert) {\includegraphics[width= 0.17\textwidth]{romain.png}};
    \node [right of = DC, node distance = 4 cm] (TL) {\includegraphics[width= 0.17\textwidth]{ast_ob}};
    \node [right of = TL, node distance = 3 cm] (L) {\includegraphics[width= 0.17\textwidth]{paf-asterix}};

    % Text labels below the image nodes.
    \node [below of = DC, node distance =2.2 cm,text width= 0.17\textwidth,text centered,color =dc!70!black] { \textbf{Ordra}};
    \node [below of = Pert, node distance =2.2 cm,text width= 0.22\textwidth,text centered,color =pert!70!black] { Perturbators};
    \node [below of = TL, node distance =2.2 cm,text width= 0.22\textwidth, text centered,color =lt!70!black] {  \;\;\;\; Th \;\; \;\; lymphocytes};

    % Text labels above the two bundles of arrows.
    \node [above of = TL, node distance =1.5 cm,text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!90!black] { Th responses};
    \node [above of =DC, node distance =1.5 cm,text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!90!black] { DC signals};

    % Five parallel arrows from DC to TL (vertical offsets -0.8 to 0.8 cm).
    \path [line,color = dc] ([yshift=-0.8 cm,xshift=- 0.1 cm]DC.east) -- ([yshift=-0.8 cm]TL.west);
    \path [line,color = dc] ([yshift=-0.4 cm,xshift=- 0.1 cm]DC.east) -- ([yshift=-0.4 cm]TL.west);
    \path [line,color = dc] ([xshift=- 0.1 cm]DC.east) -- (TL.west);
    \path [line,color = dc] ([yshift=0.4 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.4 cm]TL.west);
    \path [line,color = dc] ([yshift=0.8 cm, xshift=- 0.1 cm]DC.east) -- ([yshift=0.8 cm]TL.west);

    % One gray arrow from Pert to DC.
    \path [line, color = gray] (Pert) -- (DC.west);

    % Five parallel arrows from TL to L (vertical offsets -0.7 to 0.7 cm).
    \path [line,color = lt] ([xshift= -0.4cm, yshift = -0.7cm]TL.east) -- ([yshift = -0.7cm]L.west);
    \path [line,color = lt] ([xshift= -0.4cm, yshift = -0.35cm]TL.east) -- ([yshift = -0.35cm]L.west);
    \path [line,color = lt] ([xshift= -0.4cm]TL.east) -- (L.west);
    \path [line,color = lt] ([xshift= -0.4cm, yshift = 0.35cm]TL.east) -- ([yshift = 0.35cm]L.west);
    \path [line,color = lt] ([xshift= -0.4cm, yshift = 0.7cm]TL.east) -- ([yshift = 0.7cm]L.west);
  \end{tikzpicture}

  % Second panel: same Pert -> DC -> TL -> L chain, with the DC node labelled "Cetau".
  \begin{tikzpicture}[node distance = 2cm, auto]
    % Image nodes, positioned relative to DC.
    \node(DC){\includegraphics[width= 0.19\textwidth]{cellule_dend2}};
    \node[left of = DC, node distance = 3 cm,  color = pert](Pert){\includegraphics[width= 0.17\textwidth]{romain.png}};
    \node[right of = DC, node distance = 4 cm](TL){\includegraphics[width= 0.17\textwidth]{ast_ob}};
    \node[right of = TL, node distance = 3 cm](L){\includegraphics[width= 0.17\textwidth]{paf-asterix}};

    % Text labels below and above the image nodes.
    \node[below of = DC, node distance =2.2 cm,text width= 0.17\textwidth,text centered,color =dc!70!black]{ \textbf{Cetau}};
    \node[below of = Pert, node distance =2.2 cm,text width= 0.22\textwidth,text centered,color =pert!70!black]{ Perturbators};
    \node[below of = TL, node distance =2.2 cm,text width= 0.22\textwidth, text centered,color =lt!70!black]{  \;\;\;\; Th \;\; \;\; lymphocytes};
    \node[above of = TL, node distance =1.5 cm,text width= 0.15\textwidth, text centered, xshift = 1.2 cm, color =lt!90!black]{ Th responses};
    \node[above of =DC, node distance =1.5 cm,text width= 0.15\textwidth, text centered, xshift = 2 cm, color =dc!90!black]{ DC signals};

    % Five parallel arrows from DC to TL.
    \path [line,color = dc] ([yshift=-0.8 cm,xshift=- 0.1 cm]DC.east) -- ([yshift=-0.8 cm]TL.west);
    \path [line,color = dc] ([yshift=-0.4 cm,xshift=- 0.1 cm]DC.east) -- ([yshift=-0.4 cm]TL.west);
    \path [line,color = dc] ([xshift=- 0.1 cm]DC.east) --  (TL.west);
    \path [line,color = dc] ([yshift=0.4 cm, xshift=- 0.1 cm]DC.east) --  ([yshift=0.4 cm]TL.west);
    \path [line,color = dc] ([yshift=0.8 cm, xshift=- 0.1 cm]DC.east) --  ([yshift=0.8 cm]TL.west);

    % One gray arrow from Pert to DC.
    \path [line, color = gray](Pert) -- (DC.west);

    % Five parallel arrows from TL to L.
    \path [line,color = lt]([xshift= -0.4cm, yshift = -0.7cm]TL.east) -- ([yshift = -0.7cm]L.west);
    \path [line,color = lt]([xshift= -0.4cm, yshift = -0.35cm]TL.east) -- ([yshift = -0.35cm]L.west);
    \path [line,color = lt] ([xshift= -0.4cm]TL.east) -- (L.west);
    \path [line,color = lt] ([xshift= -0.4cm, yshift = 0.35cm]TL.east) -- ([yshift = 0.35cm]L.west);
    \path [line,color = lt] ([xshift= -0.4cm, yshift = 0.7cm]TL.east) -- ([yshift = 0.7cm]L.west);
  \end{tikzpicture}
```

Statistical modelling

\framesubtitle{Understand the Gallic language}

\begin{itemize} \item \textbf{Dataset description:}

\begin{itemize} \item \textcolor{dc!70!black}{$\boldsymbol{X}$}: $n\times p$ design matrix : the DC signals \includegraphics[width =0.12\textwidth]{cellule_dend} \item \textcolor{lt!70!black}{$\boldsymbol{Y}$}: $n\times q$ response matrix : the Th responses \includegraphics[width =0.12\textwidth]{ast_ob} \end{itemize}

\item \textbf{Question:} Which variables influence the responses?

\item \textbf{Approach:} \begin{itemize} \item Variable selection in $$ \boldsymbol{Y}=\boldsymbol{XB}+\boldsymbol{E}, $$ where \begin{itemize} \item $\boldsymbol{B}$: $p\times q$ \textbf{sparse} coefficient matrix \item $\boldsymbol{E}$: $n\times q$ error matrix with $$\forall i\in\{1,\dots, n\}, \; (E_{i,1},\dots,E_{i,q})\stackrel{iid}{\sim}~\mathcal{N}(0,\boldsymbol{\Sigma}_q)$$

\end{itemize} \item We take the dependence into account by estimating $\boldsymbol{\Sigma}_q$. \end{itemize} \end{itemize}

Statistical modelling

\framesubtitle{Understand the Gallic language}

\begin{itemize} \item \textbf{Dataset description:}

\begin{itemize} \item \textcolor{dc!70!black}{$\boldsymbol{X}$}: $n\times p$ design matrix : the DC signals \includegraphics[width =0.12\textwidth]{cellule_dend} \item \textcolor{lt!70!black}{$\boldsymbol{Y}$}: $n\times q$ response matrix : the Th responses \includegraphics[width =0.12\textwidth]{ast_ob} \end{itemize}

\item \textbf{Question:} Which variables influence the responses?

\item \textbf{Approach:} \begin{itemize} \item Variable selection in $$ \boldsymbol{Y}=\boldsymbol{XB}+\boldsymbol{E}, $$ where \begin{itemize} \item $\boldsymbol{B}$: $p\times q$ \textbf{sparse} coefficient matrix \item $\boldsymbol{E}$: $n\times q$ error matrix with $$\forall i\in\{1,\dots, n\}, \; (E_{i,1},\dots,E_{i,q})\stackrel{iid}{\sim}~\mathcal{N}(0,\boldsymbol{\Sigma}_q)$$

\end{itemize} \item We take the dependence into account by estimating $\boldsymbol{\Sigma}_q$. \end{itemize} \end{itemize}

Different penalties for different points of view

\begin{itemize} \item \textcolor{outcome}{\textbf{Lasso}} : \textit{select variables without taking into account potential links.} \begin{equation} \label{lasso} \widehat{b}_L = \Am{b} \left\lbrace||y-\bX b||_2^2 + \lambda ||b||_1 \right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Group-Lasso}} : \textit{select a group of variables.} \begin{equation} \label{grouplasso} \widehat{b}_G = \Am{b_1, \dots, b_L} \left\lbrace||y-\sum_{1 \leq \ell \leq L}\bX_{(\ell)} b_{(\ell)}||_2^2 + \lambda\sum_{1 \leq \ell \leq L}\sqrt{p_\ell} ||b_\ell||_2\right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Fused-Lasso}} : \textit{encourage a group of variables to share the same coefficient.} \begin{equation}\label{fuselasso} \widehat{b}_F =\Am{b}\left\lbrace||y-\bX b||_2^2 + \lambda_1\sum_{(i,j) \in \mathcal{G}} |b_i - b_j|+ \lambda_2 ||b||_1\right\rbrace. \end{equation} \end{itemize}

Different penalties for different points of view

\begin{itemize} \item \textcolor{outcome}{\textbf{Lasso}} : \textit{select variables without taking into account potential links.} \begin{equation} \label{lasso} \widehat{b}_L = \Am{b} \left\lbrace||y-\bX b||_2^2 + \lambda ||b||_1 \right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Group-Lasso}} : \textit{select a group of variables.} \begin{equation} \label{grouplasso} \widehat{b}_G = \Am{b_1, \dots, b_L} \left\lbrace||y-\sum_{1 \leq \ell \leq L}\bX_{(\ell)} b_{(\ell)}||_2^2 + \lambda\sum_{1 \leq \ell \leq L}\sqrt{p_\ell} ||b_\ell||_2\right\rbrace, \end{equation} \item \textcolor{outcome}{\textbf{Fused-Lasso}} : \textit{encourage a group of variables to share the same coefficient.} \begin{equation}\label{fuselasso} \widehat{b}_F =\Am{b}\left\lbrace||y-\bX b||_2^2 + \lambda_1\sum_{(i,j) \in \mathcal{G}} |b_i - b_j|+ \lambda_2 ||b||_1\right\rbrace. \end{equation} \end{itemize}

VariSel for one model type

# Fix the random seed before any model is fitted.
set.seed(4)

# Restore two saved workspaces.
# NOTE(review): Xbrt, X_bxcx, Y_bxcx, pert_dc and dc are never created in this
# file, so they presumably come from these .RData files -- confirm.
load("raw_data.RData")
load("data_Tmod_sspdc.RData")

# Average every numeric column of Xbrt within each (pert_dc, dc) combination.
# This X is overwritten by the model.matrix() call further down.
X <- Xbrt %>%
  as.data.frame() %>%
  mutate(pert_dc = pert_dc, dc = dc) %>%
  group_by(pert_dc, dc) %>%
  summarize_if(is.numeric, mean) %>%
  ungroup() %>%
  as.data.frame()
#
# Y <- Y_bxcx %>% as.data.frame() %>%
#   mutate(pert_dc= pert_dc, dc= dc) %>%
#   group_by(pert_dc, dc) %>%
#   summarize_if(is.numeric, mean) %>%
#   ungroup() %>% as.data.frame()

# Regressors: columns 36, 37 and 40 of X_bxcx.
# Responses: columns 2 and 10 of Y_bxcx.
DC_sign <- X_bxcx[, c(36, 37, 40)]
T_resp <- Y_bxcx[, c(2, 10)]
# DC_sign <- X %>% select(-dc,-pert_dc) %>%
#   as.matrix() %>% scale()
#
# T_resp <- Y %>% select(IFNg,  IL3) %>%
#   as.matrix() %>% scale()
# dc <- Y %>% pull(dc)

# Recode the dc labels to the names used on the slides.
# NOTE(review): "bDC " is matched with a trailing space while "MoDC" is not;
# confirm this is how the labels are stored.
dc <- as.character(dc)
dc[dc == "bDC "] <- "Ordra"
dc[dc == "MoDC"] <- "Cetau"

# First call: regressors and grouping vector are passed separately.
mod <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_regr"
)

# Second call: the regressor-by-group interaction design matrix (no intercept)
# is built by hand and passed through `X`, with `sepx` set to the ":" that
# model.matrix() puts in the column names. `mod` is overwritten.
X <- model.matrix(~ DC_sign:dc - 1)
mod <- train_VariSel(
  Y = T_resp,
  X = X,
  sepx = ":",
  type = "group_multi_regr"
)

VariSel for one model type

# First call: regressors and grouping vector are passed separately.
mod <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_regr"
)

# Second call: the regressor-by-group interaction design matrix (no intercept)
# is built by hand and passed through `X`, with `sepx` set to the ":" that
# model.matrix() puts in the column names. `mod` is overwritten.
X <- model.matrix(~ DC_sign:dc - 1)
mod <- train_VariSel(
  Y = T_resp,
  X = X,
  sepx = ":",
  type = "group_multi_regr"
)

VariSel for one model type : Outcome

# Call plot() on the fitted `mod` object.
plot(mod)

Different modelling strategy

# Six colours from the uchicago palette (not used again in this chunk).
col <- pal_uchicago()(6)

# Fit the same responses, regressors and grouping under five model types.
mod <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_regr"
)
m2 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_both"
)
m3 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "fused_multi_both"
)
m4 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "fused_multi_regr"
)
m5 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "lasso_multi"
)

# Compare the paths of the first four fits.
# NOTE(review): m5 (lasso_multi) is fitted above but not passed here -- confirm
# that leaving it out is intended.
compar_path(mods = list(mod, m2, m3, m4))

Different modelling strategy

# Six colours from the uchicago palette (not used again in this chunk).
col <- pal_uchicago()(6)

# Fit the same responses, regressors and grouping under five model types.
mod <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_regr"
)
m2 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "group_multi_both"
)
m3 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "fused_multi_both"
)
m4 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "fused_multi_regr"
)
m5 <- train_VariSel(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  type = "lasso_multi"
)

# Compare the paths of the first four fits.
# NOTE(review): m5 (lasso_multi) is fitted above but not passed here -- confirm
# that leaving it out is intended.
compar_path(mods = list(mod, m2, m3, m4))

Models selection

# Run compar_type() on the same data for the five model types, with times = 10.
ct <- compar_type(
  Y = T_resp,
  regressors = DC_sign,
  group = dc,
  types = c(
    "group_multi_regr", "group_multi_both",
    "fused_multi_regr", "fused_multi_both",
    "lasso_multi"
  ),
  times = 10
)

# Plot the comparison with an empty title.
plot_ct(ct) + labs(title = '')

Best models representation

# Extract the best models from the comparison `ct` using the "MSE_boot"
# criterion, then plot them.
bm <- get_best_models(ct, criterion = "MSE_boot")
plot_md(bm)

Conclusion

This is an R package to perform variable selection in multivariate linear models. It can \begin{itemize} \item Associate explanatory variables \item Associate responses \item Associate both explanatory variables and responses \item Leave all variables 'free', without associating any of them \end{itemize}

Come and see the vignette! \textcolor{outcome}{https://github.com/Marie-PerrotDockes/VariSel}



Marie-PerrotDockes/VariSel documentation built on May 7, 2020, 1:09 a.m.