In kkdey/flashr: Factor Loading Adaptive SHrinkage in R

# Global knitr chunk options used when rendering this README:
#   collapse - merge a chunk's source and its output into a single block
#   comment  - prefix printed output lines with "#>"
#   fig.path - filename prefix for figures generated by the chunks
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.path = "README-")

flashr

Repo for flashr R package: Factor Loading Adaptive SHrinkage for matrix and tensor data.

Installation

To install dependencies, run in R:

# Dependencies installed with install.packages().
install.packages(
  c("irlba", "tensr", "devtools")
)

# ashr is installed from GitHub, pinned to the "uni" branch.
devtools::install_github(repo = "stephens999/ashr", ref = "uni")

The "uni" branch of the ashr package is required to use any mixing distribution other than a normal.

Because this is currently a private repo, to install flashr you will need a personal access token (PAT), which you can generate here: https://github.com/settings/tokens

Then you can run in R:

# Install flashr from GitHub; "xxx" is a placeholder for your own access token.
devtools::install_github(
  repo = "stephenslab/flashr",
  auth_token = "xxx"
)

where you replace xxx with your PAT.

To get help, run ??flashr in R.

Application of FLASH

We load the flashr package, together with the Deng et al. (2014) mouse single-cell RNA-seq data package used in this example.

# Attach the flashr package for the rest of this example.
library(flashr)

# Data package providing the Deng2014MouseESC object used below.
library(singleCellRNASeqMouseDeng2014)

# Per-sample metadata (its cell_type column is used later for plot labels).
deng.meta_data <- pData(Deng2014MouseESC)

# Expression matrix and its row names, which are used later as gene names.
deng.counts <- exprs(Deng2014MouseESC)
deng.gene_names <- dimnames(deng.counts)[[1L]]

We apply the greedy version of FLASH with the variances determined by the voom weights.

# NOTE(review): voom_data and voom_weights are not created anywhere in this
# README; they are assumed to exist in the session already - TODO confirm.
#
# Greedy FLASH fit with K = 10; voom_weights is supplied as the known
# variances (partype = "known", sigmae2_true).
ll_deng <- flash.greedy(
  voom_data,
  K = 10,
  flash_para = list(
    tol = 1e-3,
    maxiter_r1 = 50,
    partype = "known",
    sigmae2_true = voom_weights,
    nonnegative = FALSE
  )
)

# data() loads the packaged dataset and returns its name as a string; get()
# then fetches the object of that name. This overwrites the ll_deng assigned
# by the flash.greedy() call above - presumably with a stored copy of the fit
# so the example need not be rerun (TODO confirm).
ll_deng <- get(data("flash_deng_ex"))

We post-process the factors obtained to check the proportion of variance explained by each factor, as well as the sparsity and other related features of the factors that the user may be interested in.

```{r, echo=TRUE, eval=FALSE}
postprocess_ll <- flash_factor_postprocess(ll_deng$l,ll_deng$f, voom_data)
pve_percentage <- postprocess_ll$PVE*100
```

The user can visualize the loadings either as a stacked bar chart, using the function *FactorGGStack*, or as a multi-panel bar chart, using the function *FactorGGBar*.

```r
# Loadings from the FLASH fit; rows are treated as samples below.
omega <- ll_deng$l

# One annotation row per sample: a synthetic id ("X1", "X2", ...) plus the
# sample's cell type as a factor whose level order fixes the ordering of the
# developmental stages. seq_len() is used instead of 1:n so that an empty
# loadings matrix yields no ids rather than the sequence c(1, 0).
annotation <- data.frame(
  sample_id = paste0("X", seq_len(NROW(omega))),
  label = factor(
    deng.meta_data$cell_type,
    levels = c(
      "zy", "early2cell", "mid2cell", "late2cell", "4cell", "8cell",
      "16cell", "earlyblast", "midblast", "lateblast"
    )
  )
)

# Label the loadings rows with the same ids used in the annotation.
rownames(omega) <- annotation$sample_id

# Stacked bar chart of the loadings. The first column of omega and the first
# PVE entry are dropped together, so legend labels stay aligned with factors.
FactorGGStack(
  loadings = omega[, -1],
  annotation = annotation,
  palette = c(
    RColorBrewer::brewer.pal(8, "Accent"),
    RColorBrewer::brewer.pal(4, "Spectral")
  ),
  yaxis_label = "Development Phase",
  order_sample = TRUE,
  figure_title = "Factor Loadings Structure Plot",
  legend_labels = pve_percentage[-1],
  scale = TRUE,
  axis_tick = list(
    axis_ticks_length = .1,
    axis_ticks_lwd_y = .1,
    axis_ticks_lwd_x = .1,
    axis_label_size = 7,
    axis_label_face = "bold"
  )
)

Structure Plot

# Multi-panel bar chart of all loadings columns, with one legend label per
# entry of pve_percentage. The palette list supplies the low/mid/high colours
# and the midpoint value.
FactorGGBar(
  loadings = omega,
  annotation = annotation,
  palette = list(
    mid = "white",
    low = "red",
    high = "blue",
    midpoint = 0
  ),
  yaxis_label = "Population Type",
  figure_title = " ",
  axis_tick = list(
    axis_ticks_length = .1,
    axis_ticks_lwd_y = .1,
    axis_ticks_lwd_x = .1,
    axis_label_size = 7,
    axis_label_face = "bold"
  ),
  legend_labels = pve_percentage,
  scale = TRUE,
  panel = list(
    panel_rows = 2,
    panel_title = "Factor Loadings Bar plot",
    panel_title_fontsize = 10,
    panel_title_font = 3
  )
)

Structure Plot

The top distinguishing features separating the factors can be extracted as follows

# Scale every factor column (all but the first) to unit standard deviation.
# Columns whose standard deviation is exactly zero are left unchanged.
ll_f_scale <- apply(ll_deng$f[, -1], 2, function(column) {
  column_sd <- sd(column)
  if (column_sd != 0) column / column_sd else column
})

# Rescale a numeric vector so that its entries sum to one.
#
# x: numeric vector.
# Returns x / sum(x). When sum(x) is exactly zero (e.g. an all-zero column)
# x is returned unchanged rather than as NaN, mirroring the zero guard used
# when scaling columns by their standard deviation. An NA total is not
# treated as zero, so missing values still propagate through the division.
normalize <- function(x) {
  total <- sum(x)
  if (isTRUE(total == 0)) {
    return(x)
  }
  x / total
}

# Absolute factor values (first column dropped), with each column passed
# through normalize().
abs_f_scale <- apply(abs(ll_deng$f[, -1]), 2, normalize)

# Indices of the top 100 features as returned by CountClust.
indices <- CountClust::ExtractTopFeatures(
  abs_f_scale,
  top_features = 100,
  method = "poisson",
  options = "min"
)

# Translate every index into the corresponding gene name.
imp_features <- apply(
  indices,
  c(1, 2),
  function(idx) deng.gene_names[idx]
)

# Show the top-left 6 x 6 corner of the gene-name matrix.
imp_features[seq_len(6), seq_len(6)]