knitr::opts_chunk$set(fig.width = 8,
                      fig.height = 8,
                      fig.path = 'figures/temp/4_statistics_integrative/',
                      echo = FALSE,
                      warning = FALSE)

Multiblocks data analysis

The RGCCA and SGCCA approaches

@tenenhaus_variable_2014

@tenenhaus_regularized_2011

The mixOmics software

@rohart_mixomics:_2017

Application to the LATMX2 dataset

selvar.i <- 20 # number of features to select

latmx2.mset <- phenomis::reading(LATMX2::statistics_intraomics_dir.c(),
                                 report.c = "none")

sgccda.ls <- list()

for (gene.c in LATMX2::genes.vc()) {

  # gene.c <- "LAT"
  wtgene.vc <- c("WT", gene.c)

  for (tissue.c in LATMX2::tissues.vc()) {

    # tissue.c <- "plasma"

    mset <- LATMX2::subsetting(latmx2.mset,
                               genes.vc = wtgene.vc,
                               tissues.vc = tissue.c,
                               common_samples.l = TRUE)

    # mset <- ProMetIA::abbrev_mset(mset)

    clinics.eset <- mset[["clinics"]]

    gene.fc <- factor(Biobase::pData(clinics.eset)[, "gene"],
                      levels = wtgene.vc)
    sex.fc <- factor(Biobase::pData(clinics.eset)[, "sex"],
                      levels = LATMX2::sex.vc())

    mset <- mset[, setdiff(names(mset), "clinics")]

    exprs.ls <- MultiDataSet::as.list(mset)

    # ## using 'namecustom' metabolite names to avoid duplication between datasets
    # 
    # fdata.ls <- Biobase::fData(mset)
    # 
    # for (metset_name.c in grep("met", names(exprs.ls), value = TRUE)) {
    #   exprs.mn <- exprs.ls[[metset_name.c]]
    #   fdata.df <- fdata.ls[[metset_name.c]]
    #   rownames(exprs.mn) <- fdata.df[, "namecustom"]
    #   exprs.ls[[metset_name.c]] <- exprs.mn
    # }
    # 
    # rownames.ls <- lapply(exprs.ls, rownames)
    # rownames.vc <- unlist(rownames.ls)
    # rownames_dup.vc <- rownames.vc[duplicated(rownames.vc)]
    # stopifnot(length(rownames_dup.vc) == 0)

    ## Correcting the 'sex' effect

    exprs.ls <- lapply(exprs.ls,
                       function(exprs.mn)
                         t(sva::ComBat(exprs.mn, sex.fc)))

    ## Design matrix

    # We create the design matrix. The weights were set to 0.1 between the blocks of omics and to 1 between the blocks of omics and the response block.

    design.mn <- matrix(0.1,
                        ncol = length(exprs.ls),
                        nrow = length(exprs.ls), 
                        dimnames = list(names(exprs.ls),
                                        names(exprs.ls)))

    diag(design.mn) <- 0

    keep_x.ls <- lapply(names(exprs.ls),
                        function(set.c) rep(selvar.i, 2))
    names(keep_x.ls) <- names(exprs.ls)

    block_splsda <- mixOmics::block.splsda(X = exprs.ls,
                                           Y = gene.fc, 
                                           ncomp = 2,
                                           keepX = keep_x.ls,
                                           design = design.mn)

    gene_tissue.c <- paste0(gene.c, "_", tissue.c)

    sgccda.ls[[gene_tissue.c]] <- list(gene.fc = gene.fc,
                                       sex.fc = sex.fc,
                                       exprs.ls = exprs.ls,
                                       block_splsda = block_splsda)

  }
}

Scores

plot_diablo <- lapply(sgccda.ls,
                      function(data_model.ls)
                        mixOmics::plotDiablo(data_model.ls[["block_splsda"]]))

Loadings

sgccda_loadings.ls <- lapply(sgccda.ls,
                             function(data_model.ls) {
                               loadings.ls <- data_model.ls[["block_splsda"]][["loadings"]]
                               loadings.ls[["Y"]] <- NULL
                               sapply(loadings.ls,
                                      function(loadings.mn) {
                                        loadings.vn <- loadings.mn[, "comp1"]
                                        loadings.vn <- loadings.vn[abs(loadings.vn) > 0]
                                        loadings.vn <- loadings.vn[order(abs(loadings.vn),
                                                                         decreasing = TRUE)]
                                        paste0(ifelse(loadings.vn > 0, "- ", "+ "), names(loadings.vn))
                                      })
                             })
lapply(sgccda_loadings.ls, knitr::kable)
plot_loadings <- lapply(names(sgccda.ls),
                        function(gene_tissue.c)
                          mixOmics::plotLoadings(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                                 comp = 1,
                                                 legend.color = LATMX2::palette.vc()[c("WT", substr(gene_tissue.c, 1, 3))],
                                                 contrib = 'max', method = 'median'))

Plot individuals

plot_indiv <- lapply(names(sgccda.ls),
                     function(gene_tissue.c)
                       mixOmics::plotIndiv(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                           ind.names = TRUE, 
                                           legend = TRUE,
                                           ellipse = TRUE,
                                           col.per.group = LATMX2::palette.vc()[c("WT", substr(gene_tissue.c, 1, 3))]))
plot_arrow <- lapply(names(sgccda.ls),
                     function(gene_tissue.c)
                       mixOmics::plotArrow(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                           ind.names = FALSE, 
                                           col = LATMX2::palette.vc()[as.character(sgccda.ls[[gene_tissue.c]][["gene.fc"]])],
                                           title = gene_tissue.c))

Plot variables

plot_var <- lapply(sgccda.ls,
                   function(data_model.ls)
                     mixOmics::plotVar(data_model.ls[["block_splsda"]],
                                       var.names = FALSE, 
                                       style = 'graphics', 
                                       legend = TRUE))

Circos

circos_plot <- lapply(names(sgccda.ls),
                      function(gene_tissue.c)
                        mixOmics::circosPlot(sgccda.ls[[gene_tissue.c]][["block_splsda"]],
                                             cutoff = 0.7,
                                             line = TRUE,
                                             comp = 1,
                                             color.Y = LATMX2::palette.vc()[c("WT",
                                                                              substr(gene_tissue.c, 1, 3))]))

Heatmap

cim_diablo <- lapply(sgccda.ls,
                     function(data_model.ls)
                       mixOmics::cimDiablo(data_model.ls[["block_splsda"]],
                                           comp = 1,
                                           color.Y = LATMX2::palette.vc()[c("WT", "LAT")]))

Network

net_work <- lapply(sgccda.ls,
                   function(data_model.ls)
                     mixOmics::network(data_model.ls[["block_splsda"]],
                                       color.node = c("#1F78B4", '#33A02C'),
                                       shape.node = c("rectangle", "rectangle"),
                                       lty.edge = "solid", lwd.edge =  1,
                                       cutoff = 0.75))

Integrating the variable selection results in the dataset

for (set.c in setdiff(LATMX2::sets.vc(), "clinics")) {

  eset <- latmx2.mset[[set.c]]

  fdata.df <- Biobase::fData(eset)

  tissue.c <- unlist(strsplit(set.c, "_"))[2]

  for (gene.c in LATMX2::genes.vc()) {

    block_splsda <- sgccda.ls[[paste0(gene.c, "_", tissue.c)]][["block_splsda"]]

    loadings.ls <- block_splsda[["loadings"]]

    loadings.vn <- loadings.ls[[set.c]][, "comp1"]

    loadings.vn <- loadings.vn[abs(loadings.vn) > 0]

    mixomics.vn <- numeric(nrow(fdata.df))
    names(mixomics.vn) <- rownames(fdata.df)

    stopifnot(all(names(loadings.vn) %in% names(mixomics.vn)))

    mixomics.vn[names(loadings.vn)] <- loadings.vn

    fdata.df[, paste0("mixomics_", gene.c)] <- mixomics.vn

  }

  Biobase::fData(eset) <- fdata.df

  stopifnot(methods::validObject(eset))

  latmx2.mset <- MultiDataSet::add_eset(latmx2.mset,
                                        eset,
                                        dataset.type = set.c,
                                        GRanges = NA,
                                        overwrite = TRUE,
                                        warnings = FALSE)

}

latmx2.mset <- latmx2.mset[, LATMX2::sets.vc()]

Re-ordering metadata

latmx2.mset <- LATMX2::order_mset(latmx2.mset)

Saving (not run)

phenomis::writing(latmx2.mset,
                  file.path(gsub(LATMX2::data_dir.c(),
                                   "../../LATMX2/inst/extdata",
                                   LATMX2::statistics_integrative_dir.c())),
                  overwrite.l = TRUE,
                  report.c = "none")

References



ProMetIS/LATMX2 documentation built on March 30, 2020, 11:42 p.m.