# Global knitr chunk options for this vignette: collapse each chunk's source
# and output into a single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

We use a list of neurogenesis datasets from both human and mouse models, drawn from different studies, to demonstrate the use of the SJD package. Here we have a list of datasets, NeuroGenesis4, and one data frame, NeuroGenesis4.info, containing meta information that helps visualize the results. Users are advised to prepare such a data frame of meta information prior to visualization.

Install and load SJD package

To install this package in R, run the following commands:

# devtools supplies install_github(), which installs SJD from its GitHub repo.
library("devtools")
install_github(repo = "CHuanSite/SJD")
# Attach the freshly installed package.
library("SJD")

Jointly decompose 4 NeuroGenesis genomics datasets with the SJD package

Load data

The full structured NeuroGenesis4 data and its meta information NeuroGenesis4.info are deposited on Google Drive, and downsized subsets are built into the SJD package. Please follow the commands below to download the full data and info. Users who want to save time by using the subset data can skip the download from Google Drive and instead use the commands further below that load the built-in subsets.

We download the full datasets from Google Drive and load them into R:

# Install googledrive only when it is missing, so re-running this chunk does
# not reinstall the package every time. The CRAN mirror is reached over HTTPS.
if (!requireNamespace("googledrive", quietly = TRUE)) {
  install.packages("googledrive", repos = "https://cloud.r-project.org")
}

library(googledrive)

# NOTE(review): drive_download() may prompt for Google authentication; if these
# files are publicly shared, calling drive_deauth() first may avoid the prompt
# -- confirm before relying on it.

## Download NeuroGenesis4 to ./NeuroGenesis4.RData (overwrites an existing copy)
drive_download(file = "https://drive.google.com/u/0/uc?export=download&id=1jf4f_dxps47yT89m8Tkpviyci_XoOtGk", path = "./NeuroGenesis4.RData", overwrite = TRUE)

## Download NeuroGenesis4.info to ./NeuroGenesis4.info.RData (overwrites an existing copy)
drive_download(file = "https://drive.google.com/file/d/1aGYAxZDYJKOLLO1LmFk9vPYUmtmS1TjZ/view?usp=sharing", path = "./NeuroGenesis4.info.RData", overwrite = TRUE)

## Load the downloaded .RData files into the current environment
load(file = "./NeuroGenesis4.RData")
load(file = "./NeuroGenesis4.info.RData")

The following block of code loads the small sampled data built into the package, which is faster to compute on than the full-scale data (uncomment the lines to use it):

# data("NeuroGenesis4", package = "SJD")
# data("NeuroGenesis4.info", package = "SJD")

# str(NeuroGenesis4)

Preprocess data

Here we input the list of datasets (expression matrices) and wrap them so that they are ready for SJD analysis. sjdWrap identifies the genes shared among all datasets, regardless of species, and outputs a new list of datasets with unified gene names.

# Wrap the input datasets for SJD analysis. Each dataset is described by its
# species and gene-identifier type; the output is requested as human gene
# symbols.
SJDdataIN <- sjdWrap(
  data.list       = NeuroGenesis4,
  species.vector  = c("human", "human", "human", "mouse"),
  geneType.vector = c("symbol", "ensembl", "symbol", "symbol"),
  geneType.out    = "symbol",
  species.out     = "human"
)

Apply the decomposition method

After the transformation and alignment of the datasets, the next step is to apply a decomposition method. We give examples of the different methods to illustrate the process.

# Named groups of dataset indices; each group is decomposed into its own set
# of components.
grp <- list(
  Shared.All.4  = 1:4,
  Shared.bulk.2 = c(1, 2),
  Shared.sc.2   = c(3, 4),
  Hs.Meisnr.1   = 1,
  Hs.AZ.1       = 2,
  Gesch.1       = 3,
  Telley.1      = 4
)

# Two components per group; one entry for every element of "grp".
dims <- rep(2, length(grp))

# Alternative decomposition methods are listed below, commented out.
# Uncomment a line to run that method instead of, or in addition to,
# twoStageLCA.

# separate (one decomposition per dataset)
# sepPCA.out = sepPCA(SJDdataIN, dims)
# sepICA.out = sepICA(SJDdataIN, dims)
# sepNMF.out = sepNMF(SJDdataIN, dims)

# concatenated
# concatPCA.out = concatPCA(SJDdataIN, grp, dims)
# concatICA.out = concatICA(SJDdataIN, grp, dims)
# concatNMF.out = concatNMF(SJDdataIN, grp, dims)

# joint
# jointPCA.out = jointPCA(SJDdataIN, grp, dims)
# jointICA.out = jointICA(SJDdataIN, grp, dims)
# jointNMF.out = jointNMF(SJDdataIN, grp, dims)

# sequential -- the method actually run in this vignette
twoStageLCA.out <- twoStageLCA(
  dataset  = SJDdataIN,
  group    = grp,
  comp_num = dims
)
# twoStageiLCA.out = twoStageiLCA(dataset = SJDdataIN, group = grp, comp_num = dims)

Plot

library(ggplot2)
library(gridExtra)

Plot each dim in each dataset individually

Here we need to call the SJDScorePlotter function to generate SJDScorePlotter.obj, a list of ggplot objects that can be used by the assembling functions below.

# One row per dataset (row names taken from the dataset list), giving the plot
# type and the names of the columns used for the x axis, y axis and colour of
# each dataset's plot.
SampleMetaNamesTable <- data.frame(
  row.names     = names(NeuroGenesis4),
  Type          = c("Yaxis", "Yaxis", "2Dscatter", "2Dscatter"),
  XaxisColumn   = c("X", "DAYx", "tSNE_1", "tsne1:ch1"),
  YaxisColumn   = c("PJDscores", "PJDscores", "tSNE_2", "tsne2:ch1"),
  COLaxisColumn = c("color", "colorBYlabelsX", "PJDscores", "PJDscores"),
  PCHColumn     = rep("", 4),
  cexx          = rep(c(1, 2), each = 2)
)

# Build the plot objects from the twoStageLCA scores and the meta information.
SJDScorePlotter.obj <- SJDScorePlotter(
  SJDalg               = "twoStageLCA",
  scores               = twoStageLCA.out$score_list,
  lbb                  = "NeuroGenesis4.p2",
  info                 = NeuroGenesis4.info,
  SampleMetaNamesTable = SampleMetaNamesTable
)

# List the names of the generated plot objects.
print(names(SJDScorePlotter.obj))

Save the whole list to a .RData file

# save(SJDScorePlotter.obj, file = "./SJDScorePlotter.obj.RData")

Assemble the images based on dataset

# Collect the plots belonging to one dataset for the twoStageLCA results.
assemble.byDataset.obj <- assemble.byDataset(
  SJDScorePlotter.obj = SJDScorePlotter.obj,
  dataset_name        = "Meissner.inVitro.bulk.Hs",
  SJD_algorithm       = "twoStageLCA",
  group               = NA
)

# List the names of the selected plots.
print(names(assemble.byDataset.obj))

Plot all images from assemble.byDataset.obj. We can combine them into one large image using grid.arrange from the gridExtra package.

# Show the first two plots on their own.
assemble.byDataset.obj[[1]]
assemble.byDataset.obj[[2]]

# Place the third and fourth plots side by side in a single row.
g <- grid.arrange(
  assemble.byDataset.obj[[3]],
  assemble.byDataset.obj[[4]],
  ncol = 2,
  nrow = 1
)

Assemble the images based on component

# Collect the plots for components 1 and 2 of the "Shared.All.4" group from
# the twoStageLCA results.
assemble.byComponent.obj <- assemble.byComponent(
  SJDScorePlotter.obj = SJDScorePlotter.obj,
  component           = c(1, 2),
  SJD_algorithm       = "twoStageLCA",
  group               = "Shared.All.4"
)

# List the names of the selected plots, then display the first four.
print(names(assemble.byComponent.obj))
assemble.byComponent.obj[[1]]
assemble.byComponent.obj[[2]]
assemble.byComponent.obj[[3]]
assemble.byComponent.obj[[4]]


CHuanSite/SJD documentation built on Nov. 29, 2024, 5:52 a.m.