Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----echo = TRUE, results = "hide", message = FALSE---------------------------
##Install FLASHMM from CRAN.
# install.packages("FLASHMM")
##Install FLASHMM from Github.
# devtools::install_github("https://github.com/Baderlab/FLASHMM")
##Load the package.
library(FLASHMM)
## ----Reference dataset, echo = TRUE, message = FALSE--------------------------
##Generate a reference dataset by simuRNAseq function.
set.seed(2502)
refdata <- simuRNAseq(nGenes = 1000, nCells = 10000)
##counts
counts <- refdata$counts
##metadata
metadata <- refdata$metadata
head(metadata)
rm(refdata)
## ----Simulate dataset, echo = TRUE, message = FALSE---------------------------
##Generate the scRNA-seq data by simuRNAseq function.
set.seed(2503)
dat <- simuRNAseq(counts, metadata = metadata, nGenes = 100, nCells = 100000, nsam = 25,
ncls = 10, ntrt = 2, nDEgenes = 10, minbeta = 0.5, maxbeta = 1, var.randomeffects = 0.1)
str(dat)
rm(counts, metadata)
## ----Expression matrix, echo = TRUE, message = FALSE--------------------------
##Gene expression matrix, Y = log2(1 + counts)
Y <- log2(1 + dat$counts)
dat$counts <- NULL #Remove the counts.
##Design matrix for fixed effects
X <- model.matrix(~ 0 + log(libsize) + cls + cls:trt, data = dat$meta)
##Design matrix for random effects
Z <- model.matrix(~ 0 + as.factor(sam), data = dat$meta)
##Dimension of random effects
d <- ncol(Z)
rm(dat)
## ----LMM fitting, echo = TRUE, message = FALSE, warning = FALSE---------------
##Option 1: fit LMM by cell-level data.
max.iter <- 100
epsilon <- 1e-5
fit <- lmmfit(Y, X, Z, d = d, max.iter = max.iter, epsilon = epsilon)
##Option 2: fit LMM by summary-level data.
##(1) Compute the summary-level data.
n <- nrow(X)
XX <- t(X)%*%X
XY <- t(Y%*%X)
ZX <- t(Z)%*%X
ZY <- t(Y%*%Z)
ZZ <- t(Z)%*%Z
Ynorm <- rowSums(Y*Y)
rm(X, Y, Z) #release the memory.
##(2) Fit LMM.
fitss <- lmm(XX, XY, ZX, ZY, ZZ, Ynorm = Ynorm, n = n, d = d,
max.iter = max.iter, epsilon = epsilon)
identical(fit, fitss) #The two LMM fits are identical.
rm(fitss)
## -----------------------------------------------------------------------------
##Testing coefficients (fixed effects)
test <- lmmtest(fit)
#head(test)
##The testing t-value and p-values are also provided in the LMM fit.
range(test - cbind(t(fit$coef), t(fit$t), t(fit$p))) #identical
#fit$t[, 1:4]
#fit$p[, 1:4]
fit$coef[, 1:4]
## -----------------------------------------------------------------------------
contrast <- cbind("BvsA" = numeric(nrow(fit$coef)))
index <- grep(":", rownames(fit$coef))
contrast[index, ] <- 1/length(index)
##Test the contrast.
test <- lmmtest(fit, contrast = contrast)
head(test)
## -----------------------------------------------------------------------------
BvsA <- paste0(paste0("cls", 1:10, ":trtB"), collapse = "+")
BvsA <- paste0("(", BvsA, ")/10")
BvsA
contrast <- contrast.matrix(contrast = c(BvsA = BvsA), model.matrix.names = rownames(fit$coef))
test <- lmmtest(fit, contrast = contrast)
head(test)
## ----DE genes, echo = TRUE, message = FALSE, warning = FALSE------------------
##(1) Check the genes for which LMM fitting converges.
gc <- (apply(abs(fit$dlogL), 2, max) < epsilon)
sum(gc)
##(2) Hypothesis testing for variance components:
## H0, theta </= 0 vs H1, theta > 0.
z <- fit$theta["var1", ]/fit$se.theta["var1", ]
p <- pnorm(z, lower.tail = FALSE)
sum(p < 0.05)
##(3) The DE genes specific to a cell-type
##Coefficients, t-values, and p-values for the genes specific to a cell-type.
index <- grep(":", rownames(fit$coef))
ce <- fit$coef[index, gc]
tv <- fit$t[index, gc]
pv <- fit$p[index, gc]
out <- data.frame(
gene = rep(colnames(ce), nrow(ce)),
cluster = rep(rownames(ce), each = ncol(ce)),
coef = c(t(ce)), t = c(t(tv)), p = c(t(pv)))
##Adjust p-values by FDR.
out$FDR <- p.adjust(out$p, method = "fdr")
##The DE genes with FDR < 0.05 and abs(logFC) > 0.5
out <- out[order(out$p), ]
rownames(out) <- NULL
out[(out$FDR < 0.05) & (abs(out$coef) > 0.5) , ]
##The true DE genes
#dat$DEgenes
## ----LMM_ML, echo = TRUE, message = FALSE, warning = FALSE--------------------
##Fitting LMM by ML method
#fit <- lmmfit(Y, X, Z, d = d, method = "ML", max.iter = max.iter, epsilon = epsilon)
fit <- lmm(XX, XY, ZX, ZY, ZZ, Ynorm = Ynorm, n = n, d = d, method = "ML",
max.iter = max.iter, epsilon = epsilon)
##The DE genes specific to a cell-type
##Coefficients, t-values, and p-values
index <- NULL
index <- grep(":", rownames(fit$coef))
ce <- fit$coef[index, gc]
tv <- fit$t[index, gc]
pv <- fit$p[index, gc]
out <- NULL
out <- data.frame(
gene = rep(colnames(ce), nrow(ce)),
cluster = rep(rownames(ce), each = ncol(ce)),
coef = c(t(ce)), t = c(t(tv)), p = c(t(pv)))
##Adjusting p-values by FDR
out$FDR <- p.adjust(out$p, method = "fdr")
##The DE genes with FDR < 0.05 and abs(logFC) > 0.5
out <- out[order(out$p), ]
rownames(out) <- NULL
out[(out$FDR < 0.05) & (abs(out$coef) > 0.5) , ]
##
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.