inst/doc/randRotationIntro.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(digits = 4)

## ---- eval=FALSE--------------------------------------------------------------
#  if(!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("randRotation")

## ----message=FALSE------------------------------------------------------------
library(randRotation)
set.seed(0)
# Dataframe of phenotype data (sample information)
pdata <- data.frame(batch = as.factor(rep(1:3, c(10,10,10))),
                    phenotype = rep(c("Control", "Cancer"), c(5,5)))
features <- 1000

# Matrix with random gene expression data
edata <- matrix(rnorm(features * nrow(pdata)), features)
rownames(edata) <- paste("feature", 1:nrow(edata))

xtabs(data = pdata)

## -----------------------------------------------------------------------------
mod1 <- model.matrix(~1+phenotype, pdata)
head(mod1)

## -----------------------------------------------------------------------------
rr <- initBatchRandrot(Y = edata, X = mod1, coef.h = 2, batch = pdata$batch)

## -----------------------------------------------------------------------------
statistic <- function(Y, batch, mod){
  # (I) Batch effect correction with "Combat" from the "sva" package
  Y <- sva::ComBat(dat = Y, batch = batch, mod = mod)
  
  # (II) Linear model fit
  fit1 <- limma::lmFit(Y, design = mod)
  fit1 <- limma::eBayes(fit1)
  abs(fit1$t[,2])
}

## ----message=FALSE, results='hide'--------------------------------------------
rs1 <- rotateStat(initialised.obj = rr, R = 10, statistic = statistic,
                   batch = pdata$batch, mod = mod1)

## -----------------------------------------------------------------------------
rs1

## -----------------------------------------------------------------------------
p.vals <- pFdr(rs1)
hist(p.vals, col = "lightgreen");abline(h = 100, col = "blue", lty = 2)
qqunif(p.vals)

## -----------------------------------------------------------------------------
library(limma)
library(sva)

edata.combat <- ComBat(dat = edata, batch = pdata$batch, mod = mod1)
fit1 <- lmFit(edata.combat, mod1)
fit1 <- eBayes(fit1)

# P-values from t-statistics
p.vals.nonrot <- topTable(fit1, coef = 2, number = Inf, sort.by="none")$P.Value

hist(p.vals.nonrot, col = "lightgreen");abline(h = 100, col = "blue", lty = 2)
qqunif(p.vals.nonrot)
plot(p.vals, p.vals.nonrot, log = "xy", pch = 20)
abline(0,1, col = 4, lwd = 2)

## -----------------------------------------------------------------------------
# Specification of the full model
mod1 <- model.matrix(~1+phenotype, pdata)

# We select "phenotype" as the coefficient associated with H0
# All other coefficients are considered as "determined" coefficients
rr <- initRandrot(Y = edata, X = mod1, coef.h = 2)

coefs <- function(Y, mod){
  t(coef(lm.fit(x = mod, y = t(Y))))
}

# Specification of the H0 model
mod0 <- model.matrix(~1, pdata)

coef01 <- coefs(edata, mod0)
coef02 <- coefs(randrot(rr), mod0)

head(cbind(coef01, coef02))

all.equal(coef01, coef02)

## -----------------------------------------------------------------------------
coef11 <- coefs(edata, mod1)
coef12 <- coefs(randrot(rr), mod1)

head(cbind(coef11, coef12))

## -----------------------------------------------------------------------------
mod2 <- mod1
mod2[,2] <- mod2[,2] - 0.5

coef11 <- coefs(edata, mod2)
coef12 <- coefs(randrot(rr), mod2)

head(cbind(coef11, coef12))

## ----message=FALSE, results='hold'--------------------------------------------
library(limma)

mod1 = model.matrix(~phenotype, pdata)

## -----------------------------------------------------------------------------
# Linear model fit
fit0 <- lmFit(edata, mod1)
fit0 <- eBayes(fit0)

# P values for phenotype coefficient
p0 <- topTable(fit0, coef = 2, number = Inf, adjust.method = "none",
               sort.by = "none")$P.Value
hist(p0, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p0)

## -----------------------------------------------------------------------------
library(sva)
edata.combat = ComBat(edata, batch = pdata$batch, mod = mod1)

## -----------------------------------------------------------------------------
# Linear model fit
fit1 <- lmFit(edata.combat, mod1)
fit1 <- eBayes(fit1)

# P value for phenotype coefficient
p.combat <- topTable(fit1, coef = 2, number = Inf, adjust.method = "none",
                     sort.by = "none")$P.Value
hist(p.combat, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p.combat)

## -----------------------------------------------------------------------------
init1 <- initBatchRandrot(edata, mod1, coef.h = 2, batch = pdata$batch)

## -----------------------------------------------------------------------------
statistic <- function(Y, batch, mod, coef){
  Y.tmp <- sva::ComBat(dat = Y, batch = batch, mod = mod)

  fit1 <- limma::lmFit(Y.tmp, mod)
  fit1 <- limma::eBayes(fit1)
  # The "abs" is needed for "pFdr" to calculate 2-tailed statistics
  abs(fit1$t[,coef])
}

## ----message=FALSE, results='hide'--------------------------------------------
res1 <- rotateStat(initialised.obj = init1, R = 10, statistic = statistic,
                   batch = pdata$batch, mod = mod1, coef = 2)

## -----------------------------------------------------------------------------
p.rot <- pFdr(res1)
head(p.rot)

hist(p.rot, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p.rot)

## -----------------------------------------------------------------------------
plot(density(log(p.rot/p0)), col = "salmon", "Log p ratios",
     panel.first = abline(v=0, col = "grey"),
     xlim = range(log(c(p.rot/p0, p.combat/p0))))
lines(density(log(p.combat/p0)), col = "blue")
legend("topleft", legend = c("log(p.combat/p0)", "log(p.rot/p0)"),
       lty = 1, col = c("blue", "salmon"))

## -----------------------------------------------------------------------------
plot(p0, p.combat, log = "xy", pch = 20, col = "lightblue", ylab = "")
points(p0, p.rot, pch = 20, col = "salmon")
abline(0,1, lwd = 1.5, col = "black")
legend("topleft", legend = c("p.combat", "p.rot"), pch = 20,
       col = c("lightblue", "salmon"))

## -----------------------------------------------------------------------------
plot(density(log(p.combat/p.rot)), col = "blue",
     main = "log(p.combat / p.rot )", panel.first = abline(v=0, col = "grey"))

## -----------------------------------------------------------------------------
fdr.q  <- pFdr(res1, "fdr.q")
fdr.qu <- pFdr(res1, "fdr.qu")
fdr.BH <- pFdr(res1, "BH")

FDRs <- cbind(fdr.q, fdr.qu, fdr.BH)
ord1 <- order(res1$s0, decreasing = TRUE)

FDRs.sorted <- FDRs[ord1,]

matplot(FDRs.sorted, type = "l", lwd = 2)
legend("bottomright", legend = c("fdr.q", "fdr.qu", "BH"), lty = 1:5, lwd = 2,
       col = 1:6)

head(FDRs.sorted)


## ----warning=FALSE------------------------------------------------------------
edata[,] <- rnorm(length(edata))
group <- as.factor(rep(1:3, 10))

# add group effect for the first 100 features
group.effect <- rep(c(0,0,1), 10)
edata[1:100,] <- t(t(edata[1:100,]) + group.effect)

mod.groups <- model.matrix(~ group)

contrasts1 <- limma::makeContrasts("2vs3" = group2 - group3,
                                   levels = mod.groups)
contrasts1

## -----------------------------------------------------------------------------
mod.cont <- contrastModel(X = mod.groups, C = contrasts1)

## -----------------------------------------------------------------------------
init1 <- initBatchRandrot(edata, mod.cont, batch = pdata$batch)

## ----message=FALSE, results='hide', warning=FALSE-----------------------------
statistic <- function(Y, batch, mod, cont){
  Y.tmp <- sva::ComBat(dat = Y, batch = batch, mod = mod)

  fit1 <- limma::lmFit(Y.tmp, mod)
  
  fit1 <- limma::contrasts.fit(fit1, cont)
  fit1 <- limma::eBayes(fit1)
  
  # The "abs" is needed for "pFdr" to calculate 2-tailed statistics
  abs(fit1$t[,1])
}

res1 <- rotateStat(initialised.obj = init1, R = 20, statistic = statistic,
                   batch = pdata$batch, mod = mod.groups, cont = contrasts1)

## -----------------------------------------------------------------------------
p.rot <- pFdr(res1)
head(p.rot)

hist(p.rot, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p.rot)

## -----------------------------------------------------------------------------
plot(density(res1$s0), main = "", ylim = c(0,1), col = 2)
lines(density(res1$stats[[1]]), col = 1)
legend("topright", col = 1:2, lty = 1,
       legend = c("null-distribution by rotation", "test statistic"))

## ----message=FALSE, results='hide', warning=FALSE-----------------------------
res2 <- rotateStat(initialised.obj = init1, R = 20, statistic = statistic,
                   batch = pdata$batch, mod = mod.groups, cont = contrasts1)

p.rot2 <- pFdr(res2)

plot(density(res2$s0), main = "", ylim = c(0,1), col = 2)
lines(density(res2$stats[[1]]), col = 1)
legend("topright", col = 1:2, lty = 1,
       legend = c("null-distribution by rotation", "test statistic"))

## -----------------------------------------------------------------------------
plot(p.rot, p.rot2, pch = 19, log = "xy")
abline(0,1, col = "red")

## -----------------------------------------------------------------------------
pdata$individual <- sort(c(1:15, 1:15))
colnames(pdata)[2] <- "tissue"
pdata$tissue <- c("Control", "Cancer")
pdata

## -----------------------------------------------------------------------------
edata[,] <- rnorm(length(edata))
for(i in seq(1,ncol(edata),2)){
  tmp1 <- rnorm(nrow(edata))
  edata[,i] <- edata[,i] + tmp1
  edata[,i+1] <- edata[,i+1] + tmp1
}

## -----------------------------------------------------------------------------
library(nlme)
df1 <- data.frame(pdata, d1 = edata[1,])
spl1 <- split(1:nrow(pdata), pdata$batch)

covs1 <- function(., df1, i){
  df1$d1 <- .

  me1 <- lme(d1 ~ tissue, data = df1[i,], random = ~1|individual)
  getVarCov(me1, type = "marginal")[[1]]
}

covs1 <- sapply(spl1,
                function(samps)rowMeans(apply(edata, 1, covs1, df1, samps)))
cov1 <- matrix(rowMeans(covs1), 2, 2)
cormat <- cov2cor(cov1)
cormat

## -----------------------------------------------------------------------------
cormat <-  diag(5) %x% cormat
cormat <- list(cormat, cormat, cormat)

mod1 <- model.matrix(~1+tissue, pdata)
rr1 <- initBatchRandrot(Y = edata, X = mod1, coef.h = 2, batch = pdata$batch,
                        cormat = cormat)

statistic <- function(Y, batch, mod, df1){
  # Batch effect correction
  Y <- limma::removeBatchEffect(Y, batch = batch, design = mod)
  
  apply(Y, 1, function(j){
    df1$d1 <- j
    me0 <- nlme::lme(d1 ~ 1, data = df1, random = ~1|individual, method = "ML")
    me1 <- nlme::lme(d1 ~ tissue, data = df1, random = ~1|individual, method = "ML")
    
    abs(coef(me1)[1,2] / (sqrt(vcov(me1)[2,2])))
  })
  
}

rs1 <- rotateStat(initialised.obj = rr1, R = 4, statistic = statistic, 
                  batch = pdata$batch, mod = mod1, df1 = df1, parallel = TRUE)

p1 <- pFdr(rs1)

hist(p1, freq = FALSE); abline(h = 1, lty = 2, lwd = 2, col = "blue")
qqunif(p1)

## -----------------------------------------------------------------------------
sessionInfo()

Try the randRotation package in your browser

Any scripts or data that you put into this service are public.

randRotation documentation built on April 14, 2021, 6:01 p.m.