Introduction

Basic Workflow

# Attach the packages needed by the code in this document.
library(EMMIXcontrasts2)
library(Biobase)

# Default figure size for all knitr chunks.
knitr::opts_chunk$set(fig.width = 8, fig.height = 6)

# Fix the random seed so that repeated runs give the same results.
set.seed(123)

Pre-processing

Gene Expression Data

RNASeq Data

Voom @law2014voom @limma2015

Choosing the number of groups, g

Manually or via BIC

Scores

Can be used to rank genes without p-values, if desired.

Null-Distribution

permutations

p-values

from the permutations

Experimental design

Allows for complicated setups.

Time

see example in @emmix2014 and third case study in this document

Blocking

Case Studies

Golden Spike

@zhu2010preferred

# Download the Golden Spike .Rdata file from GitHub and load its contents
# into the current session.
library(repmis)
repmis::source_data(
  "https://github.com/andrewthomasjones/goldenspikeData/blob/master/goldenspike.Rdata?raw=true"
)

SEQC

@su2014comprehensive

#library(erccdashboard)
#data(SEQC.Example)
# Add an extra column to goldenspike to make it clear which genes are nulls:
# those whose fold change has absolute value 1.
goldenspike$isNull <- abs(goldenspike$fold) == 1
# Expression matrix built from the first six columns of goldenspike
# (presumably three samples per condition, matching n1 = 3 and n2 = 3 below;
# TODO confirm against the data).
gold2 <- as.matrix(goldenspike[, seq_len(6)])

# Fit the model, letting BIC choose the number of groups g
# (see "Choosing the number of groups, g").
Emmix <- emmixwire(
  gold2,
  g = "BIC", maxg = 3, ncov = 3, nvcov = 3, n1 = 3, n2 = 3,
  debug = 0, itmax = 100, epsilon = 1e-4
)
M <- nrow(gold2)
# Genes are ranked by the magnitude of their score.
scores <- abs(scores.wire(Emmix))
# Proportion of null genes among the k top-ranked genes, for k = 1, ..., M.
pTrue <- cumsum(goldenspike$isNull[order(scores, decreasing = TRUE)]) /
  seq_len(M)
library(ggplot2)
library(reshape2)
combined <- data.frame(n = seq_len(M), pTrue = pTrue)
# id.vars is spelled out in full rather than relying on partial matching
# of `id`.
combined2 <- melt(combined, id.vars = "n")
# Only the 1000 top-ranked genes are plotted.
combined3 <- subset(combined2, n <= 1000)
p <- ggplot(data = combined3, aes(x = n, y = value, colour = variable)) +
  geom_line() +
  # guide = "none" hides the legend; guide = FALSE is deprecated in
  # ggplot2 >= 3.3.4 and emits a warning.
  scale_colour_discrete(guide = "none") +
  scale_x_continuous(name = "Gene Rank") +
  scale_y_continuous(name = "FPR") +
  theme_classic() +
  ggtitle("Golden Spike - false positive rate among top ranked genes")
print(p)

Alon Cancer data

@alon1999broad

# Load the colonCA dataset, fit vsn2 to it, and keep the resulting
# expression values.
library(vsn)
library(colonCA)
data(colonCA, package = "colonCA")
alon_data <- exprs(vsn2(colonCA))

RNASeq Data

@fission2014

# Attach the fission package and load its `fission` dataset, the RNA-seq
# example for this section.
library(fission)
data("fission")

References



andrewthomasjones/EMMIXcontrasts documentation built on June 26, 2019, 5:46 a.m.