````r cat("") cat("") cat("") cat("") cat("") cat("")
Chip experiment ======================================================== Feature Selection was done with the ArrayPro Software of TECAN. Feature Selection Parameters were chosen the following: Every grid was inspected and (schlieren) were excluded from the data measurement. Throughout all measured data following parameters were used: * Raw Intensity: Whole cell * Background: Local Corners, Median Background * Net Intensity: Raw Intensity - Median background (negative ignored) * Normalization: None * Spot size was set to 20x20 pixel in the case of the secon chip The rest of the data values was chosen as proposed by the Array Pro Software. For down stream analysis, the net intensity values (raw-medianBg) were used! ```r library(googleVis) op <- options(gvis.plot.tag='chart') options(stringsAsFactors=FALSE) library(ggplot2) library(reshape2) library(limma) library("geneplotter") library(rCharts) library(ggplot2) library(sqldf) library(genefilter) library("Biostrings") library("RCurl") library(arrayQualityMetrics) library("vsn") library("MmPalateMiRNA") library("latticeExtra") library(CustomMicroarrayPackage) library(Biostrings)
#Variable Declarations for downstream analysis normMethod = "quantile" #VARDEFINITION ndups = 6 #VARDEFINITION spacing = 1 #VARDEFINITION minDetectLim = 0.15 #VARDEFINITION (minimal detection limit) subsetByManualControl = FALSE #VARDEFINITION (#)filtering option) subsetByPMvsMM = TRUE #VARDEFINITION (filtering option) orderingVar = "Name" #VARDEFINITION addInterestCand = c("900000","20285","20285_1","20285_2" ) #VARDEFINITION (additional candidates to be labeled)
````r
gprDir = file.path(getwd(), "..") #VARDEFINITION analysisPath = file.path( getwd() ) #VARDEFINITION
data(chipAnnotDF,package="CustomMicroarrayPackage") data(galNRC5_01,package="CustomMicroarrayPackage") gridNameTable = galNRC5_01
db <- dbConnect(SQLite(), dbname=system.file("data/ncrna.chip2.annotation.db",package="CustomMicroarrayPackage"))
setwd(gprDir)
chipDataL <- readAndProcessChipData2_1( currDir=gprDir,GALFile = gridNameTable )
RG = chipDataL$RG
RG$genes$ID = ifelse( RG$genes$ID == "0","Cy3_Anchor-spot",RG$genes$ID ) RG$genes$ID = ifelse( substr(RG$genes$ID,1,3) == "unk","unknown",RG$genes$ID ) RG$genes$ID[which(is.na(RG$genes$Block))] = "unknown"
RG$genes[RG$genes$ID == "e620",]$Name = "3.1"
RG$genes = addProbeTypeAndChipTypeToRGGenes(RG=RG,dbcon=db,anchorName="Cy3_Anchor-spot",noSignal="unknown", refCol = "ID") # processing the chip
message( paste0("All Oligos are present on the chip: ", dim(RG$genes[substr(x=RG$genes$ID, start=1, stop=1) == "e",] )[1]/ndups == dim(galNRC5_01[substr(x=galNRC5_01$ID, start=1, stop=1) == "e",] )[1]/ndups ) )
RG = extractNetIntensityValues(RG, deleteOther=TRUE) setwd(analysisPath)
design = rep(c(1, -1), length(RG$targets[, 1])/2)
```r RG$targets
cat(paste0( "<h2>Comparison is done by log(Cy5/Cy3), therefore the reference for the fold change is: <b>",RG$targets[1,2],"</b></h2>" ))
plotPrintTipLoess( RG )
plotBoxDensityFromRG(RG)
#creating a data.table for ggplot plotting plotMAofArrays( MA.RG(RG) ) # geom_smooth(aes(x = AveExpr, y = logFC), data=dat, method="loess")
arrayQualityMetrics(expressionset = MA.RG(RG), outdir = "AQM_beforeNormalization", force = TRUE, do.logtransform = FALSE)
The file can be accessed here:
cat("<a href=\"AQM_beforeNormalization/index.html\"> Quality Report before Normalization </a>")
MA = normalizeTwoColorArray(RG, method=normMethod)
#creating a data.table for ggplot plotting plotMAofArrays( MA ) # geom_smooth(aes(x = AveExpr, y = logFC), data=dat, method="loess")
plotBoxDensityFromRG(RG.MA(MA))
arrayQualityMetrics(expressionset = MA, outdir = "AQM_afterNormalization", force = TRUE, do.logtransform = FALSE)
The file can be accessed here:
cat("<a href=\"AQM_afterNormalization/index.html\"> Quality Report after Normalization </a>")
MAnonFilt = MA #backup variable MA = orderTwoColorArray(MA, by=orderingVar) #Ordering of MA by name MA = filterSpotsBySpikeIn(MA=MA, normMethod=normMethod, subsetByManualControl = subsetByManualControl, subsetByPMvsMM = subsetByPMvsMM, ndups=ndups, spacing=spacing, writeLog=FALSE, design = design )
#creating a data.table for ggplot plotting plotMAofArrays( MA )
MA = orderTwoColorArray(MA, by=orderingVar) resL = performDiffExpAnalysis(MA, ndups = ndups, spacing=spacing )
tt = resL$ResultTable #Quering the database of the chip annotation for extra information additionalData = dbGetQuery(db, paste( "SELECT * FROM oligoIDMapping oidm JOIN oligo o ON oidm.oligo_oligoTableID = o.oligoTableID JOIN secondary seco ON seco.secondaryTableID = o.secondary_secondaryTableID LEFT OUTER JOIN contig c ON c.contigTableID = oidm.contig_contigTableID LEFT OUTER JOIN annotationSummary ans ON ans.contigAnnotation_contig_contigTableID = c.contigTableID JOIN geneAnnot ga ON ga.contigAnnotation_contig_contigTableID = c.contigTableID LEFT OUTER JOIN predictionAdditional preda ON preda.oligo_oligoTableID = o.oligoTableID WHERE o.oligoTableID IN( ",paste( sub("e","",tt$ID) , collapse=","),");",sep="")) #Adding this data to the Result Table additionalData$coordinates = paste0(additionalData$contigChromosome, ":", additionalData$contigStart, "-", additionalData$contigEnd) addData = additionalData[,c("oligoTableID","contigID","gc","length","microgram", "mp","ncRNABioType","ncRNAOrientation", "repeatName", "bindingEfficiency", "contigChromosome","geneRegion", "coordinates", "predictionProgram", "disease", "rsnumber", "gene","geneName","geneTranscriptID", "sequence")] addData$sequence = as.character( reverseComplement(DNAStringSet( addData$sequence ) ) ) addData$oligoTableID = paste0("e",addData$oligoTableID) dat.tt = merge( tt, addData, by.x = "ID", by.y="oligoTableID", sort=FALSE ) dat.tt$logPrev = -log10(dat.tt$P.Value) labelDF.tt = dat.tt[dat.tt$adj.P.Val < 0.05 ,] if(dim(labelDF.tt)[1] == 0){labelDF.tt = dat.tt[1:20,]; message("There is no significant candidate in the data, therefore the top 20 are displayed")} #Only in this case we want to display additional candidates: if( length(addInterestCand) != 0){ labelDF.tt = rbind( labelDF.tt, dat.tt[ dat.tt$Name %in% addInterestCand, ] ) labelDF.tt = unique(labelDF.tt) } dat.tt$ncRNABioTypeSub = ifelse( dat.tt$ncRNABioType %in% c("miRNA","snoRNA","tRNA","piRNA","lincRNA"), dat.tt$ncRNABioType, "other" ) dat.tt$Significance = 'p.value > 0.05 (BH)' if( dim( dat.tt[dat.tt$adj.P.Val < 0.05 ,])[1] != 0 ){ dat.tt[dat.tt$adj.P.Val < 0.05 ,]$Significance = 'p.value < 0.05 (BH)' } #writing csv file fncsv = paste0("resultTable.csv") write.table(dat.tt,file=fncsv, sep="\t", col.names=TRUE, row.names=FALSE)
The ResultTable can be accessed here:
cat(paste0("<a href=\"",fncsv,"\"> Differential Expression result (csv) </a>"))
g1 = ggplot( dat.tt, aes(x=AveExpr, y=logFC), environment=environment() ) + geom_point(colour="lightgrey") + labs(title="MAPlot (Averaged over all arrays)", x="Intensity (A)", y="log2 Fold Change (M)") + geom_text( data = labelDF.tt, aes( x= AveExpr, y=logFC, label=Name), show_guide = FALSE, size=2, color="red") + geom_hline( aes(yintercept=-minDetectLim, linetype="dashed"), show_guide = FALSE ) + geom_hline( aes(yintercept=minDetectLim, linetype="dashed"), show_guide = FALSE ) g1
g2 = ggplot( dat.tt, aes(x=AveExpr, y=logFC, colour = ncRNABioTypeSub), environment=environment() ) + geom_point() + labs(title="MAPlot (Averaged over all arrays)", x="Intensity (A)", y="log2 Fold Change (M)") + geom_text( data = labelDF.tt, aes( x= AveExpr, y=logFC, label=Name), show_guide = FALSE, size=2, color="black") + geom_hline( aes(yintercept=-minDetectLim, linetype="dashed"), show_guide = FALSE ) + geom_hline( aes(yintercept=minDetectLim, linetype="dashed"), show_guide = FALSE ) g2
g = ggplot( dat.tt, aes(x=logFC, y=logPrev, colour = Significance), environment=environment() ) + geom_point() + scale_color_manual(values=c( "red","lightgrey")) + labs(title="Vulcano Plot (Averaged over all arrays)", x="log2 Fold Change (M)", y="-log10(p-value)") + geom_text( data = labelDF.tt, aes( x= logFC, y=logPrev, label=Name), show_guide = FALSE, size=2, color="black") #-0.07 g
````r tablegvis = labelDF.tt[,c("ID","Name","contigID","probe.type","chipType","logFC", "AveExpr","t","P.Value","adj.P.Val","B","ncRNABioType","ncRNAOrientation","geneName")] plot(createGVisTableFromDF(tablegvis))
### Fold Change by condition, of top five differentially expressed candidates ````r #Creating an Expression set #Creating an Expression set minAnnot = unique(dat.tt[,c("Block", "Row","Column","ID","Name","probe.type","chipType","logFC", "AveExpr","ncRNABioTypeSub")]) #Since due to left out join, there could be some entries duplicated whereby only one attributes differe (disease for example is a one to many relation) MAave = avedups( MA, ndups=ndups, spacing=spacing ) featAnnot = merge( MAave$genes, minAnnot, by=colnames(MAave$genes), sort=FALSE) exprsData = MAave$M rownames(exprsData) = featAnnot$Name rownames( MAave$targets ) = sub(".*-","", sub("\\.gpr","",rownames( MAave$targets ))) sampleType = paste( rownames( MAave$targets) ,MAave$targets$Cy5, sep="_") #sample type always needs to be inspected colnames(exprsData) = rownames(MAave$targets) rownames(featAnnot) = featAnnot$Name eSet = ExpressionSet(exprsData, phenoData= AnnotatedDataFrame(MAave$targets), featureData = AnnotatedDataFrame(featAnnot), annotation="customAnnot") eSet$SampleType = as.factor(sampleType)
discr = as.factor(sub( ".*_","", eSet$SampleType)) rocs = rowpAUCs(eSet, discr, p=0.2) #calculates the area under the curve top5Cand = which(names(genefilter::area(rocs)) %in% unique(c(dat.tt[1:5,"Name"],addInterestCand))) for( j in top5Cand ){ y = exprs(eSet[j,]) ord=order(discr) featureName = featureNames(eSet[j,]) par(mfrow=c(1,2)) genefilter::plot(rocs[j], main=featureName) plot( y[ord], pch=c(1,16)[discr[ord]], col=c("black","red")[discr[ord]], main=featureName, ylab=expression(log[2]~intensity), xlab="samples" ) text( x=1:24, y=y[ord]+0.01, labels=phenoData(eSet)$Sample[ord], cex = 0.4 ) }
````r dataToPlot = dat.tt colnames(dataToPlot)[ which(colnames(dataToPlot) == "P.Value") ] = "PValue" colnames(dataToPlot)[ which(colnames(dataToPlot) == "adj.P.Val") ] = "adjPValue"
dataToPlot = dataToPlot[,c("ID", "Name", "chipType", "logFC",
"AveExpr", "PValue", "adjPValue", "ncRNABioType", "geneRegion", "coordinates", "predictionProgram"
, "disease", "rsnumber", "gene", "geneName", "geneTranscriptID")]
n2 = nPlot( logFC ~ AveExpr, data=dataToPlot, type='scatterChart', group='chipType' )
n2$chart(tooltipContent= "#! function(key, x, y, e){
return 'Name: ' + e.point.Name + '
' +
'PValue: ' + e.point.PValue + '
' +
'adjPValue: ' + e.point.adjPValue + '
' +
'ncRNABioType: ' + e.point.ncRNABioType + '
' +
'geneRegion: ' + e.point.geneRegion + '
' +
'geneName: ' + e.point.geneName + '
' +
'geneTranscriptID: ' + e.point.geneTranscriptID + '
' +
'gene: ' + e.point.gene + '
' +
'coordinates: ' + e.point.coordinates + '
' +
'predictionProgram: ' + e.point.predictionProgram + '
' +
'disease: ' + e.point.disease + '
' +
'rsnumber: ' + e.point.rsnumber + '
' +
'coordinates: ' + e.point.coordinates
} !#")
n2$print("chart1")
Additional Information: Size Distribution of non-coding RNA contigs on the microarray ---------------------------------------------------------------------- ```r sizeData = dbGetQuery(db, "SELECT * FROM oligoIDMapping oidm JOIN oligo o ON o.oligoTableID = oidm.oligo_oligoTableID JOIN contig c ON c.contigTableID = oidm.contig_contigTableID JOIN annotationSummary annots ON annots.contigAnnotation_contig_contigTableID = c.contigTableID" ) #binning the contigSize Data #bins of 5 till 100 , then everything in one bin #first remove all contigs < 18 sizeData = sizeData[sizeData$contigLength > 18,] sizeData = sizeData[sizeData$category == "PM",] binningContigLength = function( data, varToBin, binsFrom=18,binsTo=100,binsBy=5 ){ bins = seq(binsFrom,binsTo, by=binsBy) lowerBin = bins upperBin = bins + 5 upperBin[length(upperBin)] = max(sizeData$contigLength)+1 contigBin = rep("", dim(data)[1]) for(i in c(1:length(upperBin)) ){ contigBin[which( data$contigLength < upperBin[i] & data$contigLength >= lowerBin[i])] = paste0(lowerBin[i],"-",upperBin[i]-1) } return(contigBin) } contigBins = binningContigLength(sizeData) sizeData$contigBins = contigBins contL = split(sizeData, f=as.factor(sizeData$contigID) ) contigOligoCoverage = unlist(lapply( contL, function(x){return(dim(x)[1])} )) rm(contL) contigData = sizeData contigData = contigData[!duplicated(contigData$contigID),] contigData$contigOligoCoverage = contigOligoCoverage contigData = contigData[,c("contigBins","ncRNABioType","contigOligoCoverage")] contigData$contigOligoCoverage = as.character(contigData$contigOligoCoverage) contigData[ !(contigData$ncRNABioType %in% c("miRNA","piRNA","snoRNA","tRNA","rRNA","lincRNA")),]$ncRNABioType = "other" contigData[,3] = as.numeric(contigData[,3]) contigBins = melt( table(contigData[,1:2]) ) contigData_new = data.frame( "binBio"=paste0(contigData$contigBins,"_",contigData$ncRNABioType), "OligoCov"=contigData[,3] ) for( binbio in unique(contigData_new$binBio) ){ occ = which(contigData_new$binBio == binbio) summedVal = sum( contigData_new[occ,"OligoCov"] ) contigData_new[occ,]$OligoCov = summedVal } contigData_new = contigData_new[!duplicated(contigData_new$binBio),] oligoCoverageByBin = data.frame( "contigBins"=sub("_.*","",contigData_new$binBio), "ncRNABioType"=sub(".*_","",contigData_new$binBio), "OligoCoverage"=contigData_new$OligoCov ) contigBins= merge( contigBins, oligoCoverageByBin, by=c("contigBins","ncRNABioType"), all.x=TRUE ) contigBins$OligoCoverage = ifelse(is.na(contigBins$OligoCoverage), 0, contigBins$OligoCoverage) colnames(contigBins) = c("contigBins", "ncRNABioType", "Freq", "OligoCoverage") n1 = nPlot( Freq~contigBins, data = contigBins,group="ncRNABioType", type="multiBarChart" ) n1$chart(tooltipContent= "#! function(key, x, y, e){ return '<b>Contig Bin: </b> ' + e.point.contigBins + '<br/>' + '<b>ncRNA Biotype: </b> ' + e.point.ncRNABioType + '<br/>' + '<b>Contigs in Bin: </b> ' + e.point.Freq + '<br/>' + '<b>OligoCoverage: </b> ' + e.point.OligoCoverage } !#") n1$print("chart2")
````r save.image(file="session.rda")
Additional Information for reproducibility ---------------------------------------------------------------------- ### R package versions ````r sessionInfo()
````r
```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.