In Merguerrero/BasicP: Function to support a microarray data analysis pipeline

title: "Basic examples of using BasicP functions" author: "Mercedes Guerrero and Alex Sanchez" date: "r Sys.Date()" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Basic examples of using BasicP functions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8}

require(devtools)
install("/home/mguerrero/Dropbox/Practiques-Grau_Biotec-UVic_Mercedes_Guerrero/BasicP", dependencies=TRUE)
install_git("https://github.com/alexsanchezpla/links2File.git")
library(BasicP)
library(links2File)
require(affy)
require(oligo) #ExonStudy
require(limma)
require(gplots)

The goal of this Vignette is to illustrate the use of some functions exported from the package.

`readOrLoad.rawData'Vignette Info`

This function reads data from cel files or loads previously saved data.

readCELS <- TRUE 
my.targets <- "./celfiles/targets.txt"
targets<- read.table("./celfiles/targets.txt", head=TRUE, sep="\t", row.names = 1)
my.fileNames <-paste("./celfiles/",rownames(targets),sep="")
rawDataFileName <- "rawData.Rda"
my.outputDir <- "."
isExonStudy <- FALSE
orgPackage <- "org.Hs.eg" 

rawData <- BasicP::readOrLoad.RawData(readCELS = readCELS, phenoDat = my.targets,
fileNames = my.fileNames, dataFName =rawDataFileName, outputDir = my.outputDir, exonSt = isExonStudy)

rawData

`createOrLoadAnnotations'Vignette Info`

This function creates annotation files or loads previously saved data.

loadAnnotations <- FALSE
chipPackAvailable <- TRUE
platformDesignPackAvailable <- FALSE
chipPackage <- "hgu133a2"
platformDesignPackage <- NULL
outputDir <- "./ResultsDir"
annotationsFileName <- "Annotations"
entrezTableFileName <-"Entrezs.Rda"
symbolsTableFileName <-"Symbols.Rda"
controlsTableFileName <- "controls.Rda"

anotacions <- BasicP::createOrLoadAnnotations (loadAnnotations= loadAnnotations, chipPackAvailable = chipPackAvailable, platformDesignPackAvailable = platformDesignPackAvailable,chipPackage = chipPackage, platformDesignPackage = platformDesignPackage, outputDir = outputDir,annotationsFileName = annotationsFileName,entrezTableFileName = entrezTableFileName, symbolsTableFileName = symbolsTableFileName, controlsTableFileName = controlsTableFileName)

summary(anotacions)

`normalization'Vignette Info`

function to normalize the raw data.

load("rawData.Rda")
rawData <- my.raw
normMethod <- "RMA"
my.targets <- read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1)
celFilesDir <-"./celfiles"
loadFile <- FALSE 
normalized.eset.FileName <-  "normalizedData.Rda"   
outputDir <- "./ResultsDir"
exonStudy <- FALSE

eset_norm <- BasicP::normalization(my.data = rawData, method = normMethod, targetsinfo = my.targets, inputDir = celFilesDir, loadFile = loadFile , normalizedFName = normalized.eset.FileName, outputDir = outputDir, exonSt = exonStudy)

head(eset_norm)

`normplots2File'Vignette Info`

Function that makes a report of the normalization

load("./ResultsDir/normalizedData.Rda")
repes <- duplicated(exprs(my.norm), MARGIN=1)
exprs(my.norm) <- exprs(my.norm)[!repes,]
eset_norm <- my.norm
my.colors <- rainbow(length(sampleNames(eset_norm)))
my.names <- pData(eset_norm)$ShortName
myCex<- 0.8
dim3 <- FALSE
fileName <- "NormalizedPlots.pdf"
outputDir <- "./ResultsDir"
PCAPlots <- TRUE
csv <- "csv2"

BasicP::normplots2File(my.data = eset_norm, sampleNames = my.names, my.colors = my.colors, my.groups = pData(eset_norm)$Group, my.method = "average",my.cex = myCex ,posText = 2, dim3 = FALSE,fileName = fileName, outputDir = outputDir,PCAPlots = TRUE, csv = fileType)

`filterData'Vignette Info`

Function to filter the data

load("./ResultsDir/normalizedData.Rda")
repes <- duplicated(exprs(my.norm), MARGIN=1)
exprs(my.norm) <- exprs(my.norm)[!repes,]
eset_norm <- my.norm
load("./ResultsDir/controls.Rda")
removeNAs <- TRUE
load("./ResultsDir/Entrezs.Rda")
entrezs <- entrezTable
SignalFilter <- TRUE
signalThreshold <- 50
signalFilter.Function <- "filt.by.Signal"
signalThreshold.as.percentage <- TRUE
VarFilter <- TRUE 
variabilityThreshold <- 50
variability.Function <- "sdf"
variabilityThreshold.as.percentage <- TRUE
pairing.Function <- NULL
my.targets <-read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1)
doReport <- TRUE
outputDir <- "./ResultsDir"
FilteringReportFileName <- "FilteringReport.txt"

exprs.filtered <- BasicP::filterData(expres = exprs(eset_norm),controls = names(controlsTable),removeNAs = TRUE, entrezs = entrezs ,bySignal = SignalFilter,signalThr = signalThreshold, grups = pData(eset_norm)$grupo, sigFun.Name = signalFilter.Function, sigThr.as.perc = signalThreshold.as.percentage, byVar = VarFilter, variabilityThr = variabilityThreshold, varFun.Name = variability.Function,
varThr.as.perc = variabilityThreshold.as.percentage, pairingFun.Name = pairing.Function, targets = my.targets, doReport = doReport, outputDir = outputDir, filteringReportFName = FilteringReportFileName)

head(exprs.filtered)

`saveData'Vignette Info`

Function to save R objects in files.

load("./ResultsDir/normalizedData.Rda")
repes <- duplicated(exprs(my.norm), MARGIN=1)
exprs(my.norm) <- exprs(my.norm)[!repes,]
eset_norm <- my.norm
normalized.all.FileName <- "normalized.all"
fileType <-"csv2"
symbolsTable <- load("./ResultsDir/Symbols.Rda")
expres.all.FileName <- "expres.Rda"
linksFileName <- "Links.txt"
outputDir <- "./ResultsDir"

BasicP::saveData(expres = exprs(eset_norm), expres.csv.FileName = normalized.all.FileName, csvType=fileType, description = "Normalized values for all genes", anotPackage = NULL, symbolsVector = symbolsTable, SYMBOL = "SYMBOL", expres.bin.FileName = expres.all.FileName, linksFile = linksFileName, outputDir = outputDir)

BasicP::saveData(expres = exprs.filtered, expres.csv.FileName = "normalized.filtered", csvType = fileType, description = "Normalized values for filtered genes", anotPackage = NULL, symbolsVector = symbolsTable, SYMBOL = "SYMBOL", expres.bin.FileName = "exprs.filtered.Rda",  linksFile = linksFileName, outputDir = outputDir)

`lmAnalysis'Vignette Info`

Setting design and contrasts matrix to proceed with the linear model analysis.

targets <- read.table(file ="./celfiles/targets.txt" , header = TRUE, sep = "\t")
column2design<- 5
lev <- targets[,column2design] 
design <- model.matrix( ~ 0 + lev)        
colnames(design) <- levels(lev)
rownames(design) <- targets$ShortName
numParameters <- ncol(design)
print(design)

cont.matrix<-makeContrasts(AvsB= (A -B), AvsL= (A-L), BvsL=(B-L),levels=design)
print(cont.matrix)

Linear model analysis.Function included in dolmAnalysis.

load("./ResultsDir/exprs.filtered.Rda")
contrasts2test <- 1:ncol(cont.matrix)
anotPackage = NULL
comparison =  "Estudi"
outputDir = "./ResultsDir"
ENTREZIDs = "entrezTable"
SYMBOLIDs = "symbolsTable"
linksFile = "Links.txt"
fitFileName = "fit.Rda"
csvType= "csv"
rows2HTML= NULL
anotFileName <- "Annotations"
runMulticore = 0 
toTIFF= FALSE

fitMain <- BasicP::lmAnalysis(exprs.filtered = exprs.filtered, design = design, cont.matrix = cont.matrix, contrasts2test = contrasts2test, anotPackage = anotPackage, outputDir = outputDir, comparison = comparison, Expressions_And_Top = TRUE , showParams = FALSE , use.dupCorr = FALSE, block = NULL, nDups = 1 , ENTREZIDs = ENTREZIDs, SYMBOLIDs = SYMBOLIDs, linksFile = linksFile,fitFileName = fitFileName , csvType=csvType, rows2HTML = NULL, anotFileName = anotFileName)

fitMain

`doLmAnalysis'Vignette Info`

Function to analyze the linerar model with the different parameters.

lmParsList <- list()
Estudi <- list(dades = NULL,
               expresFileName = "exprs.filtered.Rda",
               targets = targets,
               designMat = design,
               contMat = cont.matrix,
               whichContrasts = 1:ncol(cont.matrix),
               anotPack = NULL,
               outputDir = outputDir,
               ExpressionsAndTop = TRUE,
               showLmParams = FALSE, 
               use.dupCorr = FALSE,
               block = NULL,
               nDups = 1,
               comparisonName = comparison,  
               ENTREZIDs = "entrezTable",
               SYMBOLIDs = "symbolsTable",
               fileOfLinks = linksFile,
               fitFileName = fitFileName,
               csvType=csvType,
               rows2HTML = NULL,
               anotFilename = anotFileName
               )

lmParsList <- add2parsList(lmParsList, Estudi)

for(ix in 1:length(lmParsList))
{
  fit.Main   <- BasicP::doLmAnalysis(lmParsList[ix])
}

fit.Main

`GeneAnnotation'Vignette Info`

Function that annotates genes. Inside doGeneAnnotation

genes2annotate <- entrezs[unique(rownames(fitMain$p.value))]

genesAnnotated <-BasicP::GeneAnnotation(egIDs = genes2annotate, anotPackage = "org.Hs.eg", toHTML = TRUE, outputDir = outputDir, filename = "Annotations",  myTitle = "Annotations for all genes analyzed", specie = "homo sapiens", info2show = c( "Affymetrix", "EntrezGene", "GeneSymbol", "GeneName", "KEGG", "GO"), linksFile = linksFile, maxGenes = NULL)

`doGeneAnnotation'Vignette Info`

Function to annotate the genes.

organisme = "hsa"
anotList <- list(fitMain = NULL, fitFileName = fitFileName, my.IDs = "entrezTable", anotPackage = "org.Hs.eg", toHTML = TRUE, outputDir = outputDir, anotFilename = "Annotations", titleAnotations = "Annotations for all genes analyzed", specie = "homo sapiens", info2show = c( "Affymetrix", "EntrezGene", "GeneSymbol", "GeneName", "KEGG", "GO"), linksFile = linksFile, numGenesPerPage = NULL)

BasicP::doGeneAnnotation(anotList)

`multipleComp'Vignette Info`

Function to make multiple comparations. Inside doMultCompAnalysis.

compName <- c("Group1")
wCont <- 1:3
pValCutOff <- c(0.01)
adjMethod <- c("none")
minLogFoldChange <- c(1)
load("./ResultsDir/Symbols.Rda")
titleText = paste("for", ifelse(adjMethod=="none","p-values","adj. p-values"), "<", pValCutOff, "and |logFC| >", minLogFoldChange, sep = " ")
geneListFName = paste("geneList",compName,ifelse(adjMethod=="none","pvalues","adj-pvalues"),"LT",pValCutOff,"Rda",sep = ".")

geneList <-  BasicP::multipleComp(fitMain = fitMain, whichContrasts = wCont, comparisonName = compName, titleText = titleText, outputDir = outputDir,  anotPackage = "org.Hs.eg", my.symbols = symbolsTable, linksFile = linksFile, multCompMethod = "separate", adjustMethod = adjMethod, selectionType = "any", P.Value.cutoff = pValCutOff, plotVenn = TRUE, colsVenn = NULL, vennColors = c("red","yellow","green","blue","pink") , cexVenn = 1, geneListFName=geneListFName, csvType=csvType, minLFC=minLogFoldChange)

head(geneList,n=20)

`doMultCompAnalysis'Vignette Info`

Function to perform Multi comparations analysis.

mcParsList <- list()
compName <- c("Group1", "Group2")
wCont <- 1:ncol(cont.matrix)
pValCutOff <- c(0.01, 0.01) 
adjMethod <- c("none", "none")
minLogFoldChange <- c(1, 1) 
for(i in 1:length(compName))
{  
mci <- list(fitMain = fitMain,
              fitFileName = fitFileName,
              whichContrasts = wCont[[i]],
              comparisonName = compName[i],
              titleText = paste("for",ifelse(adjMethod[i]=="none",
              "p-values","adj. p-values"), "<", pValCutOff[i], "and |logFC| >",
               minLogFoldChange[i], sep = " "),
              anotPackage = "org.Hs.eg",
              my.symbols = symbolsTable,
              outputDir = outputDir,
              fileOfLinks = linksFile,
              multCompMethod = "separate",
              adjustMethod = adjMethod[i], 
              selectionType = "any",
              P.Value.cutoff = pValCutOff[i],
              plotVenn = TRUE,
              colsVenn = NULL,
                vennColors= c("red","yellow","green","blue","pink"),  
              cexVenn = 1,
              geneListFName = paste("geneList",compName[i],
              ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"),
              "LT",pValCutOff[i],"Rda",sep = "."),
                minLogFC = minLogFoldChange[i],
              csvType = csvType)

  mcParsList <- add2parsList(mcParsList, mci)
}

for(ix in 1:length(mcParsList))
{
  geneList.MCi <- BasicP::doMultCompAnalysis(mcParsList[ix])   
}

head(geneList.MCi,n=20)

`clusterAnalysis'Vignette Info`

Function to draw the clusters.Inside doClusterAnalysis.

s2clust <-  which(as.logical(apply(design[,as.logical(apply(abs(as.matrix(cont.matrix[,wCont[[i]]])),1,sum))],1,sum)))
geneListFName = paste("geneList",  compName[i],     ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT", pValCutOff[i], "Rda", sep = ".")
pal <- colorpanel(n = 32, low = "green", mid = "white", high = "magenta")
load("./ResultsDir/geneList.Group1.pvalues.LT.0.01.Rda")
 clust <- BasicP::clusterAnalysis(expres = exprs.filtered,
                           genes = geneList, 
                           samples = s2clust, 
                           sampleNames =       as.character(targets$ShortName)[s2clust],
                           comparisonName = "Compar 1",
                           anotPackage = "org.Hs.eg",
                           my.symbols = symbolsTable,
                           outputDir = outputDir, 
                           fileOfLinks = linksFile,                              
                           numClusters = 2,
                           rowDistance = NULL,
                           colDistance = NULL,
                           RowVals = TRUE,
                           ColVals = FALSE,
                           escala = "row",
                           colorsSet = pal,
                           densityInfo = "density",
                           colsForGroups =    c("pink","pink","pink","pink","pink","blue","blue","blue","blue","blue"),
                           cexForColumns = 0.8,
                           cexForRows = 0.8,
                           Title = paste("Compar 1 with",
                            ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"),
                            "<",
                            pValCutOff[i], ifelse(minLogFoldChange[i]==0, "", paste("\n and |logFC|>=", minLogFoldChange[i], sep=""))),
                           csvType = "csv2")

summary(clust)

`doClusterAnalysis'Vignette Info`

Function to draw the clusters.

clustParsList <- list()
for(i in 1:length(compName))
{
clustPar <- list(expres = NULL,
                 expresFileName = "exprs.filtered.Rda",
                 geneListFName = paste("geneList",  compName[i],     ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT", pValCutOff[i], "Rda", sep = "."),
                genes2cluster = NULL, 
                samples2cluster = s2clust, 
                sampleNames = as.character(targets$ShortName)[s2clust], 
                comparisonName = compName[i], 
                anotPackage = "org.Hs.eg",
                my.symbols = symbolsTable,
                outputDir = outputDir,
                fileOfLinks = linksFile,
                numClusters = 2,
                rowDistance = NULL,
                colDistance = NULL,
                RowVals = TRUE,
                ColVals = FALSE,
                escala = "row",
                colorsSet = pal,
                densityInfo = "density",
                colsForGroups = c("pink","pink","pink","pink","pink","blue","blue","blue","blue","blue"),
                cexForColumns = 0.8,
                cexForRows = 0.8,
                Title = paste(compName[i],
                                 "with",
                                 ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"),
                                 "<",
                                 pValCutOff[i], ifelse(minLogFoldChange[i]==0, "", paste("\n and |logFC|>=", minLogFoldChange[i], sep=""))),
                   paste("Comparison:", compName[i], sep=" "),
                   csvType = csvType)

clustParsList <- add2parsList(clustParsList, clustPar)
}

for(ix in 1:length(clustParsList))
  {
    hm.Estudi <- BasicP::doClusterAnalysis(clustParsList[ix])
  }

summary(hm.Estudi)

`GOAnalysis'Vignette Info`

Function to carry out the GO analysis. Inside doGOAnalysis

library(GOstats)

GOResult<-BasicP::GOAnalysis(fitMain = fitMain, whichContrasts = wCont, comparison.Name = "Estudi", outputDir = outputDir, anotPackage = "org.Hs.eg", my.IDs = entrezTable, addGeneNames = TRUE, fileOfLinks = linksFile, thrLogFC = 1, cutoffMethod = "adjusted", P.Value.cutoff = rep(0.05, length(wCont)), pval = 0.05, min.count = 3, ontologias = c("MF", "BP", "CC"), testDirections = c("over", "under"), minNumGens = 0)

ERROR: No results met the specified criteria. Returning 0-row data.frame

`doGOAnalysis'Vignette Info`

Function to carry out the GO analysis.

GOParsList <-list()
GOPar <- list(fitFileName = fitFileName,
              whichContrasts = wCont,        
              comparisonName = "Estudi",
              anotPackage =  "org.Hs.eg",
              my.IDs = "entrezTable",        
              addGeneNames = TRUE, 
              outputDir = outputDir, 
              fileOfLinks = linksFile,
              fitMain = NULL,
              cutoffMethod = "adjusted",  
              P.Value.cutoff = rep(0.05, length(wCont)),
              pvalGOterms = 0.05,
              min.count = 3,
              ontologias = c("MF", "BP", "CC"),
              testDirections = c("over", "under"),
              minNumGens = 0)

GOParsList <- add2parsList(GOParsList, GOPar)


if(runMulticore == 2 || runMulticore == 3){  
    foreach(ix = 1:length(GOParsList)) %dopar% {
      GOList <- BasicP::doGOAnalysis (GOParsList[ix])
    }
  }else{
    for(ix in 1:length(GOParsList)) {
      GOList <- BasicP::doGOAnalysis (GOParsList[ix])
    }
  }

`KEGGAnalysis'Vignette Info`

Function to do KEGG analysis. Inside doKEGGAnalysis.

load("./ResultsDir/fit.Rda")
wCont <- 1:3
comparison.Name <-"Estudi"
outputDir <- "./ResultsDir"
orgPackage <-"org.Hs.eg"
organisme <-"hsa"
load("./ResultsDir/Entrezs.Rda")
addGeneNames <- TRUE
linksFileName <- "Links.txt"
cutoffMethod <-"unadjusted"
pval <- 0.05
minNumGens <-0
runMulticore <- 0

KEGGResult <- BasicP::KEGGAnalysis(fitMain = fit.main, whichContrasts = wCont,  comparison.Name = comparison.Name, outputDir = outputDir, anotPackage = orgPackage, organisme = organisme, my.IDs = entrezTable, addGeneNames = addGeneNames, fileOfLinks = linksFileName, cutoffMethod = cutoffMethod, P.Value.cutoff = rep(0.05, length(wCont)), pval = pval,thrLogFC = 1, minNumGens = minNumGens)

MENSAJE: KEGG.db contains mappings based on older data because the original resource was removed from the the public domain before the most recent update was produced. This package should now be considered deprecated and future versions of Bioconductor may not have it available. Users who want more current data are encouraged to look at the KEGGREST or reactome.db packages

`doKEGGAnalysis'Vignette Info`

Function to do KEGG analysis. Inside doKEGGAnalysis.

KEGGParsList <- list()
KEGGPar <- list(fitFileName = "fit.Rda",
                whichContrasts = 1:3,
                comparisonName = "Estudi",
                outputDir = "./ResultsDir",
                anotPackage = "org.Hs.eg", 
                organisme = "mmu",    
                my.IDs = "entrezTable",
                addGeneNames = TRUE,
                fileOfLinks = linksFileName,
                fitMain = NULL,
                cutoffMethod = "unadjusted",
                P.Value.cutoff = rep(0.01, length(wCont)), 
                pvalKEGGterms = 0.05, 
                minNumGens = 0) 

KEGGParsList <- add2parsList (KEGGParsList, KEGGPar)


for(i in 1:length(KEGGParsList))
{
  KEGGList <- BasicP::doKEGGAnalysis(KEGGParsList[i])
}

Merguerrero/BasicP documentation built on May 20, 2019, 2:24 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Merguerrero/BasicP
Function to support a microarray data analysis pipeline

In Merguerrero/BasicP: Function to support a microarray data analysis pipeline

`readOrLoad.rawData'Vignette Info`

`createOrLoadAnnotations'Vignette Info`

`normalization'Vignette Info`

`normplots2File'Vignette Info`

`filterData'Vignette Info`

`saveData'Vignette Info`

`lmAnalysis'Vignette Info`

`doLmAnalysis'Vignette Info`

`GeneAnnotation'Vignette Info`

`doGeneAnnotation'Vignette Info`

`multipleComp'Vignette Info`

`doMultCompAnalysis'Vignette Info`

`clusterAnalysis'Vignette Info`

`doClusterAnalysis'Vignette Info`

`GOAnalysis'Vignette Info`

`doGOAnalysis'Vignette Info`

`KEGGAnalysis'Vignette Info`

`doKEGGAnalysis'Vignette Info`

R Package Documentation

Browse R Packages

We want your feedback!

Merguerrero/BasicP Function to support a microarray data analysis pipeline

In Merguerrero/BasicP: Function to support a microarray data analysis pipeline

readOrLoad.rawData'Vignette Info

createOrLoadAnnotations'Vignette Info

normalization'Vignette Info

normplots2File'Vignette Info

filterData'Vignette Info

saveData'Vignette Info

lmAnalysis'Vignette Info

doLmAnalysis'Vignette Info

GeneAnnotation'Vignette Info

doGeneAnnotation'Vignette Info

multipleComp'Vignette Info

doMultCompAnalysis'Vignette Info

clusterAnalysis'Vignette Info

doClusterAnalysis'Vignette Info

GOAnalysis'Vignette Info

doGOAnalysis'Vignette Info

KEGGAnalysis'Vignette Info

doKEGGAnalysis'Vignette Info

R Package Documentation

Browse R Packages

We want your feedback!

Merguerrero/BasicP
Function to support a microarray data analysis pipeline

`readOrLoad.rawData'Vignette Info`

`createOrLoadAnnotations'Vignette Info`

`normalization'Vignette Info`

`normplots2File'Vignette Info`

`filterData'Vignette Info`

`saveData'Vignette Info`

`lmAnalysis'Vignette Info`

`doLmAnalysis'Vignette Info`

`GeneAnnotation'Vignette Info`

`doGeneAnnotation'Vignette Info`

`multipleComp'Vignette Info`

`doMultCompAnalysis'Vignette Info`

`clusterAnalysis'Vignette Info`

`doClusterAnalysis'Vignette Info`

`GOAnalysis'Vignette Info`

`doGOAnalysis'Vignette Info`

`KEGGAnalysis'Vignette Info`

`doKEGGAnalysis'Vignette Info`