title: "Basic examples of using BasicP functions" author: "Mercedes Guerrero and Alex Sanchez" date: "r Sys.Date()" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Basic examples of using BasicP functions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8}


require(devtools)
install("/home/mguerrero/Dropbox/Practiques-Grau_Biotec-UVic_Mercedes_Guerrero/BasicP", dependencies=TRUE)
install_git("https://github.com/alexsanchezpla/links2File.git")
library(BasicP)
library(links2File)
require(affy)
require(oligo) #ExonStudy
require(limma)
require(gplots)

The goal of this Vignette is to illustrate the use of some functions exported from the package.

readOrLoad.rawData'Vignette Info

This function reads data from cel files or loads previously saved data.

readCELS <- TRUE 
my.targets <- "./celfiles/targets.txt"
targets<- read.table("./celfiles/targets.txt", head=TRUE, sep="\t", row.names = 1)
my.fileNames <-paste("./celfiles/",rownames(targets),sep="")
rawDataFileName <- "rawData.Rda"
my.outputDir <- "."
isExonStudy <- FALSE
orgPackage <- "org.Hs.eg" 

rawData <- BasicP::readOrLoad.RawData(readCELS = readCELS, phenoDat = my.targets,
fileNames = my.fileNames, dataFName =rawDataFileName, outputDir = my.outputDir, exonSt = isExonStudy)

rawData

createOrLoadAnnotations'Vignette Info

This function creates annotation files or loads previously saved data.

loadAnnotations <- FALSE
chipPackAvailable <- TRUE
platformDesignPackAvailable <- FALSE
chipPackage <- "hgu133a2"
platformDesignPackage <- NULL
outputDir <- "./ResultsDir"
annotationsFileName <- "Annotations"
entrezTableFileName <-"Entrezs.Rda"
symbolsTableFileName <-"Symbols.Rda"
controlsTableFileName <- "controls.Rda"

anotacions <- BasicP::createOrLoadAnnotations (loadAnnotations= loadAnnotations, chipPackAvailable = chipPackAvailable, platformDesignPackAvailable = platformDesignPackAvailable,chipPackage = chipPackage, platformDesignPackage = platformDesignPackage, outputDir = outputDir,annotationsFileName = annotationsFileName,entrezTableFileName = entrezTableFileName, symbolsTableFileName = symbolsTableFileName, controlsTableFileName = controlsTableFileName)

summary(anotacions)

normalization'Vignette Info

function to normalize the raw data.

load("rawData.Rda")
rawData <- my.raw
normMethod <- "RMA"
my.targets <- read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1)
celFilesDir <-"./celfiles"
loadFile <- FALSE 
normalized.eset.FileName <-  "normalizedData.Rda"   
outputDir <- "./ResultsDir"
exonStudy <- FALSE

eset_norm <- BasicP::normalization(my.data = rawData, method = normMethod, targetsinfo = my.targets, inputDir = celFilesDir, loadFile = loadFile , normalizedFName = normalized.eset.FileName, outputDir = outputDir, exonSt = exonStudy)

head(eset_norm)

normplots2File'Vignette Info

Function that makes a report of the normalization

load("./ResultsDir/normalizedData.Rda")
repes <- duplicated(exprs(my.norm), MARGIN=1)
exprs(my.norm) <- exprs(my.norm)[!repes,]
eset_norm <- my.norm
my.colors <- rainbow(length(sampleNames(eset_norm)))
my.names <- pData(eset_norm)$ShortName
myCex<- 0.8
dim3 <- FALSE
fileName <- "NormalizedPlots.pdf"
outputDir <- "./ResultsDir"
PCAPlots <- TRUE
csv <- "csv2"

BasicP::normplots2File(my.data = eset_norm, sampleNames = my.names, my.colors = my.colors, my.groups = pData(eset_norm)$Group, my.method = "average",my.cex = myCex ,posText = 2, dim3 = FALSE,fileName = fileName, outputDir = outputDir,PCAPlots = TRUE, csv = fileType)

filterData'Vignette Info

Function to filter the data

load("./ResultsDir/normalizedData.Rda")
repes <- duplicated(exprs(my.norm), MARGIN=1)
exprs(my.norm) <- exprs(my.norm)[!repes,]
eset_norm <- my.norm
load("./ResultsDir/controls.Rda")
removeNAs <- TRUE
load("./ResultsDir/Entrezs.Rda")
entrezs <- entrezTable
SignalFilter <- TRUE
signalThreshold <- 50
signalFilter.Function <- "filt.by.Signal"
signalThreshold.as.percentage <- TRUE
VarFilter <- TRUE 
variabilityThreshold <- 50
variability.Function <- "sdf"
variabilityThreshold.as.percentage <- TRUE
pairing.Function <- NULL
my.targets <-read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1)
doReport <- TRUE
outputDir <- "./ResultsDir"
FilteringReportFileName <- "FilteringReport.txt"

exprs.filtered <- BasicP::filterData(expres = exprs(eset_norm),controls = names(controlsTable),removeNAs = TRUE, entrezs = entrezs ,bySignal = SignalFilter,signalThr = signalThreshold, grups = pData(eset_norm)$grupo, sigFun.Name = signalFilter.Function, sigThr.as.perc = signalThreshold.as.percentage, byVar = VarFilter, variabilityThr = variabilityThreshold, varFun.Name = variability.Function,
varThr.as.perc = variabilityThreshold.as.percentage, pairingFun.Name = pairing.Function, targets = my.targets, doReport = doReport, outputDir = outputDir, filteringReportFName = FilteringReportFileName)

head(exprs.filtered)

saveData'Vignette Info

Function to save R objects in files.

load("./ResultsDir/normalizedData.Rda")
repes <- duplicated(exprs(my.norm), MARGIN=1)
exprs(my.norm) <- exprs(my.norm)[!repes,]
eset_norm <- my.norm
normalized.all.FileName <- "normalized.all"
fileType <-"csv2"
symbolsTable <- load("./ResultsDir/Symbols.Rda")
expres.all.FileName <- "expres.Rda"
linksFileName <- "Links.txt"
outputDir <- "./ResultsDir"

BasicP::saveData(expres = exprs(eset_norm), expres.csv.FileName = normalized.all.FileName, csvType=fileType, description = "Normalized values for all genes", anotPackage = NULL, symbolsVector = symbolsTable, SYMBOL = "SYMBOL", expres.bin.FileName = expres.all.FileName, linksFile = linksFileName, outputDir = outputDir)

BasicP::saveData(expres = exprs.filtered, expres.csv.FileName = "normalized.filtered", csvType = fileType, description = "Normalized values for filtered genes", anotPackage = NULL, symbolsVector = symbolsTable, SYMBOL = "SYMBOL", expres.bin.FileName = "exprs.filtered.Rda",  linksFile = linksFileName, outputDir = outputDir)

lmAnalysis'Vignette Info

Setting design and contrasts matrix to proceed with the linear model analysis.

targets <- read.table(file ="./celfiles/targets.txt" , header = TRUE, sep = "\t")
column2design<- 5
lev <- targets[,column2design] 
design <- model.matrix( ~ 0 + lev)        
colnames(design) <- levels(lev)
rownames(design) <- targets$ShortName
numParameters <- ncol(design)
print(design)

cont.matrix<-makeContrasts(AvsB= (A -B), AvsL= (A-L), BvsL=(B-L),levels=design)
print(cont.matrix)

Linear model analysis.Function included in dolmAnalysis.

load("./ResultsDir/exprs.filtered.Rda")
contrasts2test <- 1:ncol(cont.matrix)
anotPackage = NULL
comparison =  "Estudi"
outputDir = "./ResultsDir"
ENTREZIDs = "entrezTable"
SYMBOLIDs = "symbolsTable"
linksFile = "Links.txt"
fitFileName = "fit.Rda"
csvType= "csv"
rows2HTML= NULL
anotFileName <- "Annotations"
runMulticore = 0 
toTIFF= FALSE

fitMain <- BasicP::lmAnalysis(exprs.filtered = exprs.filtered, design = design, cont.matrix = cont.matrix, contrasts2test = contrasts2test, anotPackage = anotPackage, outputDir = outputDir, comparison = comparison, Expressions_And_Top = TRUE , showParams = FALSE , use.dupCorr = FALSE, block = NULL, nDups = 1 , ENTREZIDs = ENTREZIDs, SYMBOLIDs = SYMBOLIDs, linksFile = linksFile,fitFileName = fitFileName , csvType=csvType, rows2HTML = NULL, anotFileName = anotFileName)

fitMain

doLmAnalysis'Vignette Info

Function to analyze the linerar model with the different parameters.

lmParsList <- list()
Estudi <- list(dades = NULL,
               expresFileName = "exprs.filtered.Rda",
               targets = targets,
               designMat = design,
               contMat = cont.matrix,
               whichContrasts = 1:ncol(cont.matrix),
               anotPack = NULL,
               outputDir = outputDir,
               ExpressionsAndTop = TRUE,
               showLmParams = FALSE, 
               use.dupCorr = FALSE,
               block = NULL,
               nDups = 1,
               comparisonName = comparison,  
               ENTREZIDs = "entrezTable",
               SYMBOLIDs = "symbolsTable",
               fileOfLinks = linksFile,
               fitFileName = fitFileName,
               csvType=csvType,
               rows2HTML = NULL,
               anotFilename = anotFileName
               )

lmParsList <- add2parsList(lmParsList, Estudi)

for(ix in 1:length(lmParsList))
{
  fit.Main   <- BasicP::doLmAnalysis(lmParsList[ix])
}

fit.Main

GeneAnnotation'Vignette Info

Function that annotates genes. Inside doGeneAnnotation

genes2annotate <- entrezs[unique(rownames(fitMain$p.value))]

genesAnnotated <-BasicP::GeneAnnotation(egIDs = genes2annotate, anotPackage = "org.Hs.eg", toHTML = TRUE, outputDir = outputDir, filename = "Annotations",  myTitle = "Annotations for all genes analyzed", specie = "homo sapiens", info2show = c( "Affymetrix", "EntrezGene", "GeneSymbol", "GeneName", "KEGG", "GO"), linksFile = linksFile, maxGenes = NULL)

doGeneAnnotation'Vignette Info

Function to annotate the genes.

organisme = "hsa"
anotList <- list(fitMain = NULL, fitFileName = fitFileName, my.IDs = "entrezTable", anotPackage = "org.Hs.eg", toHTML = TRUE, outputDir = outputDir, anotFilename = "Annotations", titleAnotations = "Annotations for all genes analyzed", specie = "homo sapiens", info2show = c( "Affymetrix", "EntrezGene", "GeneSymbol", "GeneName", "KEGG", "GO"), linksFile = linksFile, numGenesPerPage = NULL)

BasicP::doGeneAnnotation(anotList)

multipleComp'Vignette Info

Function to make multiple comparations. Inside doMultCompAnalysis.

compName <- c("Group1")
wCont <- 1:3
pValCutOff <- c(0.01)
adjMethod <- c("none")
minLogFoldChange <- c(1)
load("./ResultsDir/Symbols.Rda")
titleText = paste("for", ifelse(adjMethod=="none","p-values","adj. p-values"), "<", pValCutOff, "and |logFC| >", minLogFoldChange, sep = " ")
geneListFName = paste("geneList",compName,ifelse(adjMethod=="none","pvalues","adj-pvalues"),"LT",pValCutOff,"Rda",sep = ".")

geneList <-  BasicP::multipleComp(fitMain = fitMain, whichContrasts = wCont, comparisonName = compName, titleText = titleText, outputDir = outputDir,  anotPackage = "org.Hs.eg", my.symbols = symbolsTable, linksFile = linksFile, multCompMethod = "separate", adjustMethod = adjMethod, selectionType = "any", P.Value.cutoff = pValCutOff, plotVenn = TRUE, colsVenn = NULL, vennColors = c("red","yellow","green","blue","pink") , cexVenn = 1, geneListFName=geneListFName, csvType=csvType, minLFC=minLogFoldChange)

head(geneList,n=20)

doMultCompAnalysis'Vignette Info

Function to perform Multi comparations analysis.

mcParsList <- list()
compName <- c("Group1", "Group2")
wCont <- 1:ncol(cont.matrix)
pValCutOff <- c(0.01, 0.01) 
adjMethod <- c("none", "none")
minLogFoldChange <- c(1, 1) 
for(i in 1:length(compName))
{  
mci <- list(fitMain = fitMain,
              fitFileName = fitFileName,
              whichContrasts = wCont[[i]],
              comparisonName = compName[i],
              titleText = paste("for",ifelse(adjMethod[i]=="none",
              "p-values","adj. p-values"), "<", pValCutOff[i], "and |logFC| >",
               minLogFoldChange[i], sep = " "),
              anotPackage = "org.Hs.eg",
              my.symbols = symbolsTable,
              outputDir = outputDir,
              fileOfLinks = linksFile,
              multCompMethod = "separate",
              adjustMethod = adjMethod[i], 
              selectionType = "any",
              P.Value.cutoff = pValCutOff[i],
              plotVenn = TRUE,
              colsVenn = NULL,
                vennColors= c("red","yellow","green","blue","pink"),  
              cexVenn = 1,
              geneListFName = paste("geneList",compName[i],
              ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"),
              "LT",pValCutOff[i],"Rda",sep = "."),
                minLogFC = minLogFoldChange[i],
              csvType = csvType)

  mcParsList <- add2parsList(mcParsList, mci)
}

for(ix in 1:length(mcParsList))
{
  geneList.MCi <- BasicP::doMultCompAnalysis(mcParsList[ix])   
}

head(geneList.MCi,n=20)

clusterAnalysis'Vignette Info

Function to draw the clusters.Inside doClusterAnalysis.

s2clust <-  which(as.logical(apply(design[,as.logical(apply(abs(as.matrix(cont.matrix[,wCont[[i]]])),1,sum))],1,sum)))
geneListFName = paste("geneList",  compName[i],     ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT", pValCutOff[i], "Rda", sep = ".")
pal <- colorpanel(n = 32, low = "green", mid = "white", high = "magenta")
load("./ResultsDir/geneList.Group1.pvalues.LT.0.01.Rda")
 clust <- BasicP::clusterAnalysis(expres = exprs.filtered,
                           genes = geneList, 
                           samples = s2clust, 
                           sampleNames =       as.character(targets$ShortName)[s2clust],
                           comparisonName = "Compar 1",
                           anotPackage = "org.Hs.eg",
                           my.symbols = symbolsTable,
                           outputDir = outputDir, 
                           fileOfLinks = linksFile,                              
                           numClusters = 2,
                           rowDistance = NULL,
                           colDistance = NULL,
                           RowVals = TRUE,
                           ColVals = FALSE,
                           escala = "row",
                           colorsSet = pal,
                           densityInfo = "density",
                           colsForGroups =    c("pink","pink","pink","pink","pink","blue","blue","blue","blue","blue"),
                           cexForColumns = 0.8,
                           cexForRows = 0.8,
                           Title = paste("Compar 1 with",
                            ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"),
                            "<",
                            pValCutOff[i], ifelse(minLogFoldChange[i]==0, "", paste("\n and |logFC|>=", minLogFoldChange[i], sep=""))),
                           csvType = "csv2")

summary(clust)

doClusterAnalysis'Vignette Info

Function to draw the clusters.

clustParsList <- list()
for(i in 1:length(compName))
{
clustPar <- list(expres = NULL,
                 expresFileName = "exprs.filtered.Rda",
                 geneListFName = paste("geneList",  compName[i],     ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT", pValCutOff[i], "Rda", sep = "."),
                genes2cluster = NULL, 
                samples2cluster = s2clust, 
                sampleNames = as.character(targets$ShortName)[s2clust], 
                comparisonName = compName[i], 
                anotPackage = "org.Hs.eg",
                my.symbols = symbolsTable,
                outputDir = outputDir,
                fileOfLinks = linksFile,
                numClusters = 2,
                rowDistance = NULL,
                colDistance = NULL,
                RowVals = TRUE,
                ColVals = FALSE,
                escala = "row",
                colorsSet = pal,
                densityInfo = "density",
                colsForGroups = c("pink","pink","pink","pink","pink","blue","blue","blue","blue","blue"),
                cexForColumns = 0.8,
                cexForRows = 0.8,
                Title = paste(compName[i],
                                 "with",
                                 ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"),
                                 "<",
                                 pValCutOff[i], ifelse(minLogFoldChange[i]==0, "", paste("\n and |logFC|>=", minLogFoldChange[i], sep=""))),
                   paste("Comparison:", compName[i], sep=" "),
                   csvType = csvType)

clustParsList <- add2parsList(clustParsList, clustPar)
}

for(ix in 1:length(clustParsList))
  {
    hm.Estudi <- BasicP::doClusterAnalysis(clustParsList[ix])
  }

summary(hm.Estudi)

GOAnalysis'Vignette Info

Function to carry out the GO analysis. Inside doGOAnalysis

library(GOstats)

GOResult<-BasicP::GOAnalysis(fitMain = fitMain, whichContrasts = wCont, comparison.Name = "Estudi", outputDir = outputDir, anotPackage = "org.Hs.eg", my.IDs = entrezTable, addGeneNames = TRUE, fileOfLinks = linksFile, thrLogFC = 1, cutoffMethod = "adjusted", P.Value.cutoff = rep(0.05, length(wCont)), pval = 0.05, min.count = 3, ontologias = c("MF", "BP", "CC"), testDirections = c("over", "under"), minNumGens = 0)

ERROR: No results met the specified criteria. Returning 0-row data.frame

doGOAnalysis'Vignette Info

Function to carry out the GO analysis.

GOParsList <-list()
GOPar <- list(fitFileName = fitFileName,
              whichContrasts = wCont,        
              comparisonName = "Estudi",
              anotPackage =  "org.Hs.eg",
              my.IDs = "entrezTable",        
              addGeneNames = TRUE, 
              outputDir = outputDir, 
              fileOfLinks = linksFile,
              fitMain = NULL,
              cutoffMethod = "adjusted",  
              P.Value.cutoff = rep(0.05, length(wCont)),
              pvalGOterms = 0.05,
              min.count = 3,
              ontologias = c("MF", "BP", "CC"),
              testDirections = c("over", "under"),
              minNumGens = 0)

GOParsList <- add2parsList(GOParsList, GOPar)


if(runMulticore == 2 || runMulticore == 3){  
    foreach(ix = 1:length(GOParsList)) %dopar% {
      GOList <- BasicP::doGOAnalysis (GOParsList[ix])
    }
  }else{
    for(ix in 1:length(GOParsList)) {
      GOList <- BasicP::doGOAnalysis (GOParsList[ix])
    }
  }

KEGGAnalysis'Vignette Info

Function to do KEGG analysis. Inside doKEGGAnalysis.

load("./ResultsDir/fit.Rda")
wCont <- 1:3
comparison.Name <-"Estudi"
outputDir <- "./ResultsDir"
orgPackage <-"org.Hs.eg"
organisme <-"hsa"
load("./ResultsDir/Entrezs.Rda")
addGeneNames <- TRUE
linksFileName <- "Links.txt"
cutoffMethod <-"unadjusted"
pval <- 0.05
minNumGens <-0
runMulticore <- 0

KEGGResult <- BasicP::KEGGAnalysis(fitMain = fit.main, whichContrasts = wCont,  comparison.Name = comparison.Name, outputDir = outputDir, anotPackage = orgPackage, organisme = organisme, my.IDs = entrezTable, addGeneNames = addGeneNames, fileOfLinks = linksFileName, cutoffMethod = cutoffMethod, P.Value.cutoff = rep(0.05, length(wCont)), pval = pval,thrLogFC = 1, minNumGens = minNumGens)

MENSAJE: KEGG.db contains mappings based on older data because the original resource was removed from the the public domain before the most recent update was produced. This package should now be considered deprecated and future versions of Bioconductor may not have it available. Users who want more current data are encouraged to look at the KEGGREST or reactome.db packages

doKEGGAnalysis'Vignette Info

Function to do KEGG analysis. Inside doKEGGAnalysis.

KEGGParsList <- list()
KEGGPar <- list(fitFileName = "fit.Rda",
                whichContrasts = 1:3,
                comparisonName = "Estudi",
                outputDir = "./ResultsDir",
                anotPackage = "org.Hs.eg", 
                organisme = "mmu",    
                my.IDs = "entrezTable",
                addGeneNames = TRUE,
                fileOfLinks = linksFileName,
                fitMain = NULL,
                cutoffMethod = "unadjusted",
                P.Value.cutoff = rep(0.01, length(wCont)), 
                pvalKEGGterms = 0.05, 
                minNumGens = 0) 

KEGGParsList <- add2parsList (KEGGParsList, KEGGPar)


for(i in 1:length(KEGGParsList))
{
  KEGGList <- BasicP::doKEGGAnalysis(KEGGParsList[i])
}


Merguerrero/BasicP documentation built on May 20, 2019, 2:24 p.m.