title: "Basic examples of using BasicP functions"
author: "Mercedes Guerrero and Alex Sanchez"
date: "r Sys.Date()
"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Basic examples of using BasicP functions}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
require(devtools) install("/home/mguerrero/Dropbox/Practiques-Grau_Biotec-UVic_Mercedes_Guerrero/BasicP", dependencies=TRUE) install_git("https://github.com/alexsanchezpla/links2File.git") library(BasicP) library(links2File) require(affy) require(oligo) #ExonStudy require(limma) require(gplots)
The goal of this Vignette is to illustrate the use of some functions exported from the package.
readOrLoad.rawData'Vignette Info
This function reads data from cel files or loads previously saved data.
readCELS <- TRUE my.targets <- "./celfiles/targets.txt" targets<- read.table("./celfiles/targets.txt", head=TRUE, sep="\t", row.names = 1) my.fileNames <-paste("./celfiles/",rownames(targets),sep="") rawDataFileName <- "rawData.Rda" my.outputDir <- "." isExonStudy <- FALSE orgPackage <- "org.Hs.eg" rawData <- BasicP::readOrLoad.RawData(readCELS = readCELS, phenoDat = my.targets, fileNames = my.fileNames, dataFName =rawDataFileName, outputDir = my.outputDir, exonSt = isExonStudy) rawData
createOrLoadAnnotations'Vignette Info
This function creates annotation files or loads previously saved data.
loadAnnotations <- FALSE chipPackAvailable <- TRUE platformDesignPackAvailable <- FALSE chipPackage <- "hgu133a2" platformDesignPackage <- NULL outputDir <- "./ResultsDir" annotationsFileName <- "Annotations" entrezTableFileName <-"Entrezs.Rda" symbolsTableFileName <-"Symbols.Rda" controlsTableFileName <- "controls.Rda" anotacions <- BasicP::createOrLoadAnnotations (loadAnnotations= loadAnnotations, chipPackAvailable = chipPackAvailable, platformDesignPackAvailable = platformDesignPackAvailable,chipPackage = chipPackage, platformDesignPackage = platformDesignPackage, outputDir = outputDir,annotationsFileName = annotationsFileName,entrezTableFileName = entrezTableFileName, symbolsTableFileName = symbolsTableFileName, controlsTableFileName = controlsTableFileName) summary(anotacions)
normalization'Vignette Info
function to normalize the raw data.
load("rawData.Rda") rawData <- my.raw normMethod <- "RMA" my.targets <- read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1) celFilesDir <-"./celfiles" loadFile <- FALSE normalized.eset.FileName <- "normalizedData.Rda" outputDir <- "./ResultsDir" exonStudy <- FALSE eset_norm <- BasicP::normalization(my.data = rawData, method = normMethod, targetsinfo = my.targets, inputDir = celFilesDir, loadFile = loadFile , normalizedFName = normalized.eset.FileName, outputDir = outputDir, exonSt = exonStudy) head(eset_norm)
normplots2File'Vignette Info
Function that makes a report of the normalization
load("./ResultsDir/normalizedData.Rda") repes <- duplicated(exprs(my.norm), MARGIN=1) exprs(my.norm) <- exprs(my.norm)[!repes,] eset_norm <- my.norm my.colors <- rainbow(length(sampleNames(eset_norm))) my.names <- pData(eset_norm)$ShortName myCex<- 0.8 dim3 <- FALSE fileName <- "NormalizedPlots.pdf" outputDir <- "./ResultsDir" PCAPlots <- TRUE csv <- "csv2" BasicP::normplots2File(my.data = eset_norm, sampleNames = my.names, my.colors = my.colors, my.groups = pData(eset_norm)$Group, my.method = "average",my.cex = myCex ,posText = 2, dim3 = FALSE,fileName = fileName, outputDir = outputDir,PCAPlots = TRUE, csv = fileType)
filterData'Vignette Info
Function to filter the data
load("./ResultsDir/normalizedData.Rda") repes <- duplicated(exprs(my.norm), MARGIN=1) exprs(my.norm) <- exprs(my.norm)[!repes,] eset_norm <- my.norm load("./ResultsDir/controls.Rda") removeNAs <- TRUE load("./ResultsDir/Entrezs.Rda") entrezs <- entrezTable SignalFilter <- TRUE signalThreshold <- 50 signalFilter.Function <- "filt.by.Signal" signalThreshold.as.percentage <- TRUE VarFilter <- TRUE variabilityThreshold <- 50 variability.Function <- "sdf" variabilityThreshold.as.percentage <- TRUE pairing.Function <- NULL my.targets <-read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1) doReport <- TRUE outputDir <- "./ResultsDir" FilteringReportFileName <- "FilteringReport.txt" exprs.filtered <- BasicP::filterData(expres = exprs(eset_norm),controls = names(controlsTable),removeNAs = TRUE, entrezs = entrezs ,bySignal = SignalFilter,signalThr = signalThreshold, grups = pData(eset_norm)$grupo, sigFun.Name = signalFilter.Function, sigThr.as.perc = signalThreshold.as.percentage, byVar = VarFilter, variabilityThr = variabilityThreshold, varFun.Name = variability.Function, varThr.as.perc = variabilityThreshold.as.percentage, pairingFun.Name = pairing.Function, targets = my.targets, doReport = doReport, outputDir = outputDir, filteringReportFName = FilteringReportFileName) head(exprs.filtered)
saveData'Vignette Info
Function to save R objects in files.
load("./ResultsDir/normalizedData.Rda") repes <- duplicated(exprs(my.norm), MARGIN=1) exprs(my.norm) <- exprs(my.norm)[!repes,] eset_norm <- my.norm normalized.all.FileName <- "normalized.all" fileType <-"csv2" symbolsTable <- load("./ResultsDir/Symbols.Rda") expres.all.FileName <- "expres.Rda" linksFileName <- "Links.txt" outputDir <- "./ResultsDir" BasicP::saveData(expres = exprs(eset_norm), expres.csv.FileName = normalized.all.FileName, csvType=fileType, description = "Normalized values for all genes", anotPackage = NULL, symbolsVector = symbolsTable, SYMBOL = "SYMBOL", expres.bin.FileName = expres.all.FileName, linksFile = linksFileName, outputDir = outputDir) BasicP::saveData(expres = exprs.filtered, expres.csv.FileName = "normalized.filtered", csvType = fileType, description = "Normalized values for filtered genes", anotPackage = NULL, symbolsVector = symbolsTable, SYMBOL = "SYMBOL", expres.bin.FileName = "exprs.filtered.Rda", linksFile = linksFileName, outputDir = outputDir)
lmAnalysis'Vignette Info
Setting design and contrasts matrix to proceed with the linear model analysis.
targets <- read.table(file ="./celfiles/targets.txt" , header = TRUE, sep = "\t") column2design<- 5 lev <- targets[,column2design] design <- model.matrix( ~ 0 + lev) colnames(design) <- levels(lev) rownames(design) <- targets$ShortName numParameters <- ncol(design) print(design) cont.matrix<-makeContrasts(AvsB= (A -B), AvsL= (A-L), BvsL=(B-L),levels=design) print(cont.matrix)
Linear model analysis.Function included in dolmAnalysis.
load("./ResultsDir/exprs.filtered.Rda") contrasts2test <- 1:ncol(cont.matrix) anotPackage = NULL comparison = "Estudi" outputDir = "./ResultsDir" ENTREZIDs = "entrezTable" SYMBOLIDs = "symbolsTable" linksFile = "Links.txt" fitFileName = "fit.Rda" csvType= "csv" rows2HTML= NULL anotFileName <- "Annotations" runMulticore = 0 toTIFF= FALSE fitMain <- BasicP::lmAnalysis(exprs.filtered = exprs.filtered, design = design, cont.matrix = cont.matrix, contrasts2test = contrasts2test, anotPackage = anotPackage, outputDir = outputDir, comparison = comparison, Expressions_And_Top = TRUE , showParams = FALSE , use.dupCorr = FALSE, block = NULL, nDups = 1 , ENTREZIDs = ENTREZIDs, SYMBOLIDs = SYMBOLIDs, linksFile = linksFile,fitFileName = fitFileName , csvType=csvType, rows2HTML = NULL, anotFileName = anotFileName) fitMain
doLmAnalysis'Vignette Info
Function to analyze the linerar model with the different parameters.
lmParsList <- list() Estudi <- list(dades = NULL, expresFileName = "exprs.filtered.Rda", targets = targets, designMat = design, contMat = cont.matrix, whichContrasts = 1:ncol(cont.matrix), anotPack = NULL, outputDir = outputDir, ExpressionsAndTop = TRUE, showLmParams = FALSE, use.dupCorr = FALSE, block = NULL, nDups = 1, comparisonName = comparison, ENTREZIDs = "entrezTable", SYMBOLIDs = "symbolsTable", fileOfLinks = linksFile, fitFileName = fitFileName, csvType=csvType, rows2HTML = NULL, anotFilename = anotFileName ) lmParsList <- add2parsList(lmParsList, Estudi) for(ix in 1:length(lmParsList)) { fit.Main <- BasicP::doLmAnalysis(lmParsList[ix]) } fit.Main
GeneAnnotation'Vignette Info
Function that annotates genes. Inside doGeneAnnotation
genes2annotate <- entrezs[unique(rownames(fitMain$p.value))] genesAnnotated <-BasicP::GeneAnnotation(egIDs = genes2annotate, anotPackage = "org.Hs.eg", toHTML = TRUE, outputDir = outputDir, filename = "Annotations", myTitle = "Annotations for all genes analyzed", specie = "homo sapiens", info2show = c( "Affymetrix", "EntrezGene", "GeneSymbol", "GeneName", "KEGG", "GO"), linksFile = linksFile, maxGenes = NULL)
doGeneAnnotation'Vignette Info
Function to annotate the genes.
organisme = "hsa" anotList <- list(fitMain = NULL, fitFileName = fitFileName, my.IDs = "entrezTable", anotPackage = "org.Hs.eg", toHTML = TRUE, outputDir = outputDir, anotFilename = "Annotations", titleAnotations = "Annotations for all genes analyzed", specie = "homo sapiens", info2show = c( "Affymetrix", "EntrezGene", "GeneSymbol", "GeneName", "KEGG", "GO"), linksFile = linksFile, numGenesPerPage = NULL) BasicP::doGeneAnnotation(anotList)
multipleComp'Vignette Info
Function to make multiple comparations. Inside doMultCompAnalysis.
compName <- c("Group1") wCont <- 1:3 pValCutOff <- c(0.01) adjMethod <- c("none") minLogFoldChange <- c(1) load("./ResultsDir/Symbols.Rda") titleText = paste("for", ifelse(adjMethod=="none","p-values","adj. p-values"), "<", pValCutOff, "and |logFC| >", minLogFoldChange, sep = " ") geneListFName = paste("geneList",compName,ifelse(adjMethod=="none","pvalues","adj-pvalues"),"LT",pValCutOff,"Rda",sep = ".") geneList <- BasicP::multipleComp(fitMain = fitMain, whichContrasts = wCont, comparisonName = compName, titleText = titleText, outputDir = outputDir, anotPackage = "org.Hs.eg", my.symbols = symbolsTable, linksFile = linksFile, multCompMethod = "separate", adjustMethod = adjMethod, selectionType = "any", P.Value.cutoff = pValCutOff, plotVenn = TRUE, colsVenn = NULL, vennColors = c("red","yellow","green","blue","pink") , cexVenn = 1, geneListFName=geneListFName, csvType=csvType, minLFC=minLogFoldChange) head(geneList,n=20)
doMultCompAnalysis'Vignette Info
Function to perform Multi comparations analysis.
mcParsList <- list() compName <- c("Group1", "Group2") wCont <- 1:ncol(cont.matrix) pValCutOff <- c(0.01, 0.01) adjMethod <- c("none", "none") minLogFoldChange <- c(1, 1) for(i in 1:length(compName)) { mci <- list(fitMain = fitMain, fitFileName = fitFileName, whichContrasts = wCont[[i]], comparisonName = compName[i], titleText = paste("for",ifelse(adjMethod[i]=="none", "p-values","adj. p-values"), "<", pValCutOff[i], "and |logFC| >", minLogFoldChange[i], sep = " "), anotPackage = "org.Hs.eg", my.symbols = symbolsTable, outputDir = outputDir, fileOfLinks = linksFile, multCompMethod = "separate", adjustMethod = adjMethod[i], selectionType = "any", P.Value.cutoff = pValCutOff[i], plotVenn = TRUE, colsVenn = NULL, vennColors= c("red","yellow","green","blue","pink"), cexVenn = 1, geneListFName = paste("geneList",compName[i], ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT",pValCutOff[i],"Rda",sep = "."), minLogFC = minLogFoldChange[i], csvType = csvType) mcParsList <- add2parsList(mcParsList, mci) } for(ix in 1:length(mcParsList)) { geneList.MCi <- BasicP::doMultCompAnalysis(mcParsList[ix]) } head(geneList.MCi,n=20)
clusterAnalysis'Vignette Info
Function to draw the clusters.Inside doClusterAnalysis.
s2clust <- which(as.logical(apply(design[,as.logical(apply(abs(as.matrix(cont.matrix[,wCont[[i]]])),1,sum))],1,sum))) geneListFName = paste("geneList", compName[i], ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT", pValCutOff[i], "Rda", sep = ".") pal <- colorpanel(n = 32, low = "green", mid = "white", high = "magenta") load("./ResultsDir/geneList.Group1.pvalues.LT.0.01.Rda") clust <- BasicP::clusterAnalysis(expres = exprs.filtered, genes = geneList, samples = s2clust, sampleNames = as.character(targets$ShortName)[s2clust], comparisonName = "Compar 1", anotPackage = "org.Hs.eg", my.symbols = symbolsTable, outputDir = outputDir, fileOfLinks = linksFile, numClusters = 2, rowDistance = NULL, colDistance = NULL, RowVals = TRUE, ColVals = FALSE, escala = "row", colorsSet = pal, densityInfo = "density", colsForGroups = c("pink","pink","pink","pink","pink","blue","blue","blue","blue","blue"), cexForColumns = 0.8, cexForRows = 0.8, Title = paste("Compar 1 with", ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "<", pValCutOff[i], ifelse(minLogFoldChange[i]==0, "", paste("\n and |logFC|>=", minLogFoldChange[i], sep=""))), csvType = "csv2") summary(clust)
doClusterAnalysis'Vignette Info
Function to draw the clusters.
clustParsList <- list() for(i in 1:length(compName)) { clustPar <- list(expres = NULL, expresFileName = "exprs.filtered.Rda", geneListFName = paste("geneList", compName[i], ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "LT", pValCutOff[i], "Rda", sep = "."), genes2cluster = NULL, samples2cluster = s2clust, sampleNames = as.character(targets$ShortName)[s2clust], comparisonName = compName[i], anotPackage = "org.Hs.eg", my.symbols = symbolsTable, outputDir = outputDir, fileOfLinks = linksFile, numClusters = 2, rowDistance = NULL, colDistance = NULL, RowVals = TRUE, ColVals = FALSE, escala = "row", colorsSet = pal, densityInfo = "density", colsForGroups = c("pink","pink","pink","pink","pink","blue","blue","blue","blue","blue"), cexForColumns = 0.8, cexForRows = 0.8, Title = paste(compName[i], "with", ifelse(adjMethod[i]=="none","pvalues","adj-pvalues"), "<", pValCutOff[i], ifelse(minLogFoldChange[i]==0, "", paste("\n and |logFC|>=", minLogFoldChange[i], sep=""))), paste("Comparison:", compName[i], sep=" "), csvType = csvType) clustParsList <- add2parsList(clustParsList, clustPar) } for(ix in 1:length(clustParsList)) { hm.Estudi <- BasicP::doClusterAnalysis(clustParsList[ix]) } summary(hm.Estudi)
GOAnalysis'Vignette Info
Function to carry out the GO analysis. Inside doGOAnalysis
library(GOstats) GOResult<-BasicP::GOAnalysis(fitMain = fitMain, whichContrasts = wCont, comparison.Name = "Estudi", outputDir = outputDir, anotPackage = "org.Hs.eg", my.IDs = entrezTable, addGeneNames = TRUE, fileOfLinks = linksFile, thrLogFC = 1, cutoffMethod = "adjusted", P.Value.cutoff = rep(0.05, length(wCont)), pval = 0.05, min.count = 3, ontologias = c("MF", "BP", "CC"), testDirections = c("over", "under"), minNumGens = 0)
ERROR: No results met the specified criteria. Returning 0-row data.frame
doGOAnalysis'Vignette Info
Function to carry out the GO analysis.
GOParsList <-list() GOPar <- list(fitFileName = fitFileName, whichContrasts = wCont, comparisonName = "Estudi", anotPackage = "org.Hs.eg", my.IDs = "entrezTable", addGeneNames = TRUE, outputDir = outputDir, fileOfLinks = linksFile, fitMain = NULL, cutoffMethod = "adjusted", P.Value.cutoff = rep(0.05, length(wCont)), pvalGOterms = 0.05, min.count = 3, ontologias = c("MF", "BP", "CC"), testDirections = c("over", "under"), minNumGens = 0) GOParsList <- add2parsList(GOParsList, GOPar) if(runMulticore == 2 || runMulticore == 3){ foreach(ix = 1:length(GOParsList)) %dopar% { GOList <- BasicP::doGOAnalysis (GOParsList[ix]) } }else{ for(ix in 1:length(GOParsList)) { GOList <- BasicP::doGOAnalysis (GOParsList[ix]) } }
KEGGAnalysis'Vignette Info
Function to do KEGG analysis. Inside doKEGGAnalysis.
load("./ResultsDir/fit.Rda") wCont <- 1:3 comparison.Name <-"Estudi" outputDir <- "./ResultsDir" orgPackage <-"org.Hs.eg" organisme <-"hsa" load("./ResultsDir/Entrezs.Rda") addGeneNames <- TRUE linksFileName <- "Links.txt" cutoffMethod <-"unadjusted" pval <- 0.05 minNumGens <-0 runMulticore <- 0 KEGGResult <- BasicP::KEGGAnalysis(fitMain = fit.main, whichContrasts = wCont, comparison.Name = comparison.Name, outputDir = outputDir, anotPackage = orgPackage, organisme = organisme, my.IDs = entrezTable, addGeneNames = addGeneNames, fileOfLinks = linksFileName, cutoffMethod = cutoffMethod, P.Value.cutoff = rep(0.05, length(wCont)), pval = pval,thrLogFC = 1, minNumGens = minNumGens)
MENSAJE: KEGG.db contains mappings based on older data because the original resource was removed from the the public domain before the most recent update was produced. This package should now be considered deprecated and future versions of Bioconductor may not have it available. Users who want more current data are encouraged to look at the KEGGREST or reactome.db packages
doKEGGAnalysis'Vignette Info
Function to do KEGG analysis. Inside doKEGGAnalysis.
KEGGParsList <- list() KEGGPar <- list(fitFileName = "fit.Rda", whichContrasts = 1:3, comparisonName = "Estudi", outputDir = "./ResultsDir", anotPackage = "org.Hs.eg", organisme = "mmu", my.IDs = "entrezTable", addGeneNames = TRUE, fileOfLinks = linksFileName, fitMain = NULL, cutoffMethod = "unadjusted", P.Value.cutoff = rep(0.01, length(wCont)), pvalKEGGterms = 0.05, minNumGens = 0) KEGGParsList <- add2parsList (KEGGParsList, KEGGPar) for(i in 1:length(KEGGParsList)) { KEGGList <- BasicP::doKEGGAnalysis(KEGGParsList[i]) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.