inst/doc/using_paxtoolsr.R

## ----knitrSetup, include=FALSE------------------------------------------------
library(knitr)
opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center", tidy=TRUE)


## ----style, include=FALSE, echo=FALSE, results='asis'-------------------------
BiocStyle::markdown()

## ----installPaxtoolsr, eval=FALSE---------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("paxtoolsr")

## ----loadLibrary, message=FALSE, warning=FALSE--------------------------------
library(paxtoolsr)

## ----searchHelp, eval=FALSE, tidy=FALSE---------------------------------------
#  help.search("paxtoolsr")

## ----showHelp, eval=FALSE, tidy=FALSE-----------------------------------------
#  help(graphPc)
#  ?graphPc

## ----paxtoolsMergeExample-----------------------------------------------------
file1 <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")
file2 <- system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr")

mergedFile <- mergeBiopax(file1, file2)

## ----summarize, results='hold'------------------------------------------------
s1 <- summarize(file1)
s2 <- summarize(file2)
s3 <- summarize(mergedFile)

s1$Catalysis
s2$Catalysis
s3$Catalysis

## ----paxtoolsValidationExample, eval=FALSE------------------------------------
#  errorLog <- validate(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"), onlyErrors=TRUE)

## ----loadSifFromFile----------------------------------------------------------
sif <- toSif(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"))

## ----viewSifHead--------------------------------------------------------------
head(sif)

## ----toSifnxExample, tidy=TRUE------------------------------------------------
# Select additional node and edge properties
inputFile <- system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr")

results <- toSifnx(inputFile=inputFile)

## ----downloadSif, eval=FALSE--------------------------------------------------
#  results <- downloadPc2(version="12")

## ----paxtoolsrCache, eval=FALSE-----------------------------------------------
#  Sys.getenv("PAXTOOLSR_CACHE")

## ----searchResultsExample, eval=FALSE-----------------------------------------
#  ## Search Pathway Commons for "glycolysis"-related pathways
#  searchResults <- searchPc(q="glycolysis", type="pathway")

## ----searchResultsExampleVerbose----------------------------------------------
## Search Pathway Commons for "glycolysis"-related pathways
searchResults <- searchPc(q="glycolysis", type="pathway", verbose=TRUE)

## ----searchResultsType--------------------------------------------------------
str(searchResults)

## ----searchResultsXpathExample------------------------------------------------
xpathSApply(searchResults, "/searchResponse/searchHit/name", xmlValue)[1]
xpathSApply(searchResults, "/searchResponse/searchHit/pathway", xmlValue)[1]

## ----convertXmlToDf, message=FALSE--------------------------------------------
library(plyr)
searchResultsDf <- ldply(xmlToList(searchResults), data.frame)

# Simplified results
simplifiedSearchResultsDf <- searchResultsDf[, c("name", "uri", "biopaxClass")]
head(simplifiedSearchResultsDf)

## ----saveBiopaxFileFromPcQuery, message=FALSE, results='hide'-----------------
## Use an XPath expression to extract the results of interest. In this case, the URIs (IDs) for the pathways from the results
tmpSearchResults <- xpathSApply(searchResults, "/searchResponse/searchHit/uri", xmlValue)

## Generate temporary file to save content into
biopaxFile <- tempfile()

## Extract a URI for a pathway in the search results and save into a file  
idx <- which(grepl("panther", simplifiedSearchResultsDf$uri) &
             grepl("glycolysis", simplifiedSearchResultsDf$name, ignore.case=TRUE))
uri <- simplifiedSearchResultsDf$uri[idx]
saveXML(getPc(uri, format="BIOPAX"), biopaxFile)

## ----traverse-----------------------------------------------------------------
# Convert the Uniprot ID to a URI that would be found in Pathway Commons
uri <- paste0("http://identifiers.org/uniprot/P31749")

# Get URIs for only the ModificationFeatures of the protein
xml <- traverse(uri=uri, path="ProteinReference/entityFeature:ModificationFeature")

# Extract all the URIs
uris <- xpathSApply(xml, "//value/text()", xmlValue)

# For the first URI get the modification position and type
tmpXml <- traverse(uri=uris[1], path="ModificationFeature/featureLocation:SequenceSite/sequencePosition")
cat("Modification Position: ", xpathSApply(tmpXml, "//value/text()", xmlValue))

tmpXml <- traverse(uri=uris[1], path="ModificationFeature/modificationType/term")
cat("Modification Type: ", xpathSApply(tmpXml, "//value/text()", xmlValue))

## ----loadGraphLibraries, message=FALSE----------------------------------------
library(igraph)

## ----plotGraph----------------------------------------------------------------
sif <- toSif(system.file("extdata", "biopax3-short-metabolic-pathway.owl", package="paxtoolsr"))

# graph.edgelist requires a matrix
g <- graph.edgelist(as.matrix(sif[,c(1,3)]), directed=FALSE)
plot(g, layout=layout.fruchterman.reingold)

## ----graphQueryExampleSingle, figure.width=20, figure.height=20---------------
gene <- "BDNF"
t1 <- graphPc(source=gene, kind="neighborhood", format="SIF", verbose=TRUE)
t2 <- t1[which(t1[,2] == "controls-state-change-of"),]

# Show only 100 interactions for simplicity
g <- graph.edgelist(as.matrix(t2[1:100, c(1,3)]), directed=FALSE)
plot(g, layout=layout.fruchterman.reingold)

## ----graphQueryExample, fig.width=7, fig.height=7-----------------------------
genes <- c("AKT1", "IRS1", "MTOR", "IGF1R")
t1 <- graphPc(source=genes,
              kind="PATHSBETWEEN",
              format="SIF",
              verbose=TRUE)
t2 <- t1[which(t1[,2] == "controls-state-change-of"),]

# Show only 100 interactions for simplicity
g <- graph.edgelist(as.matrix(t2[1:100, c(1,3)]), directed=FALSE)
plot(g, layout=layout.fruchterman.reingold)

## ----dataOverlayExample, fig.width=7, fig.height=7----------------------------
library(RColorBrewer)

# Generate a color palette that goes from white to red that contains 10 colors
numColors <- 10
colors <- colorRampPalette(brewer.pal(9, "Reds"))(numColors)

# Generate values that could represent some experimental values
values <- runif(length(V(g)$name))

# Scale values to generate indicies from the color palette  
xrange <- range(values)
newrange <- c(1, numColors)

factor <- (newrange[2]-newrange[1]) / (xrange[2]-xrange[1])
scaledValues <- newrange[1] + (values-xrange[1]) * factor
indicies <- as.integer(scaledValues)

# Color the nodes based using the indicies and the color palette created above
g <- set.vertex.attribute(g, "color", value=colors[indicies])

#get.vertex.attribute(h, "color")

plot(g, layout = layout.fruchterman.reingold)

## ----sifNetStats--------------------------------------------------------------
# Degree for each node in the igraph network
degree(g)
#Number of nodes
length(V(g)$name)
#Clustering coefficient
transitivity(g)
#Network density
graph.density(g)
# Network diameter
diameter(g)

## ----sifShortestPath----------------------------------------------------------
# Get the first shortest path between two nodes
tmp <- get.shortest.paths(g, from="IRS1", to="MTOR")

# igraph seems to return different objects on Linux versus OS X for get.shortest.paths()
if(is(tmp[[1]], "list")) {
	path <- tmp[[1]][[1]]	# Linux
} else {
  path <- tmp[[1]] # OS X
}

# Convert from indicies to vertex names
V(g)$name[path]

## ----gseaExampleLibraries, message=FALSE--------------------------------------
library(paxtoolsr) # To retrieve data from Pathway Commons
library(clusterProfiler) # Enrichment analysis
library(org.Hs.eg.db)
library(XML) # To parse XML files

## ----gseaExampleGenGeneSet, results='hide', eval=FALSE------------------------
#  # Generate a Gene Set
#  ## Search Pathway Commons for "glycolysis"-related pathways
#  searchResults <- searchPc(q="glycolysis", type="pathway")
#  
#  ## Use an XPath expression to extract the results of interest. In this case, the URIs (IDs) for the pathways from the results
#  searchResults <- xpathSApply(searchResults, "/searchResponse/searchHit/uri", xmlValue)
#  
#  ## Generate temporary files to save content into
#  biopaxFile <- tempfile()
#  
#  ## Extract the URI for the first pathway in the search results and save into a file
#  uri <- searchResults[2]
#  saveXML(getPc(uri, "BIOPAX"), biopaxFile)

## ----gseaExampleGenGeneSetSave, results='hide'--------------------------------
## Generate temporary files to save content into
gseaFile <- tempfile()

## Generate a gene set for the BioPAX pathway with gene symbols
### NOTE: Not all search results are guaranteed to result in gene sets
tmp <- toGSEA(biopaxFile, gseaFile, "HGNC Symbol", FALSE)
geneSet <- tmp$geneSet

## ----gseaExample--------------------------------------------------------------
library(clusterProfiler)

# Example gene list at the end of some end analysis 
geneList <- c("ALDOA", "ENO1", "GAPDH", "GPI", "HK1", "PFKL", "PGK1", "PKM")

# Read Pathway Commons V12 KEGG dataset inluded with package
gmt <- readGmt(system.file("extdata", "test_PathwayCommons12.kegg.hgnc.gmt", package = "paxtoolsr"), returnInfo = TRUE)  

geneSetList <- lapply(seq_along(gmt), function(x, n, i) { 
  tmp <- x[[i]]
  data.frame(id=n[i], name=tmp[["name"]], gene=tmp[["geneSet"]], stringsAsFactors=FALSE)
}, x=gmt, n=names(gmt))

tmp <- do.call("rbind", geneSetList)
rownames(tmp) <- 1:nrow(tmp) # For convenience 

pc2gene <- tmp[, c("id", "gene")]
pc2name <- tmp[, c("id", "name")]

enrichOutput <- clusterProfiler::enricher(geneList, pvalueCutoff=0.05, minGSSize=10, maxGSSize=500, TERM2GENE=pc2gene, TERM2NAME=pc2name)
enrichOutput@result

## ----idMapping, eval=FALSE----------------------------------------------------
#  sif <- toSif(system.file("extdata", "raf_map_kinase_cascade_reactome.owl", package="paxtoolsr"))
#  
#  ids <- c(sif$PARTICIPANT_A, sif$PARTICIPANT_B)
#  
#  output <- clusterProfiler::bitr(ids, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
#  output

## ----filePathExample, eval=FALSE----------------------------------------------
#  toSif("/directory/file")
#  #or
#  toSif("X:\\directory\\file")

## ----changeHeapSize, eval=FALSE-----------------------------------------------
#  options(java.parameters="-Xmx1024m")
#  
#  library(paxtoolsr)
#  
#  # Megabyte size
#  mbSize <- 1048576.0
#  
#  runtime <- .jcall("java/lang/Runtime", "Ljava/lang/Runtime;", "getRuntime")
#  maxMemory <- .jcall(runtime, "J", "maxMemory")
#  maxMemoryMb <- maxMemory / mbSize
#  cat("Max Memory: ", maxMemoryMb, "\n")

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the paxtoolsr package in your browser

Any scripts or data that you put into this service are public.

paxtoolsr documentation built on Nov. 8, 2020, 8:29 p.m.