# Nothing
## ----setup, include = FALSE---------------------------------------------------
#' Attach the package in its own statement. It used to be written as the first
#' (unnamed) argument of knitr::opts_chunk$set(), which attached the package
#' only as a side effect of argument evaluation and handed the return value of
#' library() to knitr as a nameless chunk option.
library(PhyloProfile)
#' Default chunk options for the chunks that follow
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    dev = "png",
    crop = NULL
)
## ---- echo = FALSE, results = 'asis'------------------------------------------
#' Render the first ten rows of the bundled mainLongRaw data set as a table
data("mainLongRaw", package = "PhyloProfile")
previewRows <- head(mainLongRaw, 10)
knitr::kable(previewRows, row.names = FALSE)
## ---- echo=FALSE, fig.cap="PhyloProfile's GUI", out.width = '100%'------------
#' Embed the GUI screenshot, referenced by a path relative to the working
#' directory
guiScreenshot <- "./posterSub.png"
knitr::include_graphics(guiScreenshot)
## ---- fig.show='hold', dev='png'----------------------------------------------
### An example for plotting the clustered profiles tree.
### See ?getDendrogram for more details.
#' Load built-in data
data("finalProcessedProfile", package="PhyloProfile")
data <- finalProcessedProfile
#' Choose how the two additional variables are aggregated.
#' These names are passed to getDataClustering() below but were not assigned
#' anywhere before this point in the script, so they are defined here to keep
#' the chunk self-contained. The values match the ones used later when the raw
#' input is processed with fromInputToProfile().
var1AggregateBy <- "max"
var2AggregateBy <- "mean"
#' Prepare the profiles for clustering
#' Check ?getDataClustering
profileType <- "binary"
profiles <- getDataClustering(
    data, profileType, var1AggregateBy, var2AggregateBy
)
#' Calculate distance matrix
#' Check ?getDistanceMatrix
method <- "mutualInformation"
distanceMatrix <- getDistanceMatrix(profiles, method)
#' Create clustered profile tree
clusterMethod <- "complete"
dd <- clusterDataDend(distanceMatrix, clusterMethod)
#' Draw the dendrogram
getDendrogram(dd)
## -----------------------------------------------------------------------------
### Example: estimating gene ages for the built-in data set.
### Details are in ?estimateGeneAge.
#' Built-in processed profile used as input
data("fullProcessedProfile", package = "PhyloProfile")
#' Working taxonomy rank and reference taxon
rankName <- "class"
refTaxon <- "Mammalia"
#' Cutoff ranges for the two additional variables and for the percentage of
#' present species in each supertaxon
var1Cutoff <- c(0, 1)
var2Cutoff <- c(0, 1)
percentCutoff <- c(0, 1)
#' Number of input taxa falling into each supertaxon
taxonIDs <- levels(as.factor(fullProcessedProfile$ncbiID))
sortedInputTaxa <- sortInputTaxa(
    taxonIDs = taxonIDs,
    rankName = rankName,
    refTaxon = refTaxon,
    taxaTree = NULL
)
taxaCount <- plyr::count(sortedInputTaxa, "supertaxon")
#' Run the gene age estimation
estimateGeneAge(
    fullProcessedProfile,
    taxaCount,
    rankName,
    refTaxon,
    var1Cutoff,
    var2Cutoff,
    percentCutoff
)
## -----------------------------------------------------------------------------
### Example: computing the core gene set for the built-in data set.
### Details are in ?getCoreGene.
#' Built-in processed profile used as input
data("fullProcessedProfile", package = "PhyloProfile")
#' Working rank, reference taxon and the taxa of interest
rankName <- "class"
refTaxon <- "Mammalia"
taxaCore <- c("Mammalia", "Saccharomycetes", "Insecta")
#' Cutoff ranges for the two additional variables and for the percentage of
#' present species in each supertaxon
var1Cutoff <- c(0.75, 1.0)
var2Cutoff <- c(0.75, 1.0)
percentCutoff <- c(0.0, 1.0)
#' Core coverage: how much of the selected taxa set must be present
#' (see ?getCoreGene)
coreCoverage <- 1
#' Number of input taxa falling into each supertaxon
taxonIDs <- levels(as.factor(fullProcessedProfile$ncbiID))
sortedInputTaxa <- sortInputTaxa(
    taxonIDs = taxonIDs,
    rankName = rankName,
    refTaxon = refTaxon,
    taxaTree = NULL
)
taxaCount <- plyr::count(sortedInputTaxa, "supertaxon")
#' Extract the core genes
getCoreGene(
    rankName, taxaCore, fullProcessedProfile, taxaCount,
    var1Cutoff, var2Cutoff, percentCutoff, coreCoverage
)
## -----------------------------------------------------------------------------
#' Built-in raw input in long format
data("mainLongRaw", package = "PhyloProfile")
data <- mainLongRaw
#' Taxa that form the in-group
inGroup <- c("ncbi9606", "ncbi10116")
#' The variable to compare is taken from the 4th column name of the input
variable <- colnames(data)[4]
#' Compare that variable between the in-group and the out-group taxa
compareTaxonGroups(
    data, inGroup, TRUE, variable, 0.05
)
## ---- fig.show='hold', dev='png'----------------------------------------------
### Example: plotting the distribution of the 1st additional variable.
### Details are in ?createVarDistPlot.
#' Built-in raw input in long format
data("mainLongRaw", package = "PhyloProfile")
#' Prepare the data for the distribution analysis
#' (see ?createVariableDistributionData)
data <- createVariableDistributionData(mainLongRaw, c(0, 1), c(0.5, 1))
head(data, 6)
#' Variable to plot and the label shown for it
varType <- "var1"
varName <- "Variable 1"
#' Cutoff range for the percentage of present species in each supertaxon
percentCutoff <- c(0, 1)
#' Text size used in the plot
distTextSize <- 12
#' Draw the distribution plot
createVarDistPlot(data, varName, varType, percentCutoff, distTextSize)
## -----------------------------------------------------------------------------
#' Locate the example input shipped with the package.
#' For input in another format (e.g. fasta, OrthoXML, or wide matrix),
#' see ?createLongMatrix
rawInput <- system.file(
    "extdata",
    "test.main.long",
    package = "PhyloProfile",
    mustWork = TRUE
)
#' Working rank and reference taxon
rankName <- "class"
refTaxon <- "Mammalia"
#' Optional user-defined taxonomy tree that replaces the NCBI taxonomy tree
taxaTree <- NULL
#' How the additional variables are aggregated when the data is processed
#' into supertaxa
var1AggregateBy <- "max"
var2AggregateBy <- "mean"
#' Cutoffs for the percentage of species present in a supertaxon, the allowed
#' number of co-orthologs, and the two additional variables
percentCutoff <- c(0.0, 1.0)
coorthologCutoffMax <- 10
var1Cutoff <- c(0.75, 1.0)
var2Cutoff <- c(0.5, 1.0)
#' Whether each additional variable relates to the orthologous proteins or to
#' the species
var1Relation <- "protein"
var2Relation <- "species"
#' Gene categories (taken from a tab-delimited mapping file); not used here
groupByCat <- FALSE
catDt <- NULL
#' Turn the input file into phylogenetic profile data that carries the taxonomy
#' information and the aggregated values of the 2 additional variables
profileData <- fromInputToProfile(
    rawInput, rankName, refTaxon, taxaTree,
    var1AggregateBy, var2AggregateBy,
    percentCutoff, coorthologCutoffMax,
    var1Cutoff, var2Cutoff,
    var1Relation, var2Relation,
    groupByCat, catDt
)
head(profileData)
## ---- fig.show='hold', dev='png'----------------------------------------------
#' Built-in processed data
data("superTaxonProfile", package = "PhyloProfile")
#' Build the data frame used for plotting
plotDf <- dataMainPlot(superTaxonProfile)
#' A subset of genes and/or taxa can be plotted instead with:
#' selectedTaxa <- c("Mammalia", "Echinoidea", "Gunneridae")
#' selectedSeq <- "all"
#' plotDf <- dataCustomizedPlot(
#' superTaxonProfile, selectedTaxa, selectedSeq
#' )
#' Parameters of the plot
plotParameter <- list(
    xAxis = "taxa",
    var1ID = "FAS_FW",
    var2ID = "FAS_BW",
    lowColorVar1 = "#FF8C00",
    highColorVar1 = "#4682B4",
    lowColorVar2 = "#FFFFFF",
    highColorVar2 = "#F0E68C",
    paraColor = "#07D000",
    xSize = 8,
    ySize = 8,
    legendSize = 8,
    mainLegend = "top",
    dotZoom = 0,
    xAngle = 60,
    guideline = 0,
    colorByGroup = FALSE
)
#' Draw the profile plot
heatmapPlotting(plotDf, plotParameter)
#' Settings for highlighting a gene and/or taxon of interest
taxonHighlight <- "Mammalia"
rankName <- "class"
geneHighlight <- "none"
#' These are meant for the ?highlightProfilePlot function
# highlightProfilePlot(
# plotDf, plotParameter, taxonHighlight, rankName, geneHighlight
# )
## ---- fig.show='hold', dev='png'----------------------------------------------
#' Locate the protein domain architecture file shipped with the package
domainFile <- system.file(
    "extdata",
    "domainFiles/101621at6656.domains",
    package = "PhyloProfile",
    mustWork = TRUE
)
#' IDs of the gene of interest and of its ortholog partner
seedID <- "101621at6656"
orthoID <- "101621at6656|AGRPL@224129@0|224129_0:001955|1"
info <- c(seedID, orthoID)
#' Read the domain data for the seed gene from the file
domainDf <- parseDomainInput(seedID, domainFile, "file")
#' Build the architecture plot and draw it
plot <- createArchiPlot(info, domainDf, 9, 9)
grid::grid.draw(plot)
## -----------------------------------------------------------------------------
#' Built-in raw input
data("mainLongRaw", package = "PhyloProfile")
inputDf <- mainLongRaw
#' Working taxonomy rank
rankName <- "phylum"
#' Taxonomy IDs of the input data, then the matching names
inputTaxonID <- getInputTaxaID(inputDf)
inputTaxonName <- getInputTaxaName(rankName, inputTaxonID)
head(inputTaxonName)
## -----------------------------------------------------------------------------
#' Taxon IDs of the built-in input profile
data("mainLongRaw", package = "PhyloProfile")
inputDf <- mainLongRaw
rankName <- "phylum"
inputTaxonID <- getInputTaxaID(inputDf)
#' Optional user-defined taxonomy tree that replaces the NCBI taxonomy tree
inputTaxaTree <- NULL
#' Sort the taxonomy list relative to the chosen reference taxon
refTaxon <- "Microsporidia"
sortedTaxonomy <- sortInputTaxa(
    taxonIDs = inputTaxonID, rankName = rankName,
    refTaxon = refTaxon, taxaTree = inputTaxaTree
)
#' Show the first rows, restricted to the columns of interest
shownColumns <- c("ncbiID", "fullName", "supertaxon", "supertaxonID", "rank")
head(sortedTaxonomy[, shownColumns])
## -----------------------------------------------------------------------------
#' Print the citation entry of the package
citation(package = "PhyloProfile")
## ----sessionInfo, echo = FALSE------------------------------------------------
#' Print the session information, restricted to the package named below
pkgName <- "PhyloProfile"
sessionInfo(package = pkgName)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.