##############################
## Sven Nelson ##
## 3/12/2015 ##
## Function: countTFs ##
##############################
# Convenience wrapper: counts the TF families hit by each of two gene lists
# (via TFcounts) and hands both count tables to TFplot.
#
# Arguments:
#   geneListA, geneListB  gene identifier vectors; each is passed to TFcounts().
#   A, B, labA, labB, title, latexTable, noPlot
#                         forwarded to TFplot() under the same names. The
#                         defaults mirror TFplot's, so they may be omitted.
# Returns whatever TFplot() returns.
countTFs <- function(geneListA, geneListB, A = "UPreg", B = "DOWNreg",
                     labA = A, labB = B, title = "TF families",
                     latexTable = FALSE, noPlot = FALSE) {
  # Example: countTFs(DvsWTdryset$UP, DvsWTdryset$DN)
  # Arguments are forwarded by name. Without defaults on this wrapper, an
  # omitted argument would reach TFplot as a missing promise and fail when
  # forced instead of falling back to TFplot's default.
  TFplot(TFcounts(geneListA), TFcounts(geneListB),
         A = A, B = B, labA = labA, labB = labB, title = title,
         latexTable = latexTable, noPlot = noPlot)
}
# Counts, per transcription-factor (TF) family, how many genes of a gene list
# appear in the TF annotation table.
#
# Arguments:
#   geneList  vector of gene identifiers (AT numbers). Matching against the
#             'Protein_ID' column is case-insensitive.
#   TFfile    "default" to use the 'TFsWithType' dataset loaded through
#             utils::data(), or the path to a tab-separated file with a header
#             row and at least the columns 'Protein_ID' and 'Family'.
#
# Returns a one-row data.frame with one column per TF family represented in
# geneList (in order of first appearance), each holding the number of hits.
# When no gene matches, the data.frame has one row and zero columns.
#
# Example code: UP and DOWN
#   ARvsD12h_UP_TFs <- TFcounts(ARvsD12hset$UP)
#   ARvsD12h_DN_TFs <- TFcounts(ARvsD12hset$DN)
TFcounts <- function(geneList, TFfile = "default") {
  geneList <- toupper(geneList) # removes case-sensitivity

  ## Load the TF annotation table ##
  if (identical(TFfile, "default")) {
    # Load into this function's frame explicitly so the lookup below does not
    # depend on where data() places the object by default.
    utils::data("TFsWithType", envir = environment())
    tfTable <- get0("TFsWithType", envir = environment(), inherits = TRUE)
    if (is.null(tfTable)) {
      stop("Unable to load the default 'TFsWithType' dataset.", call. = FALSE)
    }
  } else if (is.character(TFfile) && length(TFfile) == 1L && file.exists(TFfile)) {
    # An explicitly supplied, readable file always wins.
    tfTable <- utils::read.table(TFfile, header = TRUE, sep = "\t")
  } else {
    # Backward-compatible fallback: use an already existing 'TFsWithType'
    # object if one is visible from here, otherwise fail with a clear message.
    tfTable <- get0("TFsWithType", envir = environment(), inherits = TRUE)
    if (is.null(tfTable)) {
      stop("Unable to locate 'TFfile' file. Please check the path.", call. = FALSE)
    }
    warning("Unable to locate 'TFfile' file; using the existing 'TFsWithType' object instead.",
            call. = FALSE)
  }

  missingCols <- setdiff(c("Protein_ID", "Family"), colnames(tfTable))
  if (length(missingCols) > 0) {
    stop("TF table is missing required column(s): ",
         paste(missingCols, collapse = ", "), call. = FALSE)
  }

  # Families of the annotated TFs that occur in geneList. Both sides are
  # upper-cased so the match really is case-insensitive; as.character() guards
  # against factor columns.
  isHit <- toupper(as.character(tfTable$Protein_ID)) %in% geneList
  hitFamilies <- as.character(tfTable$Family[isHit])

  # Lists the different TF families present in geneList
  familiesRepresented <- unique(hitFamilies)

  # Number of hits in each TF family; yields integer(0) when nothing matched,
  # so no placeholder column is ever created.
  familyCounts <- vapply(
    familiesRepresented,
    function(family) sum(hitFamilies %in% family),
    integer(1),
    USE.NAMES = FALSE
  )

  # One row, one column per TF family. Names are assigned afterwards so that
  # family names are kept verbatim (no make.names() mangling).
  TFfamilyCounts <- data.frame(
    matrix(familyCounts, nrow = 1, ncol = length(familiesRepresented))
  )
  colnames(TFfamilyCounts) <- familiesRepresented

  TFfamilyCounts
}
### Takes two TFcounts objects and plots them using ggplot2
#
# Arguments:
#   TFcountsA, TFcountsB  one-row data.frames as returned by TFcounts(): one
#                         column per TF family, holding the hit count.
#   A, B                  names of the two comparisons. The default pair
#                         ("UPreg", "DOWNreg") selects the pink/blue palette,
#                         anything else the yellow/gray palette.
#   labA, labB            legend labels for A and B (default to A and B).
#   title                 plot title.
#   latexTable            if TRUE, print a standalone LaTeX document with the
#                         combined count table (missing counts shown as 0).
#   noPlot                if TRUE, skip building the plot.
#
# Returns the ggplot object, or invisible NULL when noPlot is TRUE or when
# neither input contains any TF family.
#
# Example code: UP and DOWN
#   TFplot(ARvsD12h_UP_TFs, ARvsD12h_DN_TFs)
# Example code: diffA and diffB
#   TFplot(ARvsD12h_TFs, WTvsD12h_TFs, A = "ARvsD12h", B = "WTvsD12h")
TFplot <- function(TFcountsA, TFcountsB, A = "UPreg", B = "DOWNreg", labA = A, labB = B, title = "TF families", latexTable = FALSE, noPlot = FALSE) {
  #### Preparing data ####
  # Combined list of family names (unique), ordered alphabetically
  combinedNames <- sort(unique(c(colnames(TFcountsA), colnames(TFcountsB))))

  # Nothing to tabulate or plot: bail out early instead of failing further down.
  if (length(combinedNames) == 0) {
    warning("No TF families found in either input; nothing to tabulate or plot.",
            call. = FALSE)
    return(invisible(NULL))
  }

  # Combined data frame: one row per family, counts for A and B
  # (NA where a family is absent from that input)
  TFcluster.df <- data.frame(matrix(NA, nrow = length(combinedNames), ncol = 3))
  rownames(TFcluster.df) <- combinedNames
  colnames(TFcluster.df) <- c("Family", "A", "B")
  TFcluster.df$Family <- combinedNames

  # Fill in the dataframe with the number of hits in each TF family
  for (i in seq_along(combinedNames)) {
    family <- combinedNames[i]
    if (family %in% colnames(TFcountsA)) {
      TFcluster.df$A[i] <- TFcountsA[1, family]
    }
    if (family %in% colnames(TFcountsB)) {
      TFcluster.df$B[i] <- TFcountsB[1, family]
    }
  }

  #### Optional LaTeX table (NAs replaced with 0s) ####
  if (isTRUE(latexTable)) {
    TFclust <- TFcluster.df
    TFclust$A[is.na(TFclust$A)] <- 0
    TFclust$B[is.na(TFclust$B)] <- 0
    TFtable <- TFclust[, 2:3]
    writeLines(c(
      "\\documentclass[border={(0.5pt) (0.8pt) (1pt) (1pt)}]{standalone}",
      "\\begin{document}",
      "\\SweaveOpts{concordance=TRUE}\n"
    ))
    # 'floating' is an argument of print.xtable(), not of xtable(): passed to
    # xtable() it is swallowed by '...' and the table is still wrapped in a
    # float environment.
    xtable::print.xtable(xtable::xtable(TFtable, digits = c(0, 0, 0)),
                         floating = FALSE)
    writeLines("\\end{document}")
  }

  if (noPlot) {
    return(invisible(NULL))
  }

  #### Long format for ggplot2 ####
  TFcluster.long <- reshape::melt(
    TFcluster.df,
    # ID variable: kept, not split apart
    id.vars = "Family",
    # Measure variables: the source columns. B comes first, so the factor
    # levels of the destination column are B, A.
    measure.vars = c("B", "A"),
    # Name of the destination column that identifies the original column
    # each measurement came from
    variable_name = "Comparison"
  )
  # Keep the families in alphabetical order on the axis
  TFcluster.long$Family <- factor(TFcluster.long$Family, levels = combinedNames)

  if (A == "UPreg" && B == "DOWNreg") {
    colorA <- "#E82A76" # UP => pinkish
    colorB <- "#3B2DD6" # DOWN => blueish
  } else {
    colorA <- "#E69F00" # A => yellow
    colorB <- "#999999" # B => gray
  }

  #### Now for the TAGGIT plot ####
  ggplot2::ggplot(data = TFcluster.long,
                  ggplot2::aes(x = Family, y = value, fill = factor(Comparison))) +
    ggplot2::geom_bar(position = "dodge", stat = "identity", width = 0.8) +
    ggplot2::scale_y_continuous() +
    ggplot2::coord_flip() +
    ggplot2::xlab("") + # Set x-axis label
    ggplot2::ylab("Number of TFs") +
    ggplot2::theme_bw() +
    ggplot2::ggtitle(title) +
    ggplot2::scale_x_discrete(breaks = combinedNames, labels = combinedNames) +
    ggplot2::theme(legend.position = c(.800, .900),
                   legend.background = ggplot2::element_rect(fill = "transparent"),
                   legend.text.align = 0) +
    ggplot2::labs(fill = NULL) +
    # Colours are named by level so A always gets colorA and B colorB.
    # Unnamed values are matched positionally, and whether that is against
    # the breaks or against the factor levels (B, A) differs between ggplot2
    # versions.
    ggplot2::scale_fill_manual(breaks = c("A", "B"),
                               values = c(A = colorA, B = colorB),
                               labels = c(labA, labB))
  # For publications exported EPS at 400x511 resolution (??)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.