phenotypePlot <-
function(d, max.y,max.x, suggestive.line, significant.line,
size.x.labels=9, size.y.labels=9, switch.axis=F, sort.by.value=F, sort.by.category.value=F,
#annotation
base.labels=F,
annotate.phenotype.description,
annotate.angle=0, annotate.size=5, annotate.level,
annotate.phenotype=F,
annotate.snp.w.phenotype=F,
annotate.snp=F, annotate.snp.angle=0,
annotate.list, annotate.only.largest=T,
#labels
lc.labels=F,
x.group.labels=T, x.phenotype.labels=F,
sizes=F, direction=F, point.size=3,
#plot characteristics
use.color=T,
color.palette,
title= paste0("Phenotype Plot ", date()),
x.axis.label="Phenotypes",
y.axis.label="Values",
y.axis.interval=5) {
#Check to ensure the input contains columns phenotype and value
if ( sum(c("phenotype","value") %in% names(d))<2 ) stop("Data input must contain columns phenotype and value.")
#Check for annotation information
if (!missing(annotate.phenotype.description)) {
#If it is a data frame with appropriate fields, set it up
if(is.data.frame(annotate.phenotype.description) && sum(c("phenotype","description") %in% names(annotate.phenotype.description))==2) {
#Add annotation
d=merge(d,annotate.phenotype.description)
annotate.phenotype.description=T
} else if(is.logical(annotate.phenotype.description)) {
#If there is a logical, check to see if it meets criteria
if(annotate.phenotype.description==T && !length(d$description)) {
stop("Annotate.phenotype.description must contain columns phenotype and description, or be TRUE with provided d$description.")
}
#Else do nothing, as it is ready.
} else {
#If it wasn't appropriate input, throw an error
stop("Annotate.phenotype.description must contain columns phenotype and description, or be TRUE with provided d$description.")
}
} else {
#If no description annotation was mentioned, set it to FALSE
annotate.phenotype.description=F
}
if ((annotate.snp || annotate.snp.w.phenotype) && !("snp" %in% names(d))) stop("You requested SNP annotation but d$snp is not defined.")
if(annotate.snp.w.phenotype && annotate.snp) warning("You requested SNP annotation twice")
#Is there any annotation?
annotate=annotate.phenotype.description || annotate.phenotype || annotate.snp || annotate.snp.w.phenotype
#If so, but nothing listing to annotate, warn and remove annotate flag
if(annotate && (missing(annotate.level) || is.na(annotate.level)) && missing(annotate.list)) {
warning("You requested annotation, but did not specify annotate.level or annotate.list.")
annotate=F
}
#Check for conflicting color commands
if(!use.color && !missing(color.palette)) stop("You requested no color, but provided a color palette.")
#Check for color
if(use.color) {
if(missing(color.palette)) {
if(!("color" %in% names(d))) stop("You requested color, but did not provide a color attribute in d or color.palette")
else if(class(d$color)=="factor") warning("The color attribute is a factor and no color palette is provided: R default color scheme will be used. Convert color to character if it contains color names or codes")
}
#Set up color if using a palette
if(!missing(color.palette) && !length(d$color)) {
if(length(d$groupnum)) d$color=d$groupnum
else stop("You requested use.color, but d$color or d$groupnum were not provided to distinguish groups.")
}
} else {
#Set the colors to all black if no color is requested
d$color="#000000"
}
#Check for point sizing/direction information if requested
if(sizes && !length(d$size)) stop("You requested size information, but did not provide d$size")
if(direction && !length(d$direction)) stop("You requested direction information, but did not provide d$direction")
#Set point size if sizes are not requested
if(!sizes) d$size=point.size
#Remove lc.labels flag if no annotations
if(!annotate.phenotype.description && !annotate.phenotype && !annotate.snp && !annotate.snp.w.phenotype) lc.labels=F
#One cannot sort by value and have x.group.labels
if (sort.by.value && x.group.labels) stop("One cannot sort universally by value and have x group labels. Try sort.by.category.value or not labeling the groups.")
#Check for group information if requested
if((sort.by.category.value || x.group.labels) && (sum(c("groupnum","group") %in% names(d))<2)) {
stop("Requested group information, but did not provide d$groupnum and d$group.")
}
#If no group info, just assign all to group 0
if(!("groupnum"%in%names(d))) {
d$groupnum=0
}
#Remove items with NA values or phenotypes
d=d[!(is.na(d$phenotype) | is.na(d$value)),]
#Sort by the phenotype
d=d[order(d$phenotype),]
#Set the maximum x value to fit all phenotypes if not specified
if(missing(max.x)) max.x = length(unique(d$phenotype))
#Create the list of phenotypes, finding the best values for each phenotype
phenotypes=aggregate(value ~ phenotype + groupnum, d,FUN=max)
#Remove the least significant phenotypes; only has an effect if max.x was specified.
phenotypes=phenotypes[order(phenotypes$value, decreasing=T),][1:min(nrow(phenotypes),max.x),]
#If the user requested sorting by values
if (sort.by.value) {
#Sort by values
phenotypes=phenotypes[order(phenotypes$value, decreasing=T),]
} else if (sort.by.category.value) {
#If the user requested soring by values within each group.
#Sort by group and then value
phenotypes=phenotypes[order(-phenotypes$groupnum,phenotypes$value, decreasing=T),]
} else {
#Restore the phenotype sorting order if no other sorting was requested
#TODO: What about non-numeric phenotypes? Need this to sort 008 etc.
phenotypes=phenotypes[order(phenotypes$groupnum,phenotypes$phenotype),]
}
phenotypes$seq = 1:nrow(phenotypes)
#Limit to phenotype and seq, as they are the only relevant columns
#Include value as min.value for annotation purposes
phenotypes=phenotypes[,c("phenotype","seq","value")]
names(phenotypes)[3]="min.value"
#Add sequence information
d=inner_join(phenotypes,d,by="phenotype")
d=d[order(d$seq),]
#Define the max y axis value if not provided
if(missing(max.y)) max.y=ceiling(max(d$value))
if(switch.axis) {
#Swap ordering
d=d[nrow(d):1,]
d$groupnum=max(d$groupnum)-d$groupnum+1
d$seq=max(d$seq)-d$seq+1
}
if (x.group.labels) {
labels= summarize(group_by(d, groupnum), tick=mean(unique(seq)),label=as.character(group[1]))
labels=labels[order(labels$tick),]
}
if(missing(color.palette)) {
color.palette = unique(d[order(d$seq),]$color)
names(color.palette)=color.palette
} else {
names(color.palette) = unique(d[order(d$seq),]$color)
}
#Check if we are using switch.axis
if(!switch.axis) {
#Generate the inital plot
plot=ggplot(d,ylab=y.axis.label,xlab=x.axis.label)
#Include lines for significance thresholds
if (!missing(suggestive.line) && !is.na(suggestive.line)) plot=plot+geom_hline(yintercept=suggestive.line,colour="blue", alpha=I(1/3),size=1)
if (!missing(significant.line) && !is.na(significant.line)) plot=plot+geom_hline(yintercept=significant.line,colour="red",alpha=I(1/3),size=1)
plot=plot+aes(seq,value,size=size,colour=color)
if(!sizes) plot=plot+scale_size(range=c(point.size,point.size),guide="none")
#Add points
plot=plot+geom_point()
#Color as defined
plot = plot + scale_colour_manual(values= color.palette, guide="none")
#If label the X axis with the groups if requested
if (x.group.labels) {
plot=plot+scale_x_continuous(name=x.axis.label, limits=c(1,max.x), breaks=labels$tick, labels=labels$label, expand=c(.01,0))
} else {
plot=plot+scale_x_continuous(name=x.axis.label, limits=c(1,max.x), breaks=c(-100), labels=c(""), expand=c(.015,0))
}
#Set the Y scale and labels
plot=plot+scale_y_continuous(y.axis.label, limits=c(0,max.y), breaks=seq(0,max.y,y.axis.interval), expand=c(0,.2))
#Set the default theme
plot=plot+theme(
panel.background=element_blank(),
panel.grid.minor=element_blank(),
axis.text.x=element_text(size=size.x.labels, colour="black", angle=-40, hjust=0, vjust=1),
axis.text.y=element_text(size=size.y.labels, colour="black"),
axis.line =element_line(colour="black"),
axis.ticks=element_line(colour="black")
)
} else {
####Generate plot with switch.axis
#Generate the inital plot
plot=ggplot(d,xlab=y.axis.label,ylab=x.axis.label)
#Include lines for significance thresholds
if (!missing(suggestive.line) && !is.na(suggestive.line)) plot=plot+geom_vline(xintercept=suggestive.line,colour="blue", alpha=I(1/3),size=1)
if (!missing(significant.line) && !is.na(significant.line)) plot=plot+geom_vline(xintercept=significant.line,colour="red",alpha=I(1/3),size=1)
plot=plot+aes(value,seq,size=size,colour=color)
if(!sizes) plot=plot+scale_size(range=c(point.size,point.size),guide="none")
#Add points
plot=plot+geom_point()
#Color as defined
plot = plot + scale_colour_manual(values= color.palette, guide="none")
#If label the Y axis with the groups if requested
if (x.group.labels) {
plot=plot+scale_y_continuous(name=x.axis.label, limits=c(1,max.x), breaks=labels$tick, labels=labels$label, expand=c(.015,.02))
} else {
plot=plot+scale_y_continuous(name=x.axis.label, limits=c(0,max.x), breaks=c(-100), labels=c(""), expand=c(.015,0))
}
#Set the Y scale and labels
plot=plot+scale_x_continuous(y.axis.label, limits=c(0,max.y), breaks=seq(0,max.y,y.axis.interval), expand=c(0,.2))
#Set the default theme
plot=plot+theme(
panel.background=element_blank(),
panel.grid.minor=element_blank(),
axis.text.y=element_text(size=size.x.labels, colour="black", hjust=1, vjust=.5),
axis.text.x=element_text(size=size.y.labels, colour="black", hjust=.5, vjust=0),
axis.line =element_line(colour="black"),
axis.ticks=element_line(colour="black")
)
}
#Hide the legend by default
plot = plot+theme(legend.position = "none")
#Add OR information
if(sizes){
plot= suppressWarnings(plot + theme(legend.position="right") +
scale_size("Size", range = c(point.size, 2*point.size)))
}
if(direction){
plot=plot+aes(shape = factor(direction), fill=color) +
scale_shape("Direction", solid = TRUE) + scale_shape_manual(values=c(25,24)) +
scale_fill_manual(values= color.palette, guide="none")
}
#If annotation present, start the definitions, otherwise skip it
if (annotate) {
d$annotate=F
#If provided with a list of phenotypes to annotate, select those.
if(!missing(annotate.list)) d[d$phenotype %in% annotate.list, ]$annotate=T
#Include those above the given threshold
if((!missing(annotate.level) && !is.na(annotate.level)) && (sum(d$value>=annotate.level)>0)) d[d$value>=annotate.level, ]$annotate=T
#Select only the largest value for each phenotype to label, unless requested otherwise
if(sum(d$value!=d$min.value)>0 && annotate.only.largest) d[d$value!=d$min.value,]$annotate=F
#If no base descriptions were included, add empty ones
if(!annotate.phenotype.description) {
d$description=""
}
#If requested the phenotype annotation, add it
if(annotate.phenotype) d$description = paste(d$phenotype, ifelse(d$description=="","",paste(":",d$description)),sep="")
#Add snp annotation to the phenotype label
if (annotate.snp.w.phenotype) d$description = paste(d$snp, ifelse(d$description=="","",paste(":",d$description)),sep="")
#Cap annotation length
d$description = substr(d$description,1,60)
#Add leading space
d$description = paste0(" ",d$description)
#If lower case labels are requested, lower case them.
if(lc.labels) d$description=tolower(d$description)
if(sum(d$annotate)==0) {
warning("Annotation requested, but no points met criteria")
} else {
#Add annotations
if(annotate.phenotype.description || annotate.phenotype || annotate.snp.w.phenotype) {
plot = plot + if(!base.labels) { ggrepel::geom_text_repel(aes(label=description),colour="black",data=d[d$annotate,],size=annotate.size,angle=annotate.angle)
} else {geom_text(aes(label=description),colour="black",data=d[d$annotate,],hjust=0,size=annotate.size,angle=annotate.angle)}
}
#Add SNP annotations
if(annotate.snp) {
plot = plot + if(!base.labels) {ggrepel::geom_text_repel(aes(label=snp),colour="black",data=d[d$annotate,],size=annotate.size,angle=annotate.snp.angle)
} else {geom_text(aes(label=snp),colour="black",data=d[d$annotate,],hjust=0,size=annotate.size,angle=annotate.snp.angle)}
}
}
}
#Add the title
plot=plot+labs(title=title) + theme(title=element_text(size=12))
plot
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.