Nothing
## LD.heat.map -- Steven J. Mack April 6, 2020 v1.0
## Displays a heatmap of the matrix for phased and unphased LD values returned by LDWrap()
#' Generates heat-maps for four linkage disequilibrium (LD) values ({D'}, {Wn}, {WLoc1/Loc2} and {WLoc2/Loc1}) generated for all pairs of phased and unphased two-locus haplotypes by LDWrap().
#'
#' This function accepts *_LD_results.csv files generated by LDWrap() as input, and generates a PNG-formatted heat-map plot file for each LD measure.
#'
#' @param dataName The "base" name of the _LD_result.csv files generated by LDWrap() without the "_Phased_LD_results.csv" or "_Unphased_LD_results.csv" suffixes. See Examples, below. If both corresponding "<dataName>_Phased_LD_results.csv" and "<dataName>_Unphased_LD_results.csv" files are not found, the funciton will halt with a notification. However, if only one of those files is found in the working director, half-matrix heat map plots will be generated.
#' @param phasedData The complete name of a file of phased LD results generated by LDWrap(). Provide this filename if no base name is provided for dataName and you want to generate heat-maps for a specific set of phased LD values.
#' @param unphasedData The complete name of a file of unphased LD results generated by LDWrap(). Provide this filename if no base name is provided for dataName and you want to generate heat-maps for a specific set of unphased LD values.
#' @param phasedLabel The label that should appear on the heat-map plots for the upper, phased half of the plot. The default option is 'Phased'.
#' @param unphasedLabel The label that should appear on the heat-map plots for the lower, unphased half of the plot. The default option is 'EM-estimated'.
#' @param color A logical parameter that identifies if the heat-maps should be plotted in color (TRUE) or greyscale (FALSE). The default option is TRUE.
#' @param writePlot A logical parameter that identifies if the heat-map plots should be automatically saved after they are generated. The default is 'writePlot=FALSE'.
#' @param writeDir The directory into which the heat-map plots should be saved when 'writePlot=TRUE'. The default is the directory specified by tempdir().
#' @keywords heat map linkage disequilibrium
#' @importFrom utils read.csv
#' @importFrom ggplot2 ggsave ggplot geom_tile geom_text scale_fill_gradient2 theme_minimal theme guides xlab ggtitle coord_fixed aes element_text element_blank guide_colorbar
#' @importFrom reshape2 melt
#' @keywords linkage disequilibrium heat map heat-map heatmap
#' @export
#' @examples
#' # Using the results of LDWrap() for the first 10 rows of the drb1.dqb1.demo dataset.
#' # Results are saved in the temporary directory as
#' # "hla-family-data_Phased_LD_results.csv" and
#' # "hla-family-data_Unphased_LD_results.csv", respectively.
#' LDWrap(drb1.dqb1.demo[1:10,])
#' LDWrap(drb1.dqb1.demo[1:10,],phased=FALSE)
#' exampleData <- paste(tempdir(),"hla-family-data",sep=.Platform$file.sep)
#' LD.heat.map(exampleData)
#' # Alternatively, these files can be sepcified individually to generate a half-matrix.
#' LD.heat.map(phasedData=paste(exampleData,"_Phased_LD_results.csv",sep=""),unphasedLabel="")
#' # Further, two different sets of results for the same loci can be plotted; e.g., using
#' # phasedData="my_Phased_LD_results.csv" and unphasedData="your_Phased_LD_results.csv".
#' @references Osoegawa et al. Hum Immunol. 2019;80(9):644 https://doi.org/10.1016/j.humimm.2019.05.018
LD.heat.map <- function(dataName="",phasedData="", unphasedData="",phasedLabel="Phased",unphasedLabel="EM-estimated", color=TRUE, writePlot=FALSE, writeDir=tempdir()) {
Var1 <- Var2 <- value <- NULL
## Importing LDWrap() output data and processing to populate matrices
dTop <- dBot <- TRUE
phaseDataRaw <- unphaseDataRaw <- ""
if(dataName!=""){
if(file.exists(paste(dataName,"_Phased_LD_results.csv",sep=""))) { phaseDataRaw <- read.csv(file=paste(dataName,"_Phased_LD_results.csv",sep=""),header = TRUE,stringsAsFactors = FALSE)}
if(file.exists(paste(dataName,"_Unphased_LD_results.csv",sep=""))) { unphaseDataRaw <- read.csv(file=paste(dataName,"_Unphased_LD_results.csv",sep=""),header=TRUE,stringsAsFactors = FALSE)}
# At lease one file of phased or unphased data must be available
} else {
if(file.exists(phasedData)) { phaseDataRaw <- read.csv(file=phasedData,header = TRUE,stringsAsFactors = FALSE)}
if(file.exists(unphasedData)) { unphaseDataRaw <- read.csv(file=unphasedData,header = TRUE,stringsAsFactors = FALSE)}
}
if(length(phaseDataRaw) == 1) {dTop <- FALSE}
if(length(unphaseDataRaw) == 1) {dBot <- FALSE}
if(!dTop && !dBot) {
if(dataName!="") { warning(paste("Files ",dataName,"_Phased_LD_results.csv"," and ",dataName,"_Unphased_LD_results.csv"," are not found.",sep=""))
} else {warning(paste("The specified phased and unphased datafiles are not found.",sep=""))}
} else {
# Some LDWrap() output files will contain monomorphic positions, and # subects < threshold for specific locus-combinations; these wil result in NA values in column 6.
if(dTop) { phaseDataRaw <- phaseDataRaw[!is.na(phaseDataRaw$N_Haplotypes),]} ## Be sure to check to see if dTop and bTop are True
if(dBot) { unphaseDataRaw <- unphaseDataRaw[!is.na(unphaseDataRaw$N_Haplotypes),] }
# Assuming both phased and unphased files are the same, use whichever you have and extract the locus names
locNames <- unique(as.vector(unlist(strsplit(unlist(ifelse(dTop,phaseDataRaw,unphaseDataRaw)),"~",TRUE))))
plotDo <- TRUE # With support for two distinct files, need to check to make sure the loci are the same
if(dTop && dBot && dataName =="") {
locNames2 <- unique(as.vector(unlist(strsplit(unlist(unphaseDataRaw[[1]]),"~",TRUE))))
if(length(locNames) != length(locNames2)) {
plotDo <- FALSE } else {
if(TRUE %in% (locNames != locNames2)) { plotDo <- FALSE #v0.5 correct evaluation of locus match
}
}
if(!plotDo) {warning(paste("Different loci included in ", phasedData, " and ", unphasedData,". Cannot produce heatmaps.", sep=""))}
}
if(plotDo) {
measureNames <- c("D'","Wn","Wa/b","Wb/a")
# building full matrixes for each LD measure
matrices <- list()
for(i in 1:4) {
# making 1 matrix for each LD measure
matrices[[i]] <- matrix(NA,length(locNames),length(locNames),FALSE,list(locNames,locNames))
# if rows without LD data had to be eliminated, the data columns have to be converted to numeric
if(dTop) {phaseDataRaw[[i+1]] <- as.numeric(phaseDataRaw[[i+1]])}
if(dBot) {unphaseDataRaw[[i+1]] <- as.numeric(unphaseDataRaw[[i+1]])}
for(j in 1:(length(locNames)-1)){
for(k in (j+1):length(locNames)) {
### v0.6 Some datasets don't have LD values for all locus pairs.
### This resulted in this match returnig a value of 'numeric(0)', which had length of 0. NA has a length of 1.
if(dTop) { matrices[[i]][j,k] <- phaseDataRaw[match(paste(locNames[j],locNames[k],sep="~"),phaseDataRaw$Loc1.Loc2,NA_integer_),i+1] }
if(dBot) { matrices[[i]][k,j] <- unphaseDataRaw[match(paste(locNames[j],locNames[k],sep="~"),unphaseDataRaw$Loc1.Loc2,NA_integer_),i+1] }
}
}
## Building and saving the plots
#ggplot2
currTest <- paste(measureNames[i],"value",sep="\n")
melted <- melt(matrices[[i]],na.rm=TRUE)
minCol <- maxCol <- midCol <- ""
if(color) {
minCol <- "blue"
midCol <- "white"
maxCol <- "red"
} else {
minCol <- "grey15"
midCol <- "grey53"
maxCol <- "grey91"
}
heatPlot <- ggplot(data=melted,aes(x=Var1,y=Var2,fill=value)) +
geom_tile(color="white") +
geom_text(aes(Var1,Var2,label=formatC(value,2,format="f")), color="black", size=4) +
scale_fill_gradient2(low=minCol,high=maxCol,mid=midCol,
midpoint=0.5,limit=c(0,1),space="Lab",name=currTest) +
theme_minimal() +
theme(axis.text.x = element_text(size=12), axis.text.y = element_text(size=12),
axis.title.x = element_text(size=12,vjust = -2), axis.title.y = element_blank(),
panel.grid.major = element_blank(), panel.border = element_blank(),
panel.background = element_blank(), axis.ticks = element_blank(),
legend.justification = c(1,0),legend.direction = "vertical", legend.position="right",
plot.title=element_text(size=12,hjust=0.5),axis.title=element_text(size=12)) +
guides(fill=guide_colorbar(barwidth=1,barheight = 7,
title.position = "top",title.hjust = 0.5)) +
xlab(label=unphasedLabel) +
ggtitle(phasedLabel) +
coord_fixed()
plot(heatPlot)
if(writePlot){
# If no dataName was specified combine the names of the specified phased and unphased files
if(dataName=="") {dataName <- paste(gsub(".csv","",phasedData,fixed=TRUE),gsub(".csv","",unphasedData,fixed=TRUE),sep="-")}
# Save each plot as it is defined
ggsave(paste(writeDir,paste(basename(dataName),gsub("/","-",measureNames[i],fixed=TRUE),"heatmap.png",sep="_"),sep=.Platform$file.sep),heatPlot,width=7.78,height = 7.39)
}
}
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.