R/plotEPG.R

#' @title plotEPG
#' @author Oyvind Bleka <Oyvind.Bleka.at.fhi.no>
#' @description EPG plotter created by Oskar Hansson.
#' @details Plots peak height with corresponding allele for one sample for a given kit.
#' @param Data List of adata- and hdata-elements.
#' @param kit name of kit: {"ESX17","ESI17","ESI17Fast","ESX17Fast","Y23","Identifiler","NGM","ESSPlex","ESSplexSE","NGMSElect","SGMPlus","ESX16", "Fusion","GlobalFiler"}
#' @param sname Sample name label.
#' @param threshT The detection threshold can be shown in gray in the plot.
#' @param refcond condition on a list$refname$locname$adata of reference alleles which are labeled in EPG
#' @export
plotEPG <- function(Data,kitname,sname="",threshT=0,refcond=NULL) {
 #Data is list with allele and height data. Only one sample!
 #for selected sample:

 generateEPG<-function(typingKit, alleleList, peakHeightList, locusVector, sampleName, refLabelList=NULL, refnames=NULL, dyeVector=NULL,  offsetVector=NULL,repeatUnitVector=NULL,  drawBoxPlots=TRUE, drawPeaks=TRUE){

	# This function generates an electropherogram (EPG) like plot from lists of allele names and peak heights.
	# Default values are used if no typing kit is specified.
	# If a typing kit is specified some or all of its properties can be overriden.

	# INPUT PARAMETERS:

	# alleleList - a list of alleles names.
	# peakHeightList - a list of peak heights.
	# dyeVector - a vector specifying the color of markers.
	# locusVector - a vector specifying the marker names.
	# offsetVector - a vector specifying the marker start offsets in base pairs.
	# repeatUnitVector - a vector specifying the repeat unit size in base pairs.
	# sampleName - title on the EPG.
	# refLabelList - a list of reference-index
	# refnames - a vector with reference names
 	# typingKit - kit short name or index number as specified in the function 'getKit'.

	# NB! If 'locusVector', 'dyeVector', 'offsetVector' or 'repeatUnitVector' is provided
	# they override the information in a matching 'typingKit' IF they are of the same lenght.


	# FLAGS:

	kitFound <- FALSE


	# CONSTANTS:

	# Default sample name / epg header:
	defSampleName <- "EPG"

	# Default repeat unit in base pairs:
	defaultRepeatUnit <- 4

	# Default locus name (will be suffixed with a number):
	defaultLocusName <- "Locus "

	# Default marker spacing in base pairs:
	defaultMarkerSpacing <- 100

	# Available letter abreviations for colors:
	colorLetters <- c("X", "R", "B", "G", "Y")

	# Numeric values corresponding to color abreviations:
	# NB! There are 8 colors that can be represented by a single number character, palette():
	# 1="black", 2="red", 3="green3", 4="blue", 5="cyan", 6="magenta", 7="yellow", 8="gray" 
	colorNumbers <- c(1, 2, 4, 3, 7)

	# GRAPH CONSTANTS:
	
	# Graph title:
	# sampleName is used as title.
	
	# X axis label:
#	xlabel <- "base pair"
	xlabel <- "bp"

	# Y axis label:
	ylabel <- "peak height (rfu)"	

	# Peak half width in base pair (width of peak = 2 * peakHalfWidth):
	peakHalfWidth <- 0.6

	# Relative Allele name text size:	
	alleleNameTxtSize <- 0.8

	# Distance between the highest peak in a plot and the plot border (1.04 = 4% margin).
	yMarginTop <- 1.04


	# VERIFICATIONS
	# REQUIRED PARAMETERS:	

	# Check if allele name list is provided.
	if(is.null(alleleList) || !is.list(alleleList))	{

		stop("'alleleList' must be a list giving the alleles per marker/locus")
	}

	# Check if peak height list is provided.
	if(is.null(peakHeightList) || !is.list(peakHeightList)) {

		stop("'peakHeightList' must be a list giving the peak heights per marker/locus")
	}

	# VERIFICATIONS
	# OPTIONAL PARAMETERS:	

	# Check if sample name is provided.
	if (is.null(sampleName)) {
		sampleName <- defSampleName
	}

	# Check if kit name is provided.
	if (!is.null(typingKit)){

		# Get kit information.
		kit <- getKit(typingKit)
	
		# Check if kit was found.	
		if (is.na(kit)[1]) {
			print("Kit not found.")
			print("Using default values.")
			kitFound <- FALSE
			
		} else { # Kit found. Save information in vectors.
		
			kitFound <- TRUE

                  unittab <- getKit(typingKit, what="offset")
                  rangetab <- getKit(typingKit, what="range") 
#cbind(dyetab,unittab) 

			# Marker/locus names.
			locusVectorKit <- rangetab[,1]

			# Dye for each marker/locus
			dyeVectorKit <- rangetab[,2] 
                  dyeVectorKit <- toupper(substr(dyeVectorKit, 1, 1)) #Convert!

			#Base pair start offset for each marker.
			offsetVectorKit <- unittab[,2]

			#Size in base pair of repeating unit for each marker.
			repeatUnitVectorKit <- unittab[,3]

                  #Range of markers:
                  locusMinVectorKit <- rangetab[,3]
                  locusMaxVectorKit <- rangetab[,4]

		}
	}

	# If kit is specified...
	if (kitFound){
		# ... check length of allele list.
		numberOfAlleles <- length(alleleList)
		numberOfAllelesKit <- length(locusVectorKit)
		if (numberOfAlleles != numberOfAllelesKit) {
			numberOfAlleles <- numberOfAlleles + 1
			# Extend with missing alleles (add NAs)
			for (index in numberOfAlleles:numberOfAllelesKit) {
				alleleList[[index]] <- c(NA)
			}
		}
		# ... and length of peak height list.
		numberOfPeaks <- length(peakHeightList)
		numberOfAllelesKit <- length(locusVectorKit)
		if (numberOfPeaks != numberOfAllelesKit) {
			numberOfPeaks <- numberOfPeaks + 1
			# Extend with missing peaks (add NAs)
			for (index in numberOfPeaks:numberOfAllelesKit) {
				peakHeightList[[index]] <- c(NA)
			}
		}
	}

	# Check dye vector.
	if (is.null(dyeVector)) {
		if (kitFound) {
			dyeVector <- dyeVectorKit
		} else {
			# No dye vector is specified. Use default color.
			dyeVector <- rep("X", length(alleleList))
		}
	} else {
		if (kitFound) {
			# Use kit specifications if incorrect length.
			if (length(dyeVector) != length(dyeVectorKit)) {
				print("'dyeVector' is not of the same length as kit specifications.")
				print("Kit specifications will be used.")
				dyeVector <- dyeVectorKit
			} else {
				# Override kit specifications with provided values.
			}
		}
	}

	# Check locus vector.
	if (is.null(locusVector)) {
		if (kitFound) {
			locusVector <- locusVectorKit
		} else {
			# No locus vector is specified. Use default name + number suffix.
			locusVector <- paste(rep(defaultLocusName, length(alleleList)), seq(1:length(alleleList)))
		}
	} else {
		if (kitFound) {
			# Use kit specifications if incorrect length.
			if (length(locusVector) != length(locusVectorKit)) {
				print("'locusVector' is not of the same length as kit specifications.")
				print("Kit specifications will be used.")
				locusVector <- locusVectorKit
			} else {
				# Override kit specifications with provided values.
			}
		}
	}
	# Check offset vector.
	if (is.null(offsetVector)) {
		if (kitFound) {
			offsetVector <- offsetVectorKit
		} else {
			# No offset vector is specified. Use default marker spacing.
			offsetVector <- seq(0, length(alleleList) * defaultMarkerSpacing, by = defaultMarkerSpacing)
		}
	} else {
		if (kitFound) {
			# Use kit specifications if incorrect length.
			if (length(offsetVector) != length(offsetVectorKit)) {
				print("'offsetVector' is not of the same length as kit specifications.")
				print("Kit specifications will be used.")
				offsetVector <- offsetVectorKit
			} else {
				# Override kit specifications with provided values.
			}
		}
	}
	# Check repeatUnit vector.
	if (is.null(repeatUnitVector)) {
		if (kitFound) {
			repeatUnitVector <- repeatUnitVectorKit
		} else {
			# No repeatUnit vector is specified. Use default repeat unit length.
			repeatUnitVector <- rep(defaultRepeatUnit, length(alleleList))
		}
	} else {
		if (kitFound) {
			# Use kit specifications if incorrect length.
			if (length(repeatUnitVector) != length(repeatUnitVector)) {
				print("'repeatUnitVector' is not of the same length as kit specifications.")
				print("Kit specifications will be used.")
				repeatUnitVector <- repeatUnitVectorKit
			} else {
				# Override kit specifications with provided values.
			}
		}
	}


	# PREPARE PARAMETERS

	# Convert dye vector to numeric vector.
	colorVector <- colorNumbers[match(dyeVector, colorLetters)]
	
	# Get number of colors.
#	noColors <- nlevels(factor(dyeVector))  #removed ŘB

	# Get colors
#	colors <- levels(factor(dyeVector))#removed ŘB
#	colors <- colorNumbers[match(colors, colorLetters)]#removed ŘB
	colors <- unique(colorVector)  #added ŘB
      noColors <-  length(colors) #added ŘB



	# INITIATE VARIABLES

	# Create lists:
	basePairList <- list()
	phListByColor <- list()
	allelesByColorList <- list()
	
      markerByColorList <- list()
      bpmarkerByColorList <- list()
      refLabelByColorList <- list()
      isLab <- length(refnames)>0 #boolean have labels

	# SORT DATA ACCORDING TO COLOR CHANNEL and
	# CONVERT ALLELE NAMES TO FRAGMENT LENGTH IN BASE PAIRS

	# Loop over all color channels.
	for (color in 1:noColors){
            basePairTmpLst <- list()
		# Boolean vector indicating selected markers (same color).
 		selectedMarkers <- colorVector == colors[color]

            if (kitFound) {
             markerByColorList[[color]] <- locusVectorKit[selectedMarkers] #EDIT by ŘB: marker names
		} else {
             markerByColorList[[color]] <- locusVector #EDIT by ŘB: marker names
            }

		# Extract all alleles in the same color channel.
		allelesByColor <- alleleList[selectedMarkers]

		# Extract all peak heights in the same color channel.
		peakHeightsByColor <- peakHeightList[selectedMarkers]

		# Extract all marker offsets in the same color channel.
		offsetByColor <- offsetVector[selectedMarkers]

		# Extract all repeat unit sizes in the same color channel.
		repeatUnitByColor <- repeatUnitVector[selectedMarkers]

            refLabelByColor <- refLabelList[selectedMarkers] #added ŘB

		# Loop over all markers in that color channel.
		for (marker in 1:length(allelesByColor)){

			# Get alleles for the current marker.
			alleleValue <- allelesByColor[[marker]]
			# Check presence of X/Y.
			indexOfXY <- grep("[X,x,Y,y]", alleleValue)
			if (length(indexOfXY)) {
				alleleValue <- toupper(alleleValue)
				# Use 1 and 2 for X and Y.
				alleleValue <- sub("X", 1, alleleValue)
				alleleValue <- sub("Y", 2, alleleValue)
				alleleValue <- as.numeric(alleleValue)
			} else { #added ŘB
			 alleleValue <- as.numeric(alleleValue) #added ŘB
                  } #added ŘB

			# Convert all allele names in current marker to base pairs.
			basePairTmpLst[[marker]] <- offsetByColor[marker] + floor(alleleValue) * repeatUnitByColor[marker] + (alleleValue %% 1) * 10
		}
            bpmarkerByColorList[[color]] <- sapply(basePairTmpLst,function(x) x[1])
           
		# Add basepair to list.
		for(row in 1:length(allelesByColor)){

			# Avoid 'subscript out of bounds' error.
			if (length(basePairList)<color){
				basePairList[[color]] <- basePairTmpLst[[row]]
				phListByColor[[color]] <- peakHeightsByColor[[row]]
				allelesByColorList[[color]] <- allelesByColor[[row]]
                        if(isLab) refLabelByColorList[[color]] <- refLabelByColor[[row]]
			} else {
				basePairList[[color]] <- c(basePairList[[color]], basePairTmpLst[[row]])
				phListByColor[[color]] <- c(phListByColor[[color]], peakHeightsByColor[[row]])
				allelesByColorList[[color]] <- c(allelesByColorList[[color]], allelesByColor[[row]])
 				if(isLab) refLabelByColorList[[color]] <- c(refLabelByColorList[[color]], refLabelByColor[[row]])

			}
		}
	}


	# CREATE GRAPH

	# Set up the plot window according to the number of color channels.
	par(mfrow = c(noColors, 1))

	# Make x axis cross at y = 0 (i.e. remove the 4% margin at both ends of the y axis).
	par(yaxs = "i")

	# Reduce the spacing between the plots.
	# c(bottom, left, top, right) is the number of lines of margin to be specified on the four sides of the plot.
	# The default is c(5, 4, 4, 2) + 0.1
	par(mar = c(2.3, 4, 1.5, 1) + 0.1)

	# Define lower and upper bound for the x axis.
	xMin <- .Machine$integer.max 
	xMax <- 0
	for (row in 1:length(basePairList)){

#print("basePairList[[row]]")
#print(basePairList[[row]])

		xChannelMin <- sapply(na.omit(basePairList[[row]]),min)
		if (length(xChannelMin) > 0) {
			xChannelMin <- min(xChannelMin)
			xVal <- is.finite(xChannelMin)
		} else {
			xVal <- FALSE
		}
		if (xMin > xChannelMin && xVal == TRUE) {
			xMin <- xChannelMin
		}
		xChannelMax <- sapply(na.omit(basePairList[[row]]),max)
		if (length(xChannelMax) > 0) {
			xChannelMax <- max(xChannelMax)
			xVal <- is.finite(xChannelMax)
		} else {
			xVal <- FALSE
		}
		if (xMax < xChannelMax && xVal == TRUE) {
			xMax <- xChannelMax
		}
	}

	#Loop over all color channels.
	for (color in 1:noColors){

		# Get alleles and peak heights for current marker.
		bpVector <- basePairList[[color]]
		phList <- phListByColor[[color]]

		# Create blank plot with axes.
#		yMax <- sapply(na.omit(phList),max)
		yMax <- sapply(phList[!is.na(phList)],max)

		noData <- FALSE
		if (length(yMax) == 0) {
#print("length yMax == 0")
			yMax <- 1 # Minimal height.
			noData <- TRUE
		}
		yMax <- max(yMax)
#		yMax <- sapply(na.omit(yMax),max)

#		noData <- FALSE
		if (is.infinite(yMax) || is.na(yMax)) {
			yMax <- 1 # Minimal height.
			noData <- TRUE
		}
		plot(c(xMin, xMax), c(0, yMax), type="n", ylim = c(0, yMax * yMarginTop), ann = FALSE,axes=FALSE)
            axis(side = 2)
            nl <- 25 #number of ticks
            axis(side = 1,at=seq(xMin,xMax,l=nl),label=rep("",nl))
            if(isLab && color==1) legend("topright",legend=paste0("Label ",1:length(refnames)," = ",refnames),bty="n")
            abline(h=0)
            if(threshT>0) abline(h=threshT,col="gray",lwd=0.5)

		# Write text if no data.
		if (noData) {
			text(xMax / 1.4, yMax / 2, labels="No data", cex = 1.5)
		}

		# Create a title.
		if (color == 1) {
		title(main = sampleName, col.main = "red", font.main = 4)
		}	

		# Label the y axes.
		title(ylab = ylabel)

		# Label the x axis.
		# pos values of 1, 2, 3 and 4 indicate positions below, left, above and right of the coordinate.
#		mtext(paste(xlabel), side = 1, line = 2, adj = 0, cex = alleleNameTxtSize)
		mtext(paste(xlabel), side = 1, line = 0, adj = 0, cex = alleleNameTxtSize)

		# Write allele names under the alleles.
		# The additional par(xpd=TRUE) makes it possible to write text outside of the plot region.
		text(bpVector, 0, labels = allelesByColorList[[color]], cex = alleleNameTxtSize, pos = 1, xpd = TRUE) 
            if(isLab) text(bpVector,-yMax/20,labels=refLabelByColorList[[color]],cex = alleleNameTxtSize,pos=1, xpd = TRUE)

#           text(bpmarkerByColorList[[color]], yMax + yMax*0.02 ,expression(bold(paste0(markerByColorList[[color]]))),cex=alleleNameTxtSize,font=1)
            text(bpmarkerByColorList[[color]], yMax + yMax*0.04 ,markerByColorList[[color]],cex=1,font=2,xpd = TRUE)

		# Loop over all peaks.
		for (peak in 1:length(bpVector)){

			# Check if boxplot is to be drawn. Do not dra if only one value.
			if (drawBoxPlots && length(phList[[peak]]) > 1) {
			# Draw boxplots showing the distribution of peak heights.
			boxplot(phList[peak], add=TRUE, at=bpVector[peak],
				border=colors[color],pars=list(boxwex=peakHalfWidth*20, axes=FALSE))
			}


			if (drawPeaks) {
			# Create corners of peak triangle.
			xCords <- c(bpVector[peak] - peakHalfWidth, bpVector[peak], bpVector[peak] + peakHalfWidth)
			yCords <- c(0, lapply(phList[peak],mean), 0)
			# Plot peaks as filled polygons.
			polygon(xCords, yCords, col = colors[color])
			}
		}
	}
 dev.new()
 op <- par(no.readonly = TRUE)
 dev.off()
 par(op)
} #end plot function

 #START FUNCTION
 alength <- sapply(Data,function(x) length(x$adata))
 hlength <- sapply(Data,function(x) length(x$hdata))
 locs <- names(Data)

 if( all(alength==0) ) {
  gmessage(message="There is no allele data in sample!",title="Error",icon="error")
 } else {

   #fix order prior:
   kit <- getKit(kitname)
   adata <- lapply(Data,function(x) x$adata)
   hdata <- lapply(Data,function(x) x$hdata)
   cdata <- list() #used to label conditioned references
   for(loc in locs) { #impute missing heights with 1
    lA <- length(adata[[loc]])
    if(length(hdata[[loc]])==0 && length(lA>0)) hdata[[loc]] <- rep(1,lA) #impute with height 1

    #reference data:
    if(!is.null(refcond)) {
     rdata <- lapply(refcond,function(x) unlist(x[[loc]]))
     unrdata <- unique(unlist(rdata))
     newA <- unrdata[!unrdata%in%adata[[loc]]] #get new alleles not seen in data
     nA <- length(newA)
     if(nA>0) {
      adata[[loc]] <- c(adata[[loc]], newA)
      hdata[[loc]] <- c(hdata[[loc]], rep(1e-6,nA)) #insert peak heights
     }
     cdata[[loc]] <- rep("",length(adata[[loc]]))
     for(rn in names(refcond)) {
       ri <- which(names(refcond)==rn) #reference index
       ind <- adata[[loc]]%in%rdata[[rn]]
       fix <- nchar(cdata[[loc]][ind])>0 
       cdata[[loc]][ind][fix] <- paste0(cdata[[loc]][ind][fix],"/")
       cdata[[loc]][ind] <- paste0(cdata[[loc]][ind],ri)
     }
    }
   } #end for each loci

   if (!is.na(kit[[1]][1])) { #the kit was found.
      kitlocs <- toupper(getKit(kitname,what="marker")) #make uppercase
      sname <- paste0(kitname," - ",sname)
      AMELname <- kitlocs[grep("AM",kitlocs,fixed=TRUE)] #get amel-name
      if(!is.null(AMELname)) {
       AMind <- grep("AM",toupper(locs),fixed=TRUE)
       locs[AMind] <- AMELname
       names(adata)[AMind] <- AMELname
       names(hdata)[AMind] <- AMELname
      }
      #Insert missing loci:
      missloc <- kitlocs[!kitlocs%in%locs] #get missing loci
      for(loc in missloc) {
       	adata[loc] = "" 
            hdata[loc] = 1e-6 #default is binary signal (below detection threshold)
            cdata[loc] = ""
      }
	adata <- adata[kitlocs] 
	hdata <- hdata[kitlocs] 
	cdata <- cdata[kitlocs] 
   } else {
    kitlocs <- locs
   }
   generateEPG(typingKit=kitname,alleleList=adata,peakHeightList=hdata, locusVector=kitlocs,sampleName=sname, drawBoxPlots=FALSE, drawPeaks=TRUE,refLabelList=cdata,refnames=names(refcond))
 }
}

Try the gammadnamix package in your browser

Any scripts or data that you put into this service are public.

gammadnamix documentation built on May 2, 2019, 4:59 p.m.