Nothing
#' Display of a hbr object
#'
#' This function displays the summarized features of a hbr object
#'
#' @aliases show show.hbr
#' @author Susanne U. Franssen
#' @details This function displays the summarized features of a hbr object
#' @export
#' @importFrom methods show
#' @param object object of the class hbr data
#' @references Franssen et al. 2016, \href{http://mbe.oxfordjournals.org/content/early/2016/10/03/molbev.msw210.abstract}{Reconstruction of haplotype-blocks
#' selected during experimental evolution}, \href{http://mbe.oxfordjournals.org/}{MBE}
#' @seealso \code{\link{summary.hbr}} \code{\link{ex_dat}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("show","hbr",
function(object) {
cat("Object of class",class(object),"\n\n")
cat("Information on used time series data:\n")
cat("Number of populations included:",length(unique(object@dat@pop.ident)),"\n")
cat("Number of libraries included:",length(object@dat@pop.ident),"\n")
cat("Number of libraries used for filtering:",sum(object@dat@use.libs),"\n")
cat("Data has been filtered for SNPs with:\n"
,"\t allele frequencies >=",object@dat@min.minor.freq," in the founder population\n"
,"\t allele frequencies <=",object@dat@max.minor.freq," in the founder population\n"
,"\t a frequency increase of",object@dat@minfreqchange
," in at least ",object@dat@minrepl,"replicates\n")
cat("Number of SNPs after filtering:",nrow(object@dat@col.info),"\n")
cat("Window definition:\n")
cat("\t distance measure used:",object@dat@win.scale,"\n")
cat("\t applied windowsize:",object@dat@winsize,"\n")
cat("Parameters used for haplotype-block reconstruction:\n")
cat("Chromosome",object@chromosome,"\n")
cat("Parameters used for each sliding window:\n"
,"\t a minimum cluster size of ", object@min.cl.size,"\n"
,"\t an average correlation within a cluster of ", object@min.cl.cor,"\n")
cat("Parameters for cluster elongation across windows:\n"
,"\t a minimum number of ",object@min.inter,"intersecting markers\n\n")
cat("Summary of results:\n")
cat("Window summary:\n"
,"\t for ",sum(unlist(lapply(object@cl.chr,length))!=0)
," windows clusters were identified \n")
#cat(", with\n\t window_id #_cluster \n")
#print_wincl_hbr_info(object@cl.chr)
cat("Haplotype-block (hbr) summary:\n"
,"\t",length(object@cl.long.m)," haplotype-blocks (hbr) were reconstructed\n")
#cat(", with\n\t hbr_id #_marker \n")
#print_wincl_hbr_info(object@cl.long.m)
cat("\t hbrs (and markers) where included if they\n"
,"\t\t are only supported by a single window:"
,object@single.win)
})
#' Method to summarize information of reconstructed haplotype-blocks
#'
#' The method summarizes information of the reconstructed haplotype-blocks for a chromosome.
#'
#' @aliases summary.hbr
#' @author Susanne U. Franssen
#' @details The method operates on \code{\link{hbr}} objects and summarizes information of
#' reconstructed haplotype-blocks
#' @export
#' @param object object of class \code{\link{hbr}} containing the results of reconstructed haplotype
#' blocks to be summarized.
#' @param min.marker numeric specifying the minimum number of markers a haplotype-block to be
#' reported. NOTE: IDs of haplotype-blocks in the provided summary are identical to the
#' IDs of the previously identified blocks.
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#' @return data.table with row entries for each reconstructed haplotype-block and columns for:
#' chr (chromosome), id (numbering of selected blocks), n.marker (number of markers in the
#' reconstructed block), spos, epos (starting and end position of the block), len.pos (length
#' of the block in bp), MperMb (number of markers per Mb), swin, ewin (starting and end window of the block)
#'
setMethod("summary","hbr",
function(object, min.marker=1) {
mil=1000000
len=length(object@cl.long.m)
chr=rep(object@chromosome,len)
id=1:len
n.marker=unlist(lapply(object@cl.long.m,length)) # number of markers
spos=unlist(lapply(object@cl.long.m,min)) # start pos
epos=unlist(lapply(object@cl.long.m,max)) # end pos
swin=lapply( map(object), FUN = function(x) x[1]) # start window
ewin=lapply( map(object), FUN = function(x) x[length(x)]) # end window
len.pos=epos-spos+1 # lenght in bp
MperMb=round(n.marker/len.pos*mil,1) # marker per Mb
res=data.table(chr,id,n.marker,spos,epos,len.pos,MperMb)
res$id=as.integer(id)
res$n.marker=as.integer(n.marker)
res$spos=as.integer(spos)
res$epos=as.integer(epos)
res$swin=as.integer(swin)
res$ewin=as.integer(ewin)
res$len.pos=as.integer(len.pos)
res$MperMb=as.double(MperMb)
if (min.marker>1)
{
res=res[n.marker>=min.marker]
# update id
#res$id=1:nrow(res)
}
return(res)
})
#' Method to visualize reconstructed haplotype-blocks
#'
#' The method visualizes reconstructed haplotype-blocks for a chromosome.
#'
#' @aliases plot.hbr
#' @author Susanne U. Franssen
#' @details The method operates on \code{\link{hbr}} objects and visualizes location of
#' reconstructed haplotype-blocks with respect to its genomic position.
#' @export
#' @importFrom graphics segments abline
#' @importFrom utils combn
#' @param x object of class \code{hbr} containing the results of reconstructed haplotype
#' blocks for visualization.
#' @param min.marker numeric specifying the minimum number of markers a haplotype-block
#' should contain in order to be visualized.
#' @param xlab Label of the x-axis with the default value 'Genomic position [MB]'.
#' @param ylab Label of the y-axis with the default value 'Reconstructe haplotype-block'.
#' @param main Plot title (default: "Chromosome XX").
#' @param col Color of the lines representing the haplotype-blocks (default: "black").
#' @param lwd Line width of the lines representing the haplotype-blocks (default: 4).
#' @param hline Distance between horizontal lines plotted for orientation (default: 10).
#' @param ... arguments of the generic plot method.
#' @param indicate_shared logical value specifying if "spurious" markers that are
#' identical between pairs of haplotype-blocks should be indicated.
#' This function is usefull for inspecting results and deciding whether all
#' identified blocks are all independent or maybe reconstruction parameters should
#' be changed.
#' @param addPoints logical value indicating if for each reconstructed block markers
#' should additionally be indicated.
#' @param hbr_plot boolean vector of length the number of reconstructed bocks indicating which
#' ones should be plotted
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}}\code{\link{number_hbr}}
#'
setMethod("plot","hbr",
function(x, min.marker=1, xlab="Genomic position [Mb]",
ylab="Reconstructed haplotype-block", main="default",
col="black", lwd=4, hline=10,
indicate_shared=F, addPoints=F,
hbr_plot=NULL, ...)
{
if (is.null(hbr_plot))
{
hbr_plot=rep(T,length(x@cl.long.m))
} else if (length(hbr_plot) != length(x@cl.long.m))
{
stop(paste("hbr_plot must be a boolean vector of length",length(x@cl.long.m)))
}
hbr.i=unlist(lapply(x@cl.long.m,length))>=min.marker # boolean for hbrs with sufficient markers
hbr.i=(hbr_plot+hbr.i==2) # only hbrs with enough min markers and were specified to plot
hbrs=x@cl.long.m[hbr.i] # hbrs of sufficient # of markers
mil=1000000
min.pos=min(unlist(hbrs))/mil
max.pos=max(unlist(hbrs))/mil
if(main=="default"){main=paste("Chromosome",x@chromosome)}
if(length(col)<length(hbrs)){col=rep(col,length(hbrs))}
plot(x=c(min.pos,max.pos),y=c(0.5,length(hbrs)+0.5), col="white", xlab=xlab, ylab=ylab, main=main, ...)
abline(h=(0:round(length(hbrs)/hline))*hline, lwd=0.7, lty=2)
for (i in 1:length(hbrs))
{
if (addPoints) points(hbrs[[i]]/mil, rep(i,length(hbrs[[i]])), col="red")
segments(min(hbrs[[i]])/mil,i,max(hbrs[[i]])/mil,i,lwd=lwd, col=col[i])
}
if (indicate_shared & length(hbrs)>1)
{
# all pairwise comparisons of marker sets of hbrs
hbrNames.comb=combn( 1:length(hbrs) , 2 , simplify = FALSE )
hbrSets.comb=combn( hbrs , 2 , simplify = FALSE )
hbrinter.comb=lapply(hbrSets.comb , function(x) intersect(x[[1]], x[[2]]) )
for (comp in 1:length(hbrinter.comb))
{
#draw connecting lines whenever markers are shared between two hbrs
if (length(hbrinter.comb[[comp]]) >0)
{
segments(x0=hbrinter.comb[[comp]]/mil, y0=hbrNames.comb[[comp]][1],
x1=hbrinter.comb[[comp]]/mil, y1=hbrNames.comb[[comp]][2])
}
}
}
})
#' Method to visualize the trajectories for all identified clusters in a window
#'
#' @aliases plot_cluster_trajectories plot_cluster_trajectories.hbr
#' @author Susanne U. Franssen
#' @details The method operates on \code{\link{hbr}} objects and plots the trajectories
#' of all clusters and replicates in the specified window. Cluster trajectories are
#' visualized by trajectories of all markers contained in the respective clusters and
#' median trajectories for a cluster are visualized in a different color. For each
#' cluster it is indicated to which haplotype-block it was later assigned.
#' NOTE: It is possible that to clusters are assigned to the identical
#' haplotype-block. This can happen when a cluster in an overlapping window shares
#' markers with two clusters of the "focal" window.
#' @export
#' @importFrom grDevices rainbow
#' @importFrom stats median
#' @param object object of class \code{hbr}
#' @param window numeric specifying the number of the window, for which trajectories
#' should be plotted.
#' @param ylim numeric vector with two elements specifying the limits of the y-axis.
#' @param p.median boolean specifying if the median trajectory for each cluster should
#' be added to the plot (default: TRUE).
#' @param tp.cor Boolean indicating if only the time points used for calaculating
#' correlations \code{use.libs} are shown (tp.cor=T) or all time points present in the
#' data set are shown (tp.cor=F).
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("plot_cluster_trajectories", "hbr",
function(object, window, ylim=c(0,1), p.median=T, tp.cor=T)
{
if (!window %in% 1:length(object@cl.chr))
{
writeLines(paste("This window does not exist. Windows range from 1 to",length(object@cl.chr),"."))
} else if (length(object@cl.chr[[window]])==0)
{
writeLines("No clusters were called for this window.")
} else # print trajectories for all clusters identified for this window
{
# for adding hbr info
revMap=rev_map(object)
hbrs=unlist(revMap[names(revMap) == paste0("w",window)])
n.cluster=length(object@cl.chr[[window]]) # number of clusters identified for this window
n.repl=length(unique(object@dat@pop.ident)) # number of different replicates / populations provided
xmin=min(object@dat@pop.generation)
xmax=max(object@dat@pop.generation)
cols=rainbow(n.cluster) # colors for median lines in different clusters
par(mfrow=c(n.cluster,n.repl),mar=c(0,0,0,0),oma=c(4,4,3,1))
for (i in 1: n.cluster)
{
SNP.pos=object@cl.chr[[window]][[i]]
# test which hbr this cluster belongs to
hbr=c()
for (xx in hbrs)
{
if (length(intersect(object@cl.long.m[[xx]], SNP.pos))
>= object@min.inter)#(0.2)*length(SNP.pos))
{ hbr=c(hbr,xx)}
}
pos <- NULL # silly as pos is defined in object@dat@col.info$pos
if (tp.cor) # only time points used for the correlations will be shown
{
lf=object@dat@lib.freqs[object@dat@col.info$pos %in% SNP.pos,object@dat@use.libs,with=F] #library frequencies
} else {
lf=object@dat@lib.freqs[object@dat@col.info$pos %in% SNP.pos,] #library frequencies
}
bf=object@dat@col.info[pos %in% SNP.pos]$base.freq #composite base frequencies
if(i==n.cluster){xaxt=NULL}else{xaxt="n"}
for (repl in 1:n.repl) # plot for each replicate population
{
if (tp.cor) # only time points used for the correlations will be shown
{
gens=object@dat@pop.generation[object@dat@use.libs][repl==object@dat@pop.ident[object@dat@use.libs]]
lf.repl=lf[,repl==object@dat@pop.ident[object@dat@use.libs],with=F][,order(gens),with=F]
} else {
gens=object@dat@pop.generation[repl==object@dat@pop.ident]
lf.repl=lf[,repl==object@dat@pop.ident,with=F][,order(gens),with=F]
}
if (sum(gens==0) == 0) # add composite base to trajectory
{
lf.repl=cbind(bf,lf.repl)
gens=c(0,gens)
}
if(repl==1){yaxt=NULL}else{yaxt="n"}
plot(1,col="white", xlim=c(xmin,xmax),ylim=ylim,xaxt=xaxt,yaxt=yaxt)
text(x=xmax*0.2,y=ylim[2]*0.9, paste("hbr",paste(hbr,collapse=","))) # hbr info
for(line in 1:nrow(lf.repl))
{
lines(gens,lf.repl[line,])
}
if(p.median){lines(gens,apply(lf.repl,2,median),col=cols[i],lwd=1.5)}
}
}
mtext("Generation",1,outer = T,line = 3)
mtext("Marker frequency",2,outer = T,line = 3)
mtext(paste("Window",window," Replicates",paste(sort(unique(object@dat@pop.ident)),collapse=", ")),3,outer = T,line = 1)
}
})
#' Method to visualize the trajectories for all markers in a haplotype-block
#'
#' @aliases plot_marker_trajectories plot_marker_trajectories.hbr
#' @author Susanne U. Franssen
#' @details The method operates on \code{\link{hbr}} objects and plots the trajectories
#' of all markers in a haplotype-block in all replicates.
#' Note: As blocks can span a wide range along the genome, it is not expected that
#' trajectories within a block stay very similar. Changing location along the genome
#' can be indicated with loc.col=T.
#' @export
#' @importFrom grDevices colorRampPalette
#' @importFrom graphics legend lines mtext par points text
#' @param object object of class \code{hbr}
#' @param hbr_id the id of the haplotype-block, for which the marker trajectories
#' should be plotted.
#' @param ylim numeric vector with two elements specifying the limits of the y-axis.
#' @param loc.col boolean indicating if trajectories should be coloured ranging from
#' red over blue to yellow according to their location on the chromosome (default: TRUE).
#' @param tp.cor Boolean indicating if only the time points used for calaculating
#' correlations \code{use.libs} are shown (tp.cor=T) or all time points present in the
#' data set are shown (tp.cor=F).
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("plot_marker_trajectories", "hbr",
function(object, hbr_id, ylim=c(0,1), loc.col=T, tp.cor=T)
{
marker=markers(object, hbr_id)
n.repl=length(unique(object@dat@pop.ident)) # number of different replicates / populations provided
xmin=min(object@dat@pop.generation)
xmax=max(object@dat@pop.generation)
if (loc.col)
{
colfunc <- colorRampPalette(c("red", "blue","yellow"))
cols=colfunc(length(marker))
} else {cols=rep("black",length(marker))}
par(mfrow=c(1,n.repl),mar=c(0,0,0,0),oma=c(4,4,3,1))
SNP.pos=marker
pos <- NULL # silly as pos is defined in object@dat@col.info$pos
if (tp.cor) # only time points used for the correlations will be shown
{
lf=object@dat@lib.freqs[object@dat@col.info$pos %in% SNP.pos,object@dat@use.libs,with=F] #library frequencies
} else {
lf=object@dat@lib.freqs[object@dat@col.info$pos %in% SNP.pos,] #library frequencies
}
bf=object@dat@col.info[pos %in% SNP.pos]$base.freq #composite base frequencies
for (repl in 1:n.repl) # plot for each replicate population
{
if (tp.cor) # only time points used for the correlations will be shown
{
gens=object@dat@pop.generation[object@dat@use.libs][repl==object@dat@pop.ident[object@dat@use.libs]]
lf.repl=lf[,repl==object@dat@pop.ident[object@dat@use.libs],with=F][,order(gens),with=F]
} else {
gens=object@dat@pop.generation[repl==object@dat@pop.ident]
lf.repl=lf[,repl==object@dat@pop.ident,with=F][,order(gens),with=F]
}
if (sum(gens==0) == 0) # add composite base to trajectory
{
lf.repl=cbind(bf,lf.repl)
gens=c(0,gens)
}
if(repl==1){yaxt=NULL}else{yaxt="n"}
xaxt=NULL
plot(1,col="white", xlim=c(xmin,xmax),ylim=ylim,xaxt=xaxt,yaxt=yaxt)
for(line in 1:nrow(lf.repl))
{
lines(gens,lf.repl[line,], col=cols[line])
}
}
mtext("Generation",1,outer = T,line = 3)
mtext("Marker frequency",2,outer = T,line = 3)
})
#' Map from reconstructed haplotype-blocks to windows
#'
#' A method returning information which windows are contain in which reconstructed
#' haplotype-block.
#'
#' @aliases map map.hbr
#' @author Susanne U. Franssen
#' @details The method operates on \code{\link{hbr}} objects and returns a list summarizing
#' the mapping information from reconstructed haplotype-blocks to contained windows.
#' NOTE: A window is only listed for a haplotype-block when at least \code{min.inter}
#' many markers of the block are in that window.
#' @export
#' @param object object of class \code{\link{hbr}} containing the results of reconstructed
#' haplotype-blocks
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#' @return list with elements corresponding to all reconstructed haplotype-blocks (id for all
#' blocks without filtering based on the number of markers). Each element contains a vector of
#' window indices that are contained in the block
#'
setMethod("map", "hbr",
function(object)
{
hb <- NULL # initialized in the subsequent foreach
win.list = foreach (hb=object@cl.long.m) %do%
{
wins=c()
for (win in 1:length(object@cl.chr))
{
# intersection between blocks in that marker and in any cluster of the respective
# window
inter=intersect(hb,unlist(object@cl.chr[[win]]))
if (length(inter)>=object@min.inter) { wins=c(wins,win) }
}
#paste(wins,collapse = ",")
wins
}
# each list element corresponds to an hbr and contains a vector of all windows it spans
win.list
})
#' Map from windows to reconstructed haplotype-blocks
#'
#' A method returning information which reconstructed haplotype-blocks are present in
#' which windows.
#'
#' @aliases rev_map rev_map.hbr
#' @author Susanne U. Franssen
#' @details The method operates on \code{\link{hbr}} objects and returns a list summarizing
#' the mapping information from windows to reconstructed haplotype-blocks.
#' @export
#' @param object object of class \code{\link{hbr}} containing the results of reconstructed
#' haplotype-blocks
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#' @return list with elements corresponding to all windows that contain a reconstructed
#' haplotype-block segment. Each element contains a vector of haplotype-block indices
#' that overlap the respective window.
#'
setMethod("rev_map", "hbr",
function(object)
{
reverse_map=list()
xx=map(object)
for (i in 1:length(xx)) # over all hbr
{
for (j in 1:length(xx[[i]])) # over all win in each hbr
{
if (!paste0("w",xx[[i]][j]) %in% names(reverse_map))
{
reverse_map[[length(reverse_map)+1]]=c(i)
names(reverse_map)[length(reverse_map)]=paste0("w",xx[[i]][j])
} else
{
ii=(1:length(reverse_map))[names(reverse_map)==paste0("w",xx[[i]][j])]
reverse_map[[ii]]=c(reverse_map[[ii]],i)
}
}
}
reverse_map
})
#' Markers of the specified reconstructed haplotype-block
#'
#' Returns the genomic positions of the markers for the specified reconstructed haplotype-block
#'
#' @aliases markers markers.hbr
#' @author Susanne U. Franssen
#' @export
#' @param object object of class \code{\link{hbr}} containing the results of reconstructed
#' haplotype-blocks
#' @param hbr_id index of the haplotype-block of interest
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#' @return numeric vector with marker positions of the specified reconstructed haplotype-block
#'
setMethod("markers", "hbr",
function(object, hbr_id)
{
if (hbr_id > length(object@cl.long.m)) stop(paste("Please provide an 'hbr_id' <=",length(object@cl.long.m),"."))
object@cl.long.m[[hbr_id]]
})
#' The number of reconstructed haplotype-block
#'
#' Returns the number of reconstructed haplotype-blocks
#'
#' @aliases number_hbr number_hbr.hbr
#' @author Susanne U. Franssen
#' @export
#' @param object object of class \code{\link{hbr}} containing the results of reconstructed
#' haplotype-blocks
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}}
#' @return returns the number of reconstructed haplotype-blocks
#'
setMethod("number_hbr", "hbr",
function(object)
{
length(object@cl.long.m)
})
#' Plots frequencies of a reconstructed haplotype-block along the chromosome
#'
#' @aliases plot_hbr_freq plot_hbr-freq.hbr
#' @author Susanne U. Franssen
#' @details The plotting method operates on \code{\link{hbr}} objects and returns a plot containing
#' frequencies of a respective haplotype-block along the chromosome for specified time points and
#' replicates.
#' @export
#' @importFrom grDevices rainbow
#' @importFrom zoo rollapply
#' @param object object of class \code{\link{hbr}} containing the results of reconstructed
#' haplotype-blocks
#' @param hbr_id id (integer) of the haplotype-block to be plotted
#' @param replicate numeric vector of integers specifying the replicates for which results
#' should be plotted
#' @param timepoint numeric vector of time points for which the results shouls be plotted
#' @param window window size over which frequencies are averaged (results are plotted for windows
#' overlapping by windows/2)
#' @param cols vector of colors with the length of the specified libraries
#' @param add logical specifying if a new plot should be created or haplotype-block frequencies
#' are added to an existing plot.
#' @param sumstat Summary statistics used for the y-values in a window. Either
#' specify "mean" (the default) or "median".
#' @param cex scaling of the point size in the output plot
#' @param xlab x-label in the output plot
#' @param ylab y-label in the output plot
#' @param xlim vector of the limits on the x-axis in the output plot
#' @param ylim vector of the limits on the y-axis in the output plot
#' @param pch option to specify symbols to use when plotting points in the output plot
#' @param lwd line width in the output plot
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("plot_hbr_freq", "hbr",
function(object, hbr_id=1, replicate, timepoint, window=1, cols=NULL, add=F
, sumstat="mean", cex=0.7
, xlab="Genomic position [Mb]", ylab="Marker frequency", xlim=NULL
, ylim=c(0,1), pch=20, lwd=2)
{
validity_plot_hbr_freq(object, hbr_id, replicate, timepoint, window
, cols, add, cex
, xlab, ylab, xlim, ylim)
mil=1000000
n.libs=length(object@dat@pop.ident)
# get markers for the respective block
# index of hbr markers in the time series data
i.hbr.m=object@dat@col.info$pos %in% object@cl.long.m[[hbr_id]]
col.info.hbr=object@dat@col.info[i.hbr.m,]
lib.freqs.hbr=object@dat@lib.freqs[i.hbr.m,]
if (nrow(col.info.hbr)<(1.5*window) & window>1){
stop(paste("There are only",nrow(col.info.hbr),"markers, please provide a smaller
window size."))
}
# get markers for the respective libraries
libs.rep=(1:n.libs)[object@dat@pop.ident %in% replicate]
libs.tp=(1:n.libs)[object@dat@pop.generation %in% timepoint]
libs=intersect(libs.rep,libs.tp)
if (length(libs)<1){
stop("There are no libraries with the specified replicate AND timepoint properties.
Please provide valid values for replicate and timepoints.")}
# color handling
if (is.null(cols)){
cols=rainbow(length(libs))
} else {
if (length(cols)!=length(libs)) {
print(libs)
stop(paste("Please provide a vector of",length(libs),"colors for the libraries",
paste(colnames(lib.freqs.hbr)[libs],collapse = ", ")))
}
}
cat(paste("Block frequencies are provided for libraries:\n"
,paste(colnames(lib.freqs.hbr)[libs],collapse = ", "),"\n"))
# window smoothing
if (window>1) {
x=rollapply(col.info.hbr$pos[!is.na(lib.freqs.hbr[[libs[1]]])]
,width=window,FUN=mean,by=1)/mil#window/2)/mil
if (sumstat=="median"){
y=rollapply(lib.freqs.hbr[[libs[1]]][!is.na(lib.freqs.hbr[[libs[1]]])]
,width=window,FUN=median,by=1)#window/2)
} else {
y=rollapply(lib.freqs.hbr[[libs[1]]][!is.na(lib.freqs.hbr[[libs[1]]])]
,width=window,FUN=mean,by=1)#window/2)
}
} else {
x=col.info.hbr$pos/mil
y=lib.freqs.hbr[[libs[1]]]
}
# plotting parameters
if (is.null(xlim)){
xlim=c(min(x),max(x))
} else {xlim=xlim/mil}
# plotting
if (add){
points(x,y,pch=pch,col=cols[1], cex=cex)
} else {
plot(x,y,pch=pch,col=cols[1], xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, cex=cex)
abline(h=(0:5)*0.2,lty=2,lwd=0.5)
}
if (length(libs)>1)
{
for (i in 2:length(libs))
{
if (window>1){
x=rollapply(col.info.hbr$pos[!is.na(lib.freqs.hbr[[libs[i]]])]
,width=window,FUN=mean,by=1)/mil#window/2)/mil
if (sumstat=="median"){
y=rollapply(lib.freqs.hbr[[libs[i]]][!is.na(lib.freqs.hbr[[libs[i]]])]
,width=window,FUN=median,by=1)#window/2)
} else {
y=rollapply(lib.freqs.hbr[[libs[i]]][!is.na(lib.freqs.hbr[[libs[i]]])]
,width=window,FUN=mean,by=1)#window/2)
}
} else {
y=lib.freqs.hbr[[libs[i]]]
}
points(x,y,pch=pch,col=cols[i],cex=cex)
}
}
})
#' Haplotype-block marker inspection for a window
#'
#' Plots correlations and groupings of haplotype-markers for a given window
#'
#' @aliases inspect_window inspect_window.hbr
#' @author Susanne U. Franssen
#' @details The plotting method operates on \code{\link{hbr}} objects. For a
#' specified window the correlation matrix for identified haplotype-block markers
#' in this window is visualized also indicating haplotype-block identity.
#' @export
#' @importFrom gplots heatmap.2
#' @importFrom grDevices colorRampPalette rainbow
#' @param object object of class \code{\link{hbr}} containing the results of
#' reconstructed haplotype-blocks
#' @param window number of the window, which should be inspected
#' @param colCluster boolean value indicating whether columns in the output
#' correlation plot should be clustered or the original positions of the
#' markers reflecting genomic positions should be kept.
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("inspect_window", "hbr",
function(object, window, colCluster=T)
{
# extract data matrix from the respective window
choose=( object@dat@col.info$win==window | object@dat@col.info$win==window+1) # relates to the data matrix in object
sum(choose)
ttpos=object@dat@col.info$pos[choose]
#
# data matrix in object@dat may contain some SNPs in between that were not included in any large cluster
choose2= (object@dat@col.info$pos >= min(ttpos) & object@dat@col.info$pos <= max(ttpos))# relates to the data matrix in object@dat
aapos=object@dat@col.info$pos[choose2]
#aadat=object@dat@lib.freqs[choose2,]
aadat=object@dat@lib.freqs[choose2,object@dat@use.libs,with=F]
rownames(aadat)<-aapos
#
if (object@transf) {aadat=sqrt(aadat)}#; print("trans")}
if (object@scaleSNP) {aadat=data.table(t(scale(t(aadat))))}#; print("scale")}
aadat_t=t(aadat); colnames(aadat_t)=aapos
dwin.cor=cor(aadat_t,use="pairwise.complete.obs")
if (object@pos.cor) {dwin.cor[dwin.cor<0]=0}
# color according to performed clustering
colV=rep("black", length(aapos))
# block ids for window
hbs=rev_map(object)[names(rev_map(object))==paste0("w",window)][[1]]
cc=rainbow(length(hbs))
for (i in hbs)
{
colV[ aapos %in% markers(object,i) ] <- cc[which(hbs==i)]
}
#require(gplots)
colfunc <- colorRampPalette(c("yellow", "darkgreen"))
# usage of average linkage clustering for heatmap
hclust2 <- function(x, method="average")
hclust(x, method=method)
# heatmap colors
# to keep the same colors across plots regardeles of the range of correlation values
if(round(min(dwin.cor)/ (1/30)) == 30)
{
colH=colfunc(30)[29:30]
} else {
if (round(min(dwin.cor)<0)) {zz=0} else {zz=round(min(dwin.cor)/ (1/30))}
colH=colfunc(30)[zz:30]
}
if (colCluster)
{
heatmap.2(dwin.cor, ColSideColors=colV, RowSideColors=colV, scale="none",
key.xlab="Correlation", hclustfun=hclust2,
main=paste("Window",window, " hbr",paste(hbs,collapse = ",")),
trace="none", key.title="",
col=colH, dendrogram ="both")
} else {
heatmap.2(dwin.cor, ColSideColors=colV, RowSideColors=colV, scale="none",
key.xlab="Correlation", hclustfun=hclust2,
main=paste("Window",window, " hbr",paste(hbs,collapse = ",")),
trace="none", key.title="",
col=colH, dendrogram ="row", Colv=F)
}
})
#' PCA of haplotype-block marker for a window
#'
#' Plots all filtered SNPs for a given window in terms of their the
#' first two principal components of the respective time series
#' data.
#'
#' @aliases inspect_window_PCA inspect_window_PCA.hbr
#' @author Susanne U. Franssen
#' @details Performs principal component analysis (PCA) on the time series data
#' of the filtered SNPs and the selected time points (filtered and selected
#' through \code{use.libs} in \code{\link{initialize_SNP_time_series}})
#' in a given window. SNPs are shown in terms of
#' the first two principal components and colored according to the
#' haplotype-block they were assigned to. Black colored points were
#' not assigned to any cluster.
#' @export
#' @importFrom grDevices rainbow
#' @importFrom stats prcomp
#' @param object object of class \code{\link{hbr}} containing the results of
#' reconstructed haplotype-blocks
#' @param window number of the window, which should be inspected
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_avLink}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("inspect_window_PCA", "hbr",
function(object, window)
{
# extract data matrix from the respective window
choose=( object@dat@col.info$win==window | object@dat@col.info$win==window+1) # relates to the data matrix in object
sum(choose)
ttpos=object@dat@col.info$pos[choose]
#
# data matrix in object@dat may contain some SNPs in between that were not included in any large cluster
choose2= (object@dat@col.info$pos >= min(ttpos) & object@dat@col.info$pos <= max(ttpos))# relates to the data matrix in object@dat
aapos=object@dat@col.info$pos[choose2]
#aadat=object@dat@lib.freqs[choose2,]
aadat=object@dat@lib.freqs[choose2,object@dat@use.libs,with=F]
rownames(aadat)<-aapos
dim(aadat)
length(aapos)
if (object@transf) {aadat=sqrt(aadat)}#; print("trans")}
if (object@scaleSNP) {aadat=data.table(t(scale(t(aadat))))}#; print("scale")}
a=nrow(aadat)
aapos=aapos[rowSums(is.na(aadat))==0]
aadat=aadat[rowSums(is.na(aadat))==0,] #na.omit(aadat)
b=nrow(aadat)
if (b<a) warning(paste("Due to NA positions (position of too low coverage)",round((1-b/a)*100,2),"% of the
SNP positions were removed for the PCA analysis!"))
# PCA
pr.comp=prcomp(aadat,center = T, scale = T)
# variances, i.e. proportion of variances explained
vars <- pr.comp$sdev^2
vars <- vars/sum(vars)
# color according to performed clustering
col=rep("black", length(aapos))
# block ids for window
hbs=rev_map(object)[names(rev_map(object))==paste0("w",window)][[1]]
cc=rainbow(length(hbs))
for (i in hbs)
{
col[ aapos %in% markers(object,i) ]=cc[which(hbs==i)]
}
# plot data points in terms of PCs
par(mfrow=c(1,1), mar=c(4,4,1,1), oma=c(1,1,1,1))
plot(pr.comp$x[,1], pr.comp$x[,2], pch=20, col=col
, xlab=paste("PC1,",round(vars[1],3)*100,"%")
, ylab=paste("PC2,",round(vars[2],3)*100,"%"))
xx=par('usr') # figure x,ylim
legend(0.85*(xx[2]-xx[1])+xx[1], 0.95*(xx[4]-xx[3])+xx[3]
, paste("hb",hbs), col=cc, pch=1, bty = "n")
})
#' Hierachical clustering of haplotype-block marker for a window
#'
#' Performs average linkage clustering for all SNPs in a window
#' after filtering (i.e. \code{\link{initialize_SNP_time_series}}).
#'
#' @aliases inspect_window_avLink inspect_window_avLink.hbr
#' @author Susanne U. Franssen
#' @details Performs average linkage clustering on the time series data
#' of the filtered SNPs and the selected time points (filtered and selected
#' through \code{use.libs} in \code{\link{initialize_SNP_time_series}})
#' in a given window. The horizontal red line indicates the correlation
#' (1-correlation) threshold on which clusters were originally identified.
#' Note: Clusters with less than \code{min.cl.cor} markers are discarded.
#' If no horizontal line is shown this indicates that all SNPs were assigned
#' to the same cluster.
#' @export
#' @importFrom stats hclust as.dist na.omit
#' @param object object of class \code{\link{hbr}} containing the results of
#' reconstructed haplotype-blocks
#' @param window number of the window, which should be inspected
#' @param plotDendro Boolean indicating if dendrogram using average
#' linkage clustering should be plotted.
#' @param plotCluster Boolean indicating if clusters identified with average
#' linkage clustering should be visualized in PCs.
#' @param min.cl.cor minimum cluster correlation, if none is provided, the min.cl.cor
#' of the provided \code{\link{hbr}} object is taken.
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}}
#' \code{\link{inspect_window_dbScan}} \code{\link{number_hbr}}
#'
setMethod("inspect_window_avLink", "hbr",
function(object, window, min.cl.cor=0, plotDendro=T, plotCluster=T)
{
if (min.cl.cor==0){
min.cl.cor=object@min.cl.cor
}
# extract data matrix from the respective window
choose=( object@dat@col.info$win==window | object@dat@col.info$win==window+1) # relates to the data matrix in object
sum(choose)
ttpos=object@dat@col.info$pos[choose]
#
# data matrix in object@dat may contain some SNPs in between that were not included in any large cluster
choose2= (object@dat@col.info$pos >= min(ttpos) & object@dat@col.info$pos <= max(ttpos))# relates to the data matrix in object@dat
aapos=object@dat@col.info$pos[choose2]
#aadat=object@dat@lib.freqs[choose2,]
aadat=object@dat@lib.freqs[choose2,object@dat@use.libs,with=F]
rownames(aadat)<-aapos
dim(aadat)
length(aapos)
if (object@transf) {aadat=sqrt(aadat)}#; print("trans")}
if (object@scaleSNP) {aadat=data.table(t(scale(t(aadat))))}#; print("scale")}
dsub.cor=cor(t(aadat),use="pairwise.complete.obs")
dsub.cordist=1-as.dist(dsub.cor) # convert to dist format
if (object@pos.cor) {dwin.cor[dwin.cor<0]=0}
dsub.clust=hclust(dsub.cordist,method="average")
dsub.clust$labels<-aapos
# visulalize dendrogram
if (plotDendro)
{
par(mfrow=c(1,1),mar=c(2,4,2.5,1))
plot(dsub.clust, main="Average linkage clustering", ylab="Distance [1-correlation]", xlab="")
abline(h=1-min.cl.cor, col="red")
}
if (plotCluster)
{
# visualize clusters on PCs
dsub.cl=cutree(dsub.clust,h = 1-min.cl.cor)
counts=table(dsub.cl) # count SNPs in each cluster
small=as.numeric(names(counts))[counts<object@min.cl.size] # clusters that do not meet the minimum size
dsub.cl[dsub.cl %in% small]<-0
dsub.cl=dsub.cl+1
#
a=nrow(aadat)
dsub.cl=dsub.cl[rowSums(is.na(aadat))==0]
aadat=aadat[rowSums(is.na(aadat))==0,] #na.omit(aadat)
b=nrow(aadat)
if (b<a) warning(paste("Due to NA positions (position of too low coverage)",round((1-b/a)*100,2),"% of the
SNP positions were removed for the PCA analysis! This can cause discrepancies in
the dendrogram and the PCA visualization!"))
pr.comp=prcomp(aadat,center = T, scale = T, na.action=na.omit)
size=rep(1.5,length(dsub.cl)) # point size: large are clusters small are discarded
size[dsub.cl==1]=0.7
comp=data.frame(pr.comp$x[,1:3])
plot(comp, col=dsub.cl, pch=20, cex=size)
}
})
#' Clustering with the dbscan algorithm
#'
#' Performs clustering with the dbscan (Density-based spatial clustering
#' of applications with noise) for all SNPs in a window
#' after filtering (i.e. \code{\link{initialize_SNP_time_series}})
#' and visualizes found clusters based on principal components.
#' NOTE: The clusters identified here are not nessesarily identical
#' with the clusters identified with average linkage clustering. Therefore
#' if haplotype reconstruction was done using average linkage clustering
#' the clusters shown here can be different from clusters of identified haplotype
#' blocks.
#'
#' @aliases inspect_window_dbScan inspect_window_dbScan.hbr
#' @author Susanne U. Franssen
#' @details Performs clustering with \href{https://CRAN.R-project.org/package=dbscan}{dbscan}
#' (Density-based spatial clustering of applications with noise) for all SNPs in a window
#' after filtering (i.e. \code{\link{initialize_SNP_time_series}})
#' and visualizes found clusters based on principal components.
#' NOTE: The clusters identified here are not nessesarily identical
#' with the clusters identified with average linkage clustering. Therefore
#' if haplotype reconstruction was done using average linkage clustering
#' the clusters shown here can be different from clusters of identified haplotype
#' blocks. This method is rather indendet for inspection and getting an idea
#' if haplotype reconstruction should rather be run using dbscan instead
#' of average linkage clustering.
#'
#' This package used the dbscan implementation of the package \href{https://CRAN.R-project.org/package=dbscan}{dbscan}
#' (Michael Hahsler) originally described by Ester et al. (1996).
#' @export
#' @importFrom dbscan dbscan kNNdistplot
#' @param object object of class \code{\link{hbr}} containing the results of
#' reconstructed haplotype-blocks
#' @param window Number of the window, which should be inspected
#' @param minPts Number of minimum points in the eps region (for core points).
#' (default \code{min.cl.size} used for haplotype block reconstruction)
#' @param eps The size of the epsilon neighborhood. Defines the distance between
#' samples that built up a core cluster, for details see ?\code{dbscan} of
#' the \code{dbscan} package. (This value is only used for the \code{plotCluster}
#' functionality.)
#' @param plotkNN Boolean indicating if the kNN distance plot should be printed
#' , for details see ?\code{kNNdistplot} of the \code{dbscan} package. Briefly,
#' the y-value where a bent in the curve is visible is a good indicator of the eps
#' value to choose for the given k = minPts.
#' @param plotCluster Boolean indicating if clusters identified with average
#' linkage clustering should be visualized in PCs.
## @references Martin Ester, Hans-Peter Kriegel, Joerg Sander, Xiaowei Xu (1996).
## A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases
## with Noise. Institute for Computer Science, University of Munich. Proceedings
## of 2nd International Conference on Knowledge Discovery and Data Mining (KDD-96).
#' @seealso \code{\link{hbr}} \code{\link{ex_dat}} \code{\link{summary.hbr}} \code{\link{plot.hbr}}
#' \code{\link{plot_cluster_trajectories}} \code{\link{plot_marker_trajectories}}
#' \code{\link{map}} \code{\link{rev_map}} \code{\link{markers}} \code{\link{plot_hbr_freq}}
#' \code{\link{inspect_window}} \code{\link{inspect_window_PCA}} \code{\link{inspect_window_avLink}}
#' \code{\link{number_hbr}}
#'
setMethod("inspect_window_dbScan", "hbr",
function(object, window, minPts=object@min.cl.size, eps, plotkNN=T, plotCluster=T)
{
# extract data matrix from the respective window
choose=( object@dat@col.info$win==window | object@dat@col.info$win==window+1) # relates to the data matrix in object
sum(choose)
ttpos=object@dat@col.info$pos[choose]
#
# data matrix in object@dat may contain some SNPs in between that were not included in any large cluster
choose2= (object@dat@col.info$pos >= min(ttpos) & object@dat@col.info$pos <= max(ttpos))# relates to the data matrix in object@dat
aapos=object@dat@col.info$pos[choose2]
#aadat=object@dat@lib.freqs[choose2,]
aadat=object@dat@lib.freqs[choose2,object@dat@use.libs,with=F]
rownames(aadat)<-aapos
dim(aadat)
length(aapos)
if (object@transf) {aadat=sqrt(aadat)}#; print("trans")}
if (object@scaleSNP) {aadat=data.table(t(scale(t(aadat))))}#; print("scale")}
a=nrow(aadat)
aapos=aapos[rowSums(is.na(aadat))==0]
aadat=aadat[rowSums(is.na(aadat))==0,] #na.omit(aadat)
b=nrow(aadat)
if (b<a) warning(paste("Due to NA positions (position of too low coverage)",round((1-b/a)*100,2),"% of the
SNP positions were removed for the PCA analysis!"))
if (plotkNN){
kNNdistplot(aadat, k=minPts)
}
if (plotCluster){
xx=dbscan(aadat, eps=eps, minPts = minPts)
#
pr.comp=prcomp(aadat,center = T, scale = T)
size=rep(1.5,length(xx$cluster)) # point size: large are clusters small are discarded
size[xx$cluster==0]=0.7
comp=data.frame(pr.comp$x[,1:3])
plot(comp, col=xx$cluster+1, pch=20, cex=size)
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.