#' Display fitted generative distribution function
#'
#' This function displays a one-dimensional generative distribution function fitted using \code{\link{dffit}}.
#'
#' @importFrom magicaxis magaxis magplot
#' @importFrom graphics plot lines points polygon box clip
#' @importFrom grDevices col2rgb rgb
#' @importFrom stats qpois
#'
#' @param survey List produced by \code{\link{dffit}}
#' @param xlab x-axis label
#' @param ylab y-axis label
#' @param ylab.histogram y-axis label for histogram
#' @param xlim 2-element vector with x-axis plotting limits
#' @param ylim 2-element vector with y-axis plotting limits
#' @param log String specifying the log-axes as in \code{\link{plot}}.
#' @param p Parameters of a reference distribution function do be over-plotted to the fitted function. Choose \code{NULL} to avoid plotting a reference function.
#' @param veff Function of x to be plotted over the histogram, if \code{show.data.histogram = TRUE}.
#' @param xpower10 If \code{TRUE}, the model argument x is elevated to the power of 10 in the plots.
#' @param show.input.data If \code{TRUE}, the input data is shown in bins. Each bin values is simply the sum 1/Veff(x) of the observed x-values in this bin.
#' @param show.posterior.data If \code{TRUE}, the posterior data, constructucted from all the individual posterior PDFs of the observed data, are snown in bins. Note that posterior data only exists of the fitted data is uncertain, e.g. if the argument \code{x.err} in \code{\link{dffit}} was non-zero.
#' @param show.data.histogram If \code{TRUE}, a histogram of source counts, based on the input data, is displayed in a bottom panel. Choose \code{NA}, to leave space for the histogram without drawing it.
#' @param show.uncertainties If \code{TRUE}, uncertainties are displayed around the best fit model.
#' @param uncertainty.type \code{1}: plot Gaussian 1-sigma uncertanties propagated from the Hessian matrix of the likelihood. \code{2}: plot 68 percentile region (from 16 to 84 percent). \code{3} plot 68 (16 to 84) and 95 (2 to 98) percentile regions.
#' @param show.bias.correction If \code{TRUE}, the bias corrected MLE is shown instead of the native ML parameters.
#' @param add If \code{TRUE}, the lines are overplotted on the currently open plot.
#' @param scd is an optional N-vector or scalar function specifying a source count density to be plotted over the histogram.
#' @param nbins Number of bins to be plotted; must be larger than 0. Choose \code{nbins=NULL} (default) to determine the number of bins automatically.
#' @param bin.xmin Left edge of first bin
#' @param bin.xmax Right edge of last bin
#' @param clip.xmin Minimum x values to be plotted in models
#' @param clip.xmax Maximum x values to be plotted in models
#' @param clip.ymin Minimum y values to be plotted in models
#' @param clip.ymax Maximum y values to be plotted in models
#' @param col.fit Color of fit (see \code{\link{plot}})
#' @param lwd.fit Line width of fit (see \code{\link{plot}})
#' @param lty.fit Line type of fit (see \code{\link{plot}})
#' @param col.data.input Color of binned input data
#' @param cex.data.input Size of binned input data
#' @param lwd.data.input Line width of binned input data
#' @param col.data.posterior Color of binned posterior data
#' @param cex.data.posterior Size of binned posterior data
#' @param lwd.data.posterior Line width of binned posterior data
#' @param col.hist Color of source count histogram
#' @param col.ref Color of reference distribution function
#' @param lwd.ref Line width of reference distribution function
#' @param lty.ref Line type of reference distribution function
#' @param col.veff Color of reference function \code{veff}
#' @param lwd.veff Line width of reference function \code{veff}
#' @param lty.veff Line type of reference function \code{veff}
#' @param col.scd Color of source count density function \code{scd}
#' @param lwd.scd Line width of source count density function \code{scd}
#' @param lty.scd Line type of source count density function \code{scd}
#' @param div.line Logical flag specifying if the division line between upper and lower p
#' @param axes Logical flag specifying whether to plot axes
#' @param margins Margins (bottom,left,top,right)
#'
#' @return Returns the input list \code{survey} with the additional sub-list \code{survey$bin} that contains the binned data.
#'
#' @seealso For optimized plotting of galaxy mass functions, use the derived function \code{\link{mfplot}}. As an example run \code{dfexample(1)}. See examples in \code{\link{dffit}}.
#'
#' @author Danail Obreschkow
#'
#' @export
dfplot <- function(survey,
xlab = 'Observable x',
ylab = expression('Generative distribution function'~phi),
ylab.histogram = 'Counts',
xlim = NULL,
ylim = NULL,
log = 'y',
p = NULL,
veff = NULL,
xpower10 = FALSE,
show.input.data = TRUE,
show.posterior.data = TRUE,
show.data.histogram = FALSE,
show.uncertainties = TRUE,
uncertainty.type = 1,
show.bias.correction = FALSE,
add = FALSE,
scd = NULL,
nbins = NULL,
bin.xmin = NULL,
bin.xmax = NULL,
clip.xmin = NULL,
clip.xmax = NULL,
clip.ymin = NULL,
clip.ymax = NULL,
col.fit = 'blue',
lwd.fit = 2,
lty.fit = 1,
col.data.input = 'grey',
cex.data.input = 1,
lwd.data.input = 1,
col.data.posterior = 'blue',
cex.data.posterior = 1,
lwd.data.posterior = 1,
col.hist = 'grey',
col.ref = 'black',
lwd.ref = 1,
lty.ref = 2,
col.veff = '#666666',
lwd.veff = 1.5,
lty.veff = 3,
col.scd = 'black',
lwd.scd = 1,
lty.scd = 1,
div.line = TRUE,
axes = TRUE,
margins=c(5.1,4.1,4.1,2.1)) {
r = col2rgb(col.fit)[1]/255
g = col2rgb(col.fit)[2]/255
b = col2rgb(col.fit)[3]/255
n.data = dim(survey$data$x)[1]
n.dim = dim(survey$data$x)[2]
if (n.dim!=1) {
if (n.dim==2) {
stop('Use dfplot2 for two-dimensional distribution functions.')
} else {
stop('dfplot only handles one-dimensional distribution functions.')
}
}
# define plot limits
if (is.null(xlim)) {
if (add) {
xlim = par('usr')[1:2]
} else {
xlim = range(survey$data$x)
xlim = xlim+c(-0.1,0.1)*(xlim[2]-xlim[1])
}
if (xpower10) xlim = 10^xlim
} else {
if (add) stop('dfplot: cannot specify xlim, if add = TRUE.')
}
if (is.null(ylim)) {
if (add) {
ylim = 10^par('usr')[3:4]
if (is.na(show.data.histogram) | show.data.histogram) {
ylim[1] = exp(log(ylim[1])+(log(ylim[2])-log(ylim[1]))*0.2)
}
} else {
ylim = max(survey$fit$gdf(survey$data$x))*c(2e-4,2)
}
} else {
if (add) stop('dfplot: cannot specify ylim, if add = TRUE.')
}
# bin data
survey = .bin.data(survey,nbins,bin.xmin,bin.xmax)
# open plot
if (!add) {
par(pty = 'm')
par(mar = margins)
if (is.na(show.data.histogram) | show.data.histogram) {
plot(0,0,type='n',yaxs='i',xaxt='n',yaxt='n',xlim=xlim,ylim=c(0,1),xlab='',ylab='',bty='n')
.plotSub(0,1,0.2,1)
}
plot(1,1,type='n',log=log,xaxs='i',yaxs='i',xaxt='n',yaxt='n',
xlim = xlim, ylim = ylim, xlab = '', ylab = '',bty='n')
} else {
if (is.na(show.data.histogram) | show.data.histogram) {
.plotSub(0,1,0.2,1)
plot(1,1,type='n',log=log,xaxs='i',yaxs='i',xaxt='n',yaxt='n',
xlim = xlim, ylim = ylim, xlab = '', ylab = '',bty='n')
}
}
# clip plotting range
usr <- par("usr")
if (is.null(clip.xmin)) {x1=xlim[1]} else {x1=clip.xmin}
if (is.null(clip.xmax)) {x2=xlim[2]} else {x2=clip.xmax}
if (is.null(clip.ymin)) {y1=ylim[1]} else {y1=clip.ymin}
if (is.null(clip.ymax)) {y2=ylim[2]} else {y2=clip.ymax}
clip(x1,x2,y1,y2)
# plot uncertainty regions
if (show.uncertainties & survey$fit$status$converged) {
poly.x = c(survey$grid$x,rev(survey$grid$x))
if (xpower10) poly.x = 10^poly.x
if ((uncertainty.type>1) & (!length(survey$grid$gdf.quantile.16)>0)) stop('Quantiles not available. Use resampling in dffit.')
if (uncertainty.type == 3) {
poly.y.95 = pmax(ylim[1],c(survey$grid$gdf.quantile.02,rev(survey$grid$gdf.quantile.98)))
list = is.finite(poly.x) & is.finite(poly.y.95)
polygon(poly.x[list],poly.y.95[list],col=rgb(r,g,b,0.15),border=NA)
}
if (uncertainty.type >= 2) {
poly.y.68 = pmax(ylim[1],c(survey$grid$gdf.quantile.16,rev(survey$grid$gdf.quantile.84)))
list = is.finite(poly.x) & is.finite(poly.y.68)
polygon(poly.x[list],poly.y.68[list],col=rgb(r,g,b,0.25),border=NA)
}
if (uncertainty.type == 1) {
poly.y.68 = pmax(ylim[1],c(survey$grid$gdf-survey$grid$gdf.error.neg,
rev(survey$grid$gdf+survey$grid$gdf.error.pos)))
list = is.finite(poly.x) & is.finite(poly.y.68)
polygon(poly.x[list],poly.y.68[list],col=rgb(r,g,b,0.25),border=NA)
}
}
# plot central fit
x = survey$grid$x[survey$grid$gdf>0]
if (xpower10) x = 10^x
if (show.bias.correction & survey$fit$status$converged) {
if (length(survey$fit$parameters$p.optimal.bias.corrected)==0) stop('Bias corrected MLE parameters not available. Use bias.correction in dffit.')
lines(x,survey$input$distribution.function$phi(survey$grid$x[survey$grid$gdf>0],
survey$fit$parameters$p.optimal.bias.corrected),col=col.fit,lwd=lwd.fit,lty=lty.fit)
} else {
lines(x,survey$grid$gdf[survey$grid$gdf>0],col=col.fit,lwd=lwd.fit,lty=lty.fit)
}
# plot reference distribution function
if (!is.null(p)) {
if (xpower10) {
lines(x,survey$model$gdf(log10(x),p),col=col.ref,lwd=lwd.ref,lty=lty.ref)
} else {
lines(x,survey$model$gdf(x,p),col=col.ref,lwd=lwd.ref,lty=lty.ref)
}
}
# unclip plotting range
clip(xlim[1],xlim[2],ylim[1],ylim[2])
# plot binned input data points
bin = list()
for (mode in seq(2)) {
if (mode==1) {
show = show.input.data
if (show) {
bin$count = survey$bin$count.input
bin$gdf = survey$bin$gdf.input
bin$xmean = survey$bin$xcenter
col.data = col.data.input
cex.data = cex.data.input
lwd.data = lwd.data.input
}
} else {
show = show.posterior.data & !is.null(survey$data$x.err) & !is.null(survey$grid$effective.counts)
if (show) {
bin$count = survey$bin$count.posterior
bin$gdf = survey$bin$gdf.posterior
bin$xmean = survey$bin$xmean.posterior
col.data = col.data.posterior
cex.data = cex.data.posterior
lwd.data = lwd.data.posterior
}
}
if (show) {
list = bin$gdf>0
pm = 0.05
f.16 = qpois(0.16,bin$count)/bin$count
f.84 = qpois(0.84,bin$count)/bin$count
upper = f.16<pm
f.16 = pmax(f.16,pm)
if (xpower10) {
points(10^bin$xmean[list],bin$gdf[list],pch=20,col=col.data,cex=cex.data)
segments(10^bin$xmean[list],bin$gdf[list]*f.16[list],10^bin$xmean[list],bin$gdf[list]*f.84[list],col=col.data,lwd=lwd.data)
points(10^bin$xmean[upper],bin$gdf[upper]*pm,pch=25,cex=cex.data*0.7,col=col.data,bg=col.data)
#stop('sdf')
segments(10^(survey$bin$xmin+seq(0,survey$bin$n-1)[list]*survey$bin$dx),bin$gdf[list],
10^(survey$bin$xmin+seq(1,survey$bin$n)[list]*survey$bin$dx),bin$gdf[list],col=col.data,lwd=lwd.data)
} else {
points(bin$xmean[list],bin$gdf[list],pch=20,col=col.data,cex=cex.data)
segments(bin$xmean[list],bin$gdf[list]*f.16[list],bin$xmean[list],bin$gdf[list]*f.84[list],col=col.data,lwd=lwd.data)
points(bin$xmean[upper],bin$gdf[upper]*pm,pch=25,cex=cex.data,bg=col.data)
segments((survey$bin$xmin+seq(0,survey$bin$n-1)[list]*survey$bin$dx),bin$gdf[list],
(survey$bin$xmin+seq(1,survey$bin$n)[list]*survey$bin$dx),bin$gdf[list],col=col.data,lwd=lwd.data)
}
}
}
# axes
if (!add & axes) {
magicaxis::magaxis(side=2,ylab=ylab,lwd=NA,lwd.ticks=1)
magicaxis::magaxis(side=4,labels=FALSE,lwd=NA,lwd.ticks=1)
}
# plot binned data histogram
if (!is.na(show.data.histogram) & show.data.histogram) {
.plotSub(0,1,0,0.2)
ymax.hist = 1.2
if (!is.null(scd)) {
if (is.function(scd)) {
if (xpower10) {
x.scd = seq(log10(xlim[1]),log10(xlim[2]),length=200)
} else {
x.scd = seq(xlim[1],xlim[2],length=200)
}
y.scd = scd(x.scd)*survey$bin$dx
} else {
breaks = survey$bin$xmin+seq(0,survey$bin$n)*survey$bin$dx
h = .hist(scd,breaks)
x.scd = h$x
y.scd = h$y
}
ymax = max(y.scd)*ymax.hist
if (xpower10) {x.scd = 10^x.scd}
} else {
ymax = max(survey$bin$histogram)*ymax.hist
}
if (length(grep('x',log))==1) {lg='x'} else {lg=''}
plot(1,1,type='n',log=lg,xaxs='i',yaxs='i',xaxt='n',yaxt='n',
xlim = xlim, ylim = c(0,ymax), xlab = '', ylab = '',bty='n')
xbin = rep(survey$bin$xmin+seq(0,survey$bin$n)*survey$bin$dx,each=2)
if (xpower10) xbin = 10^xbin
xhist = c(xlim[1],xbin,xlim[2])
yhist = c(0,0,rep(survey$bin$histogram,each=2),0,0)
polygon(xhist,yhist,col=col.hist,border = NA)
if (!is.null(veff)) {
if (xpower10) {
x = seq(log10(xlim[1]),log10(xlim[2]),length=200)
} else {
x = seq(xlim[1],xlim[2],length=200)
}
y = veff(x)
if (xpower10) {x = 10^x}
lines(x,y/max(y)*ymax*0.88,col=col.veff,lwd=lwd.veff,lty=lty.veff)
}
if (!is.null(scd)) {
lines(x.scd,y.scd,col=col.scd,lwd=lwd.scd,lty=lty.scd)
}
if (div.line) {
par(xpd=TRUE)
lines(xlim,rep(ymax,2))
par(xpd=FALSE)
}
if (axes) {
magicaxis::magaxis(side=2,ylab=ylab.histogram,lwd=NA,labels=FALSE,lwd.ticks=NA)
magicaxis::magaxis(side=4,labels=FALSE,lwd=NA,lwd.ticks=NA)
}
}
if (is.na(show.data.histogram) | show.data.histogram) {
.plotSubEnd()
}
# axes
if (!add & axes) {
magicaxis::magaxis(side=1,xlab=xlab,lwd=NA,lwd.ticks=1)
magicaxis::magaxis(side=3,labels=FALSE,lwd=NA,lwd.ticks=1)
box(which = "plot", lty = "solid", lwd = 1)
par(pty = "m")
}
if (is.na(show.data.histogram) | show.data.histogram) {
.plotSub(0,1,0,1)
plot(1,1,type='n',log=log,xaxs='i',yaxs='i',xaxt='n',yaxt='n',
xlim = xlim, ylim = c(exp(log(ylim[1])-(log(ylim[2])-log(ylim[1]))*0.25),ylim[2]), xlab = '', ylab = '',bty='n')
clip(xlim[1],xlim[2],ylim[1],ylim[2])
}
invisible(survey)
}
.bin.data = function(survey,nbins,bin.xmin,bin.xmax) {
# initialize
x = survey$data$x
bin = list()
n.data = length(x)
# determine number of bins
if (is.null(nbins)) {
bin$n = min(100,round(sqrt(n.data)))
} else {
if (nbins<=0) stop('Choose more than 0 bins.')
bin$n = nbins
}
# make bin intervals
if (is.null(bin.xmin)) {
bin$xmin = min(x)-(max(x)-min(x))/bin$n*0.25
} else {
bin$xmin = bin.xmin
}
if (is.null(bin.xmax)) {
bin$xmax = max(x)+(max(x)-min(x))/bin$n*0.25
} else {
bin$xmax = bin.xmax
}
wx = bin$xmax-bin$xmin
bin$dx = wx/bin$n
bin$xcenter = bin$xmin+(seq(bin$n)-0.5)*bin$dx
# fill input data into bins
v = survey$selection$veff(x)
bin$gdf.input = bin$count.input = bin$xmean.input = array(0,bin$n)
for (i in seq(n.data)) {
k = floor((x[i]-bin$xmin)/wx*0.99999999*bin$n)+1
if (k>=1 & k<=bin$n) {
bin$gdf.input[k] = bin$gdf.input[k]+1/bin$dx/v[i]
bin$count.input[k] = bin$count.input[k]+1
bin$xmean.input[k] = bin$xmean.input[k]+x[i]
}
}
bin$xmean.input = bin$xmean.input/pmax(1,bin$count.input)
# fill posterior data into bins
if (!is.null(survey$data$x.err) & !is.null(survey$grid$effective.counts)) {
bin$gdf.posterior = bin$count.posterior = bin$xmean.posterior = array(0,bin$n)
xg = survey$grid$x
dx = xg[2]-xg[1]
for (k in seq(bin$n)) {
list = floor((xg-bin$xmin)/wx*0.99999999*bin$n)+1==k
bin$xmean.posterior[k] = sum(survey$grid$scd.posterior[list]*xg[list])/sum(survey$grid$scd.posterior[list])
bin$count.posterior[k] = mean(survey$grid$effective.counts[list])
bin$gdf.posterior[k] = sum(survey$grid$scd.posterior[list]/survey$grid$veff[list]/survey$grid$obs.filter[list])/sum(list)
}
}
# make histogram counts
bin$histogram = array(0,bin$n)
for (i in seq(n.data)) {
k = floor((x[i]-bin$xmin)/wx*0.99999999*bin$n)+1
if (k>=1 & k<=bin$n) {
bin$histogram[k] = bin$histogram[k]+1
}
}
survey$bin = bin
invisible(survey)
}
#' @export
.plotSub <- function(xleft=0.1,xright=0.3,ybottom=0.1,ytop=0.3) {
par(omd=c(0,1,0,1))
xmarg = sum(par()$mai[c(2,4)])
xplot = par()$pin[1]
ymarg = sum(par()$mai[c(1,3)])
yplot = par()$pin[2]
par(new=T,omd=c(xleft*xplot/(xplot+xmarg),(xright*xplot+xmarg)/(xplot+xmarg),ybottom*yplot/(yplot+ymarg),(ytop*yplot+ymarg)/(yplot+ymarg)))
}
#' @export
.plotSubEnd <- function() {
par(oma=c(0,0,0,0))
par(omi=c(0,0,0,0))
par(omd=c(0,1,0,1))
}
#' @export
.hist = function(x,breaks) {
b = c(-1e99,breaks,1e99)
counts = hist(x,breaks=b,plot=F)$counts[2:(length(b)-2)]
center = (breaks[1:(length(breaks)-1)]+breaks[2:length(breaks)])/2
x = array(rbind(array(breaks),array(breaks)))
y = c(0,array(rbind(array(counts),array(counts))),0)
return(list(counts=counts,center=center,x=x,y=y))
}
#' @export
.dfsun <- function(x,y,cex=1) {
par(xpd=TRUE)
points(rep(x,2),rep(y,2),pch=c(1,20),cex=c(1.2,0.45)*cex)
par(xpd=FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.