npplot  R Documentation 
npplot
is invoked by plot
and generates plots of
nonparametric statistical objects such as regressions, quantile
regressions, partially linear regressions, singleindex models,
densities and distributions, given training data and a bandwidth
object.
npplot(bws = stop("'bws' has not been set"), ..., random.seed = 42) ## S3 method for class 'bandwidth' npplot(bws, xdat, data = NULL, xq = 0.5, xtrim = 0.0, neval = 50, common.scale = TRUE, perspective = TRUE, main = NULL, type = NULL, border = NULL, cex.axis = NULL, cex.lab = NULL, cex.main = NULL, cex.sub = NULL, col = NULL, ylab = NULL, xlab = NULL, zlab = NULL, sub = NULL, ylim = NULL, xlim = NULL, zlim = NULL, lty = NULL, lwd = NULL, theta = 0.0, phi = 10.0, view = c("rotate","fixed"), plot.behavior = c("plot","plotdata","data"), plot.errors.method = c("none","bootstrap","asymptotic"), plot.errors.boot.method = c("inid", "fixed", "geom"), plot.errors.boot.blocklen = NULL, plot.errors.boot.num = 399, plot.errors.center = c("estimate","biascorrected"), plot.errors.type = c("standard","quantiles"), plot.errors.quantiles = c(0.025,0.975), plot.errors.style = c("band","bar"), plot.errors.bar = c("","I"), plot.errors.bar.num = min(neval,25), plot.bxp = FALSE, plot.bxp.out = TRUE, plot.par.mfrow = TRUE, ..., random.seed) ## S3 method for class 'conbandwidth' npplot(bws, xdat, ydat, data = NULL, xq = 0.5, yq = 0.5, xtrim = 0.0, ytrim = 0.0, neval = 50, gradients = FALSE, common.scale = TRUE, perspective = TRUE, main = NULL, type = NULL, border = NULL, cex.axis = NULL, cex.lab = NULL, cex.main = NULL, cex.sub = NULL, col = NULL, ylab = NULL, xlab = NULL, zlab = NULL, sub = NULL, ylim = NULL, xlim = NULL, zlim = NULL, lty = NULL, lwd = NULL, theta = 0.0, phi = 10.0, tau = 0.5, view = c("rotate","fixed"), plot.behavior = c("plot","plotdata","data"), plot.errors.method = c("none","bootstrap","asymptotic"), plot.errors.boot.method = c("inid", "fixed", "geom"), plot.errors.boot.blocklen = NULL, plot.errors.boot.num = 399, plot.errors.center = c("estimate","biascorrected"), plot.errors.type = c("standard","quantiles"), plot.errors.quantiles = c(0.025,0.975), plot.errors.style = c("band","bar"), plot.errors.bar = c("","I"), plot.errors.bar.num = min(neval,25), plot.bxp = FALSE, plot.bxp.out = TRUE, plot.par.mfrow = TRUE, ..., random.seed) ## S3 method for class 'plbandwidth' npplot(bws, xdat, ydat, zdat, data = NULL, xq = 0.5, zq = 0.5, xtrim = 0.0, ztrim = 0.0, neval = 50, common.scale = TRUE, perspective = TRUE, gradients = FALSE, main = NULL, type = NULL, border = NULL, cex.axis = NULL, cex.lab = NULL, cex.main = NULL, cex.sub = NULL, col = NULL, ylab = NULL, xlab = NULL, zlab = NULL, sub = NULL, ylim = NULL, xlim = NULL, zlim = NULL, lty = NULL, lwd = NULL, theta = 0.0, phi = 10.0, view = c("rotate","fixed"), plot.behavior = c("plot","plotdata","data"), plot.errors.method = c("none","bootstrap","asymptotic"), plot.errors.boot.method = c("inid", "fixed", "geom"), plot.errors.boot.blocklen = NULL, plot.errors.boot.num = 399, plot.errors.center = c("estimate","biascorrected"), plot.errors.type = c("standard","quantiles"), plot.errors.quantiles = c(0.025,0.975), plot.errors.style = c("band","bar"), plot.errors.bar = c("","I"), plot.errors.bar.num = min(neval,25), plot.bxp = FALSE, plot.bxp.out = TRUE, plot.par.mfrow = TRUE, ..., random.seed) ## S3 method for class 'rbandwidth' npplot(bws, xdat, ydat, data = NULL, xq = 0.5, xtrim = 0.0, neval = 50, common.scale = TRUE, perspective = TRUE, gradients = FALSE, main = NULL, type = NULL, border = NULL, cex.axis = NULL, cex.lab = NULL, cex.main = NULL, cex.sub = NULL, col = NULL, ylab = NULL, xlab = NULL, zlab = NULL, sub = NULL, ylim = NULL, xlim = NULL, zlim = NULL, lty = NULL, lwd = NULL, theta = 0.0, phi = 10.0, view = c("rotate","fixed"), plot.behavior = c("plot","plotdata","data"), plot.errors.method = c("none","bootstrap","asymptotic"), plot.errors.boot.num = 399, plot.errors.boot.method = c("inid", "fixed", "geom"), plot.errors.boot.blocklen = NULL, plot.errors.center = c("estimate","biascorrected"), plot.errors.type = c("standard","quantiles"), plot.errors.quantiles = c(0.025,0.975), plot.errors.style = c("band","bar"), plot.errors.bar = c("","I"), plot.errors.bar.num = min(neval,25), plot.bxp = FALSE, plot.bxp.out = TRUE, plot.par.mfrow = TRUE, ..., random.seed) ## S3 method for class 'scbandwidth' npplot(bws, xdat, ydat, zdat = NULL, data = NULL, xq = 0.5, zq = 0.5, xtrim = 0.0, ztrim = 0.0, neval = 50, common.scale = TRUE, perspective = TRUE, gradients = FALSE, main = NULL, type = NULL, border = NULL, cex.axis = NULL, cex.lab = NULL, cex.main = NULL, cex.sub = NULL, col = NULL, ylab = NULL, xlab = NULL, zlab = NULL, sub = NULL, ylim = NULL, xlim = NULL, zlim = NULL, lty = NULL, lwd = NULL, theta = 0.0, phi = 10.0, view = c("rotate","fixed"), plot.behavior = c("plot","plotdata","data"), plot.errors.method = c("none","bootstrap","asymptotic"), plot.errors.boot.num = 399, plot.errors.boot.method = c("inid", "fixed", "geom"), plot.errors.boot.blocklen = NULL, plot.errors.center = c("estimate","biascorrected"), plot.errors.type = c("standard","quantiles"), plot.errors.quantiles = c(0.025,0.975), plot.errors.style = c("band","bar"), plot.errors.bar = c("","I"), plot.errors.bar.num = min(neval,25), plot.bxp = FALSE, plot.bxp.out = TRUE, plot.par.mfrow = TRUE, ..., random.seed) ## S3 method for class 'sibandwidth' npplot(bws, xdat, ydat, data = NULL, common.scale = TRUE, gradients = FALSE, main = NULL, type = NULL, cex.axis = NULL, cex.lab = NULL, cex.main = NULL, cex.sub = NULL, col = NULL, ylab = NULL, xlab = NULL, sub = NULL, ylim = NULL, xlim = NULL, lty = NULL, lwd = NULL, plot.behavior = c("plot","plotdata","data"), plot.errors.method = c("none","bootstrap","asymptotic"), plot.errors.boot.num = 399, plot.errors.boot.method = c("inid", "fixed", "geom"), plot.errors.boot.blocklen = NULL, plot.errors.center = c("estimate","biascorrected"), plot.errors.type = c("standard","quantiles"), plot.errors.quantiles = c(0.025,0.975), plot.errors.style = c("band","bar"), plot.errors.bar = c("","I"), plot.errors.bar.num = NULL, plot.par.mfrow = TRUE, ..., random.seed)
bws 
a bandwidth specification. This should be a bandwidth object
returned from an invocation of 
... 
additional arguments supplied to control various aspects of plotting, depending on the type of object to be plotted, detailed below. 
data 
an optional data frame, list or environment (or object
coercible to a data frame by 
xdat 
a pvariate data frame of sample realizations (training data). 
ydat 
a qvariate data frame of sample realizations (training data). In a regression or conditional density context, this is the dependent data. 
zdat 
a pvariate data frame of sample realizations (training data). 
xq 
a numeric pvector of quantiles. Each element i of

yq 
a numeric qvector of quantiles. Each element i of

zq 
a numeric qvector of quantiles. Each element i of

xtrim 
a numeric pvector of quantiles. Each element i of

ytrim 
a numeric qvector of quantiles. Each element i of

ztrim 
a numeric qvector of quantiles. Each element i of

neval 
an integer specifying the number of evaluation points. Only applies
to continuous variables however, as discrete variables will be
evaluated once at each category. Defaults to 
common.scale 
a logical value specifying whether or not all graphs are to be
plotted on a common scale. Defaults to 
perspective 
a logical value specifying whether a perspective plot should be
displayed (if possible). Defaults to 
gradients 
a logical value specifying whether gradients should be plotted
(if possible). Defaults to 
main 
optional title, see 
sub 
optional subtitle, see 
type 
optional character indicating the type of plotting; actually any of
the types as in 
border 
optional character indicating the border of plotting; actually any of
the borders as in 
cex.axis 
The magnification to be used for axis annotation relative to the current setting of cex. 
cex.lab 
The magnification to be used for x and y labels relative to the current setting of cex. 
cex.main 
The magnification to be used for main titles relative to the current setting of cex. 
cex.sub 
The magnification to be used for subtitles relative to the current setting of cex. 
col 
optional character indicating the color of plotting; actually any of
the colours as in 
ylab 
optional character indicating the y axis label of plotting; actually any of
the ylabs as in 
xlab 
optional character indicating the x axis label of plotting; actually any of
the xlabs as in 
zlab 
optional character indicating the z axis label of plotting; actually any of
the zlabs as in 
ylim 
optional a twoelement numeric vector of the minimum and maximum y plotting
limits. Defaults to 
xlim 
a twoelement numeric vector of the minimum and maximum x plotting
limits. Defaults to 
zlim 
a twoelement numeric vector of the minimum and maximum z plotting
limits. Defaults to 
lty 
a numeric value indicating the line type of plotting; actually any of
the ltys as in 
lwd 
a numeric value indicating the width of the line of plotting;
actually any of the lwds as in 
theta 
a numeric value specifying the starting azimuthal angle of the
perspective plot. Defaults to 
phi 
a numeric value specifying the starting zenith angle of the
perspective plot. Defaults to 
tau 
a numeric value specifying the tauth quantile is
desired when plotting quantile regressions. Defaults to 
view 
a character string used to specify the viewing mode of the
perspective plot. Can be set as 
plot.behavior 
a character string used to specify the net behavior of 
plot.errors.method 
a character string used to specify the method to calculate
errors. Can be set as 
plot.errors.boot.method 
a character string used to specify the bootstrap method. Can be set
as 
plot.errors.boot.blocklen 
an integer used to specify the block length b for the

plot.errors.boot.num 
an integer used to specify the number of bootstrap samples to use
for the calculation of errors. Defaults to 
plot.errors.center 
a character string used to specify where to center the errors on the
plot(s). Can be set as 
plot.errors.type 
a character string used to specify the type of error to
calculate. Can be set as 
plot.errors.quantiles 
a numeric vector specifying the quantiles of the statistic to
calculate for the purpose of error plotting. Defaults to

plot.errors.style 
a character string used to specify the style of error plotting. Can
be set as 
plot.errors.bar 
a character string used to specify the error bar shape. Can be set
as 
plot.errors.bar.num 
an integer specifying the number of error bars to plot. Defaults to

plot.bxp 
a logical value specifying whether boxplots should be produced when
appropriate. Defaults to 
plot.bxp.out 
a logical value specifying whether outliers should be plotted on
boxplots. Defaults to 
plot.par.mfrow 
a logical value specifying whether 
random.seed 
an integer used to seed R's random number generator. This ensures replicability of the bootstrapped errors. Defaults to 42. 
npplot
is a general purpose plotting routine for visually
exploring objects generated by the np
library, such as
regressions, quantile regressions, partially linear regressions,
singleindex models, densities and distributions. There is no need to
call npplot
directly as it is automatically invoked when
plot
is used with an object generated by the np
package.
Visualizing one and two dimensional datasets is a straightforward
process. The default behavior of npplot
is to generate a
standard 2D plot to visualize univariate data, and a perspective plot
for bivariate data. When visualizing higher dimensional data,
npplot
resorts to plotting a series of 1D slices of the
data. For a slice along dimension i, all other variables at
indices j != i are held constant at the quantiles
specified in the jth element of xq
. The default is the
median.
The slice itself is evaluated on a uniformly spaced sequence of
neval points. The interval of evaluation is determined by the
training data. The default behavior is to evaluate from
min(txdat[,i])
to max(txdat[,i])
. The xtrim
variable allows for control over this behavior. When xtrim
is
set, data is evaluated from the xtrim[i]
th quantile of
txdat[,i]
to the 1.0xtrim[i]
th quantile of
txdat[,i]
.
Furthermore, xtrim
can be set to a negative
value in which case it will expand the limits of the evaluation
interval beyond the support of the training data, by measuring the
distance between min(txdat[,i])
and the xtrim[i]
th
quantile of txdat[,i]
, and extending the support by that
distance on the lower limit of the interval. npplot
uses an
analogous procedure to extend the upper limit of the interval.
Bootstrap resampling is conducted pairwise on (y,X,Z) (i.e., by
resampling from rows of the (y,X) data or (y,X,Z) data
where appropriate). inid
admits general
heteroskedasticity of unknown form, though it does not allow for
dependence. fixed
conducts Kunsch's (1988) block bootstrap
for dependent data, while geom
conducts Politis and
Romano's (1994) stationary bootstrap.
For consistency of the block and stationary bootstrap, the (mean) block length b should grow with the sample size n at an appropriate rate. If b is not given, then a default growth rate of const n^{1/3} is used. This rate is “optimal” under certain conditions (see Politis and Romano (1994) for more details). However, in general the growth rate depends on the specific properties of the DGP. A default value for const (3.15) has been determined by a Monte Carlo simulation using a Gaussian AR(1) process (AR(1)parameter of 0.5, 500 observations). const has been chosen such that the mean square error for the bootstrap estimate of the variance of the empirical mean is minimized.
Setting plot.behavior
will instruct npplot
what data
to return. Option summary:
plot
: instruct npplot
to just plot the data and
return NULL
plotdata
: instruct npplot
to plot the data and return
the data used to generate the plots. The data will be a list
of
objects of the appropriate type, with one object per plot. For
example, invoking npplot
on 3D density data will have it
return a list of three npdensity objects. If biases were calculated,
they are stored in a component named bias
data
: instruct npplot
to generate data only and no plots
If you are using data of mixed types, then it is advisable to use the
data.frame
function to construct your input data and not
cbind
, since cbind
will typically not work as
intended on mixed data types and will coerce the data to the same
type.
Tristen Hayfield tristen.hayfield@gmail.com, Jeffrey S. Racine racinej@mcmaster.ca
Aitchison, J. and C.G.G. Aitken (1976), “Multivariate binary discrimination by the kernel method,” Biometrika, 63, 413420.
Hall, P. and J.S. Racine and Q. Li (2004), “Crossvalidation and the estimation of conditional probability densities,” Journal of the American Statistical Association, 99, 10151026.
Kunsch, H.R. (1989), “The jackknife and the bootstrap for general stationary observations,” The Annals of Statistics, 17, 12171241.
Li, Q. and J.S. Racine (2007), Nonparametric Econometrics: Theory and Practice, Princeton University Press.
Pagan, A. and A. Ullah (1999), Nonparametric Econometrics, Cambridge University Press.
Politis, D.N. and J.P. Romano (1994), “The stationary bootstrap,” Journal of the American Statistical Association, 89, 13031313.
Scott, D.W. (1992), Multivariate Density Estimation. Theory, Practice and Visualization, New York: Wiley.
Silverman, B.W. (1986), Density Estimation, London: Chapman and Hall.
Wang, M.C. and J. van Ryzin (1981), “A class of smooth estimators for discrete distributions,” Biometrika, 68, 301309.
## Not run: # EXAMPLE 1: For this example, we load Giovanni Baiocchi's Italian GDP # panel (see Italy for details), then create a data frame in which year # is an ordered factor, GDP is continuous, compute bandwidths using # likelihood crossvalidation, then create a grid of data on which the # density will be evaluated for plotting purposes data("Italy") attach(Italy) data < data.frame(ordered(year), gdp) # Compute bandwidths using likelihood crossvalidation (default). Note # that this may take a minute or two depending on the speed of your # computer... bw < npudensbw(dat=data) # You can always do things manually, as the following example demonstrates # Create an evaluation data matrix year.seq < sort(unique(year)) gdp.seq < seq(1,36,length=50) data.eval < expand.grid(year=year.seq,gdp=gdp.seq) # Generate the estimated density computed for the evaluation data fhat < fitted(npudens(tdat = data, edat = data.eval, bws=bw)) # Coerce the data into a matrix for plotting with persp() f < matrix(fhat, length(unique(year)), 50) # Next, create a 3D perspective plot of the PDF f persp(as.integer(levels(year.seq)), gdp.seq, f, col="lightblue", ticktype="detailed", ylab="GDP", xlab="Year", zlab="Density", theta=300, phi=50) # Sleep for 5 seconds so that we can examine the output... Sys.sleep(5) # However, npplot simply streamlines this process and aids in the # visualization process (<ctrl>C will interrupt on *NIX systems, <esc> # will interrupt on MS Windows systems). plot(bw) # npplot also streamlines construction of variability bounds (<ctrl>C # will interrupt on *NIX systems, <esc> will interrupt on MS Windows # systems) plot(bw, plot.errors.method = "asymptotic") # EXAMPLE 2: For this example, we simulate multivariate data, and plot the # partial regression surfaces for a locally linear estimator and its # derivatives. set.seed(123) n < 100 x1 < runif(n) x2 < runif(n) x3 < runif(n) x4 < rbinom(n, 2, .3) y < 1 + x1 + x2 + x3 + x4 + rnorm(n) X < data.frame(x1, x2, x3, ordered(x4)) bw < npregbw(xdat=X, ydat=y, regtype="ll", bwmethod="cv.aic") plot(bw) # Sleep for 5 seconds so that we can examine the output... Sys.sleep(5) # Now plot the gradients... plot(bw, gradients=TRUE) # Plot the partial regression surfaces with biascorrected bootstrapped # nonparametric confidence intervals... this may take a minute or two # depending on the speed of your computer as the bootstrapping must be # completed prior to results being displayed... plot(bw, plot.errors.method="bootstrap", plot.errors.center="biascorrected", plot.errors.type="quantiles") # Note  if you wished to create, say, a postscript graph for inclusion # in, say, a latex document, use R's `postscript' command to switch to # the postscript device (turn off the device once completed). The # following will create a disk file `graph.ps' that can be pulled into, # say, a latex document via \includegraphics[width=5in, height=5in, # angle=270]{graph.ps} # Note  make sure to include the graphicx package in your latex # document via adding \usepackage{graphicx} in your latex file. Also, # you might want to use larger fonts, which can be achieved by adding the # pointsize= argument, e.g., postscript(file="graph.ps", pointsize=20) postscript(file="graph.ps") plot(bw) dev.off() # The following latex file compiled in the same directory as graph.ps # ought to work (remove the #s and place in a file named, e.g., # test.tex). # \documentclass[]{article} # \usepackage{graphicx} # \begin{document} # \begin{figure}[!ht] # \includegraphics[width=5in, height=5in, angle=270]{graph.ps} # \caption{Local linear partial regression surfaces.} # \end{figure} # \end{document} # EXAMPLE 3: This example demonstrates how to retrieve plotting data from # npplot(). When npplot() is called with the arguments # `plot.behavior="plotdata"' (or "data"), it returns plotting objects # named r1, r2, and so on (rg1, rg2, and so on when `gradients=TRUE' is # set). Each plotting object's index (1,2,...) corresponds to the index # of the explanatory data data frame xdat (and zdat if appropriate). # Take the cps71 data by way of example. In this case, there is only one # object returned by default, `r1', since xdat is univariate. data("cps71") attach(cps71) # Compute bandwidths for local linear regression using cv.aic... bw < npregbw(xdat=age,ydat=logwage,regtype="ll",bwmethod="cv.aic") # Generate the plot and return plotting data, and store output in # `plot.out' (NOTE: the call to `plot.behavior' is necessary). plot.out < plot(bw, perspective=FALSE, plot.errors.method="bootstrap", plot.errors.boot.num=25, plot.behavior="plotdata") # Now grab the r1 object that npplot plotted on the screen, and take # what you need. First, take the output, lower error bound and upper # error bound... logwage.eval < fitted(plot.out$r1) logwage.se < se(plot.out$r1) logwage.lower.ci < logwage.eval + logwage.se[,1] logwage.upper.ci < logwage.eval + logwage.se[,2] # Next grab the x data evaluation data. xdat is a data.frame(), so we # need to coerce it into a vector (take the `first column' of data frame # even though there is only one column) age.eval < plot.out$r1$eval[,1] # Now we could plot this if we wished, or direct it to whatever end use # we envisioned. We plot the results using R's plot() routines... plot(age,logwage,cex=0.2,xlab="Age",ylab="log(Wage)") lines(age.eval,logwage.eval) lines(age.eval,logwage.lower.ci,lty=3) lines(age.eval,logwage.upper.ci,lty=3) # If you wanted npplot() data for gradients, you would use the argument # `gradients=TRUE' in the call to npplot() as the following # demonstrates... plot.out < plot(bw, perspective=FALSE, plot.errors.method="bootstrap", plot.errors.boot.num=25, plot.behavior="plotdata", gradients=TRUE) # Now grab object that npplot() plotted on the screen. First, take the # output, lower error bound and upper error bound... note that gradients # are stored in objects rg1, rg2 etc. grad.eval < gradients(plot.out$rg1) grad.se < gradients(plot.out$rg1, errors = TRUE) grad.lower.ci < grad.eval + grad.se[,1] grad.upper.ci < grad.eval + grad.se[,2] # Next grab the x evaluation data. xdat is a data.frame(), so we need to # coerce it into a vector (take `first column' of data frame even though # there is only one column) age.eval < plot.out$rg1$eval[,1] # We plot the results using R's plot() routines... plot(age.eval,grad.eval,cex=0.2, ylim=c(min(grad.lower.ci),max(grad.upper.ci)), xlab="Age",ylab="d log(Wage)/d Age",type="l") lines(age.eval,grad.lower.ci,lty=3) lines(age.eval,grad.upper.ci,lty=3) detach(cps71) ## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.