# R/binsreg.R In binsreg: Binscatter Estimation and Inference

########################################################################################
#'@title  Data-driven Binscatter Estimation with Robust Inference Procedures and Plots
#'@description \code{binsreg} implements binscatter estimation with robust inference proposed and plots, following the
#'             results in \href{https://arxiv.org/abs/1902.09608}{Cattaneo, Crump, Farrell and Feng (2019a)}.
#'             Binscatter provides a flexible way of describing
#'             the mean relationship between two variables, after possibly adjusting for other covariates, based on
#'             partitioning/binning of the independent variable of interest. The main purpose of this function is to
#'             generate binned scatter plots with curve estimation with robust pointwise confidence intervals and
#'             uniform confidence band.  If the binning scheme is not set by the user, the companion function
#'             \code{\link{binsregselect}} is used to implement binscatter in a data-driven (optimal)
#'             way. Hypothesis testing about the regression function can also be conducted via the companion
#'@param  y outcome variable. A vector.
#'@param  x independent variable of interest. A vector.
#'@param  w control variables. A matrix or a vector.
#'@param  deriv  derivative order of the regression function for estimation, testing and plotting.
#'               The default is \code{deriv=0}, which corresponds to the function itself.
#'@param  dots a vector. \code{dots=c(p,s)} sets a piecewise polynomial of degree \code{p} with \code{s} smoothness constraints for
#'             point estimation and plotting as "dots". The default is \code{dots=c(0,0)}, which corresponds to
#'             piecewise constant (canonical binscatter)
#'@param  dotsgrid number of dots within each bin to be plotted. Given the choice, these dots are point estimates
#'                 evaluated over an evenly-spaced grid within each bin. The default is \code{dotsgrid=0}, and only
#'                 the point estimates at the mean of \code{x} within each bin are presented.
#'@param  dotsgridmean If true, the dots corresponding to the point estimates evaluated at the mean of \code{x} within each bin
#'                     are presented. By default, they are presented, i.e., \code{dotsgridmean=T}.
#'@param  line a vector. \code{line=c(p,s)} sets a piecewise polynomial of degree \code{p} with \code{s} smoothness constraints
#'             for plotting as a "line".  By default, the line is not included in the plot unless explicitly
#'             specified.  Recommended specification is \code{line=c(3,3)}, which adds a cubic B-spline estimate
#'             of the regression function of interest to the binned scatter plot.
#'@param  linegrid number of evaluation points of an evenly-spaced grid within each bin used for evaluation of
#'                 the point estimate set by the \code{line=c(p,s)} option. The default is \code{linegrid=20},
#'                 which corresponds to 20 evenly-spaced evaluation points within each bin for fitting/plotting the line.
#'@param  ci a vector. \code{ci=c(p,s)} sets a piecewise polynomial of degree \code{p} with \code{s} smoothness constraints used for
#'             constructing confidence intervals. By default, the confidence intervals are not included in the plot
#'             unless explicitly specified.  Recommended specification is \code{ci=c(3,3)}, which adds confidence
#'             intervals based on cubic B-spline estimate of the regression function of interest to the binned scatter plot.
#'@param  cigrid number of evaluation points of an evenly-spaced grid within each bin used for evaluation of the point
#'               estimate set by the \code{ci=c(p,s)} option. The default is \code{cigrid=1}, which corresponds to 1
#'               evenly-spaced evaluation point within each bin for confidence interval construction.
#'@param  cigridmean If true, the confidence intervals corresponding to the point estimates evaluated at the mean of \code{x} within each bin
#'                   are presented. The default is \code{cigridmean=T}.
#'@param  cb a vector. \code{cb=c(p,s)} sets a the piecewise polynomial of degree \code{p} with \code{s} smoothness constraints used for
#'           constructing the confidence band. By default, the confidence band is not included in the plot unless
#'           explicitly specified.  Recommended specification is \code{cb=c(3,3)}, which adds a confidence band
#'           based on cubic B-spline estimate of the regression function of interest to the binned scatter plot.
#'@param  cbgrid number of evaluation points of an evenly-spaced grid within each bin used for evaluation of the point
#'               estimate set by the \code{cb=c(p,s)} option. The default is \code{cbgrid=20}, which corresponds
#'               to 20 evenly-spaced evaluation points within each bin for confidence interval construction.
#'@param  polyreg degree of a global polynomial regression model for plotting. By default, this fit is not included
#'                in the plot unless explicitly specified. Recommended specification is \code{polyreg=3}, which
#'                adds a cubic (global) polynomial fit of the regression function of interest to the binned scatter plot.
#'@param  polyreggrid number of evaluation points of an evenly-spaced grid within each bin used for evaluation of
#'                    the point estimate set by the \code{polyreg=p} option. The default is \code{polyreggrid=20},
#'                    which corresponds to 20 evenly-spaced evaluation points within each bin for confidence
#'                    interval construction.
#'@param  polyregcigrid number of evaluation points of an evenly-spaced grid within each bin used for constructing
#'                      confidence intervals based on polynomial regression set by the \code{polyreg=p} option.
#'                      The default is \code{polyregcigrid=0}, which corresponds to not plotting confidence
#'                      intervals for the global polynomial regression approximation.
#'@param  by a vector containing the group indicator for subgroup analysis; both numeric and string variables
#'           are supported. When \code{by} is specified, \code{binsreg} implements estimation and inference by each subgroup
#'           separately, but produces a common binned scatter plot. By default, the binning structure is selected for each
#'           subgroup separately, but see the option \code{samebinsby} below for imposing a common binning structure across subgroups.
#'@param  bycolors an ordered list of colors for plotting each subgroup series defined by the option \code{by}.
#'@param  bysymbols an ordered list of symbols for plotting each subgroup series defined by the option \code{by}.
#'@param  bylpatterns an ordered list of line patterns for plotting each subgroup series defined by the option \code{by}.
#'@param  legendTitle String, title of legend.
#'@param  legendoff If true, no legend is added.
#'@param  testmodel a vector. \code{testmodel=c(p,s)} sets a piecewise polynomial of degree \code{p} with \code{s}
#'                  smoothness constraints for parametric model specification testing.  The default is
#'                  \code{testmodel=c(3,3)}, which corresponds to a cubic B-spline estimate of the regression
#'                  function of interest for testing against the fitting from a parametric model specification.
#'@param  testmodelparfit a data frame or matrix which contains the evaluation grid and fitted values of the model(s) to be
#'                        tested against.  The first column contains a series of evaluation points
#'                        at which the binscatter model and the parametric model of interest are compared with
#'                        each other.  Each parametric model is represented by other columns, which must
#'                        contain the fitted values at the corresponding evaluation points.
#'@param  testmodelpoly degree of a global polynomial model to be tested against.
#'@param  testshape a vector. \code{testshape=c(p,s)} sets a piecewise polynomial of degree \code{p} with \code{s}
#'                  smoothness constraints for nonparametric shape restriction testing. The default is
#'                  \code{testshape=c(3,3)}, which corresponds to a cubic B-spline estimate of the regression
#'                  function of interest for one-sided or two-sided testing.
#'@param  testshapel a vector of null boundary values for hypothesis testing. Each number \code{a} in the vector
#'                   corresponds to one boundary of a one-sided hypothesis test to the left of the form
#'                   \code{H0: sup_x mu(x)<=a}.
#'@param  testshaper a vector of null boundary values for hypothesis testing. Each number \code{a} in the vector
#'                   corresponds to one boundary of a one-sided hypothesis test to the right of the form
#'                   \code{H0: inf_x mu(x)>=a}.
#'@param  testshape2 a vector of null boundary values for hypothesis testing. Each number \code{a} in the vector
#'                   corresponds to one boundary of a two-sided hypothesis test ofthe form
#'                   \code{H0: sup_x |mu(x)-a|=0}.
#'@param  nbins number of bins for partitioning/binning of \code{x}.  If not specified, the number of bins is
#'              selected via the companion function \code{binsregselect} in a data-driven, optimal way whenever possible.
#'@param  binspos position of binning knots. The default is \code{binspos="qs"}, which corresponds to quantile-spaced
#'                binning (canonical binscatter).  The other options are \code{"es"} for evenly-spaced binning, or
#'                a vector for manual specification of the positions of inner knots (which must be within the range of
#'                \code{x}).
#'@param  binsmethod method for data-driven selection of the number of bins. The default is \code{binsmethod="dpi"},
#'                   which corresponds to the IMSE-optimal direct plug-in rule.  The other option is: \code{"rot"}
#'                   for rule of thumb implementation.
#'@param  nbinsrot initial number of bins value used to construct the DPI number of bins selector.
#'                 If not specified, the data-driven ROT selector is used instead.
#'@param  samebinsby if true, a common partitioning/binning structure across all subgroups specified by the option \code{by} is forced.
#'                   The knots positions are selected according to the option \code{binspos} and using the full sample. If \code{nbins}
#'                   is not specified, then the number of bins is selected via the companion command \code{\link{binsregselect}} and
#'                   using the full sample.
#'@param  nsims number of random draws for constructing confidence bands and hypothesis testing. The default is
#'              \code{nsims=500}, which corresponds to 500 draws from a standard Gaussian random vector of size
#'              \code{[(p+1)*J - (J-1)*s]}.
#'@param  simsgrid number of evaluation points of an evenly-spaced grid within each bin used for evaluation of
#'                 the supremum (or infimum) operation needed to construct confidence bands and hypothesis testing
#'                 procedures. The default is \code{simsgrid=20}, which corresponds to 20 evenly-spaced
#'                 evaluation points within each bin for approximating the supremum (or infimum) operator.
#'@param  simsseed  seed for simulation.
#'@param  vce Procedure to compute the variance-covariance matrix estimator. Options are
#'           \itemize{
#'           \item \code{"const"} homoskedastic variance estimator.
#'           \item \code{"HC0"} heteroskedasticity-robust plug-in residuals variance estimator
#'                              without weights.
#'           \item \code{"HC1"} heteroskedasticity-robust plug-in residuals variance estimator
#'                              with hc1 weights. Default.
#'           \item \code{"HC2"} heteroskedasticity-robust plug-in residuals variance estimator
#'                              with hc2 weights.
#'           \item \code{"HC3"} heteroskedasticity-robust plug-in residuals variance estimator
#'                              with hc3 weights.
#'           }
#'@param  cluster cluster ID. Used for compute cluster-robust standard errors.
#'@param  level nominal confidence level for confidence interval and confidence band estimation. Default is \code{level=95}.
#'@param  noplot If true, no plot produced.
#'@param  dfcheck adjustments for minimum effective sample size checks, which take into account number of unique
#'                values of \code{x} (i.e., number of mass points), number of clusters, and degrees of freedom of
#'                the different stat models considered. The default is \code{dfcheck=c(20, 30)}.
#'                See \href{https://sites.google.com/site/nppackages/binsreg/Cattaneo-Crump-Farrell-Feng_2019_Stata.pdf}{Cattaneo, Crump, Farrell and Feng (2019b)} for more details.
#'@param  masspoints how mass points in \code{x} are handled. Available options:
#'                   \itemize{
#'                   \item \code{"on"} all mass point and degrees of freedom checks are implemented. Default.
#'                   \item \code{"noadjust"} mass point checks and the corresponding effective sample size adjustments are omitted.
#'                   \item \code{"nolocalcheck"} within-bin mass point and degrees of freedom checks are omitted.
#'                   \item \code{"off"} "noadjust" and "nolocalcheck" are set simultaneously.
#'                   \item \code{"veryfew"} forces the function to proceed as if \code{x} has only a few number of mass points (i.e., distinct values).
#'                                          In other words, forces the function to proceed as if the mass point and degrees of freedom checks were failed.
#'                   }
#'@param  weights an optional vector of weights to be used in the fitting process. Should be \code{NULL} or
#'                a numeric vector. For more details, see \code{\link{lm}}.
#'@param  subset  Optional rule specifying a subset of observations to be used.
#'@return \item{\code{bins_plot}}{A \code{ggplot} object for binscatter plot.}
#'        \item{\code{data.plot}}{A list containing data for plotting. Each item is a sublist of data frames for each group. Each sublist may contain the following data frames:
#'        \itemize{
#'        \item \code{data.dots} Data for dots. It contains: \code{x}, evaluation points; \code{bin}, the indicator of bins;
#'                               \code{isknot}, indicator of inner knots; \code{mid}, midpoint of each bin; and \code{fit}, fitted values.
#'        \item \code{data.line} Data for line. It contains: \code{x}, evaluation points; \code{bin}, the indicator of bins;
#'                                \code{isknot}, indicator of inner knots; \code{mid}, midpoint of each bin; and \code{fit}, fitted values.
#'        \item \code{data.ci} Data for CI. It contains: \code{x}, evaluation points; \code{bin}, the indicator of bins;
#'                                \code{isknot}, indicator of inner knots; \code{mid}, midpoint of each bin;
#'                                \code{ci.l} and \code{ci.r}, left and right boundaries of each confidence intervals.
#'        \item \code{data.cb} Data for CB. It contains: \code{x}, evaluation points; \code{bin}, the indicator of bins;
#'                                \code{isknot}, indicator of inner knots; \code{mid}, midpoint of each bin;
#'                                \code{cb.l} and \code{cb.r}, left and right boundaries of the confidence band.
#'        \item \code{data.poly} Data for polynomial regression. It contains: \code{x}, evaluation points;
#'                                \code{bin}, the indicator of bins;
#'                                \code{isknot}, indicator of inner knots; \code{mid}, midpoint of each bin; and
#'                                \code{fit}, fitted values.
#'        \item \code{data.polyci} Data for confidence intervals based on polynomial regression. It contains: \code{x}, evaluation points;
#'                                \code{bin}, the indicator of bins;
#'                                \code{isknot}, indicator of inner knots; \code{mid}, midpoint of each bin;
#'                                \code{polyci.l} and \code{polyci.r}, left and right boundaries of each confidence intervals.}}
#'        \item{\code{cval.by}}{A vector of critical values for constructing confidence band for each group.}
#'        \item{\code{opt}}{ A list containing options passed to the function, as well as \code{N.by} (total sample size for each group),
#'                           \code{Ndist.by} (number of distinct values in \code{x} for each group), \code{Nclust.by} (number of clusters for each group),
#'                           and \code{nbins.by} (number of bins for each group), and \code{byvals} (number of distinct values in \code{by}).}
#'
#'@author
#' Matias D. Cattaneo, University of Michigan, Ann Arbor, MI. \email{cattaneo@umich.edu}.
#'
#' Richard K. Crump, Federal Reserve Bank of New York, New York, NY. \email{richard.crump@ny.frb.org}.
#'
#' Max H. Farrell, University of Chicago, Chicago, IL. \email{max.farrell@chicagobooth.edu}.
#'
#' Yingjie Feng (maintainer), University of Michigan, Ann Arbor, MI. \email{yjfeng@umich.edu}.
#'
#'@references
#' Cattaneo, M. D., R. K. Crump, M. H. Farrell, and Y. Feng. 2019a: \href{https://arxiv.org/abs/1902.09608}{On Binscatter}. Working Paper.
#'
#' Cattaneo, M. D., R. K. Crump, M. H. Farrell, and Y. Feng. 2019b: \href{https://arxiv.org/abs/1902.09615}{Binscatter Regressions}. Working Paper.
#'
#'
#'@examples
#'  x <- runif(500); y <- sin(x)+rnorm(500)
#'  ## Binned scatterplot
#'  binsreg(y,x)
#'@export

binsreg <- function(y, x, w=NULL, deriv=0,
dots=c(0,0), dotsgrid=0, dotsgridmean=T, line=NULL, linegrid=20,
ci=NULL, cigrid=0, cigridmean=T, cb=NULL, cbgrid=20,
polyreg=NULL, polyreggrid=20, polyregcigrid=0,
by=NULL, bycolors=NULL, bysymbols=NULL, bylpatterns=NULL,
legendTitle=NULL, legendoff=F,
testmodel=c(3,3), testmodelparfit=NULL, testmodelpoly=NULL,
testshape=c(3,3), testshapel=NULL, testshaper=NULL, testshape2=NULL,
nbins=NULL, binspos="qs", binsmethod="dpi", nbinsrot=NULL, samebinsby=F,
nsims=500, simsgrid=20, simsseed=666,
vce="HC1",cluster=NULL, level=95,
noplot=F, dfcheck=c(20,30), masspoints="on", weights=NULL, subset=NULL) {

# param for internal use
qrot <- 2

####################
### prepare data ###
####################
if (is.data.frame(y)) y <- y[,1]
if (is.data.frame(x)) x <- x[,1]
if (!is.null(w))      w <- as.matrix(w)
xname <- deparse(substitute(x))
yname <- deparse(substitute(y))
if (!is.null(by)) byname <- deparse(substitute(by))

# substract subset
if (!is.null(subset)) {
y <- y[subset]
x <- x[subset]
w <- w[subset, , drop = F]
by <- by[subset]
}

na.ok <- complete.cases(x) & complete.cases(y)
if (!is.null(w))  na.ok <- na.ok & complete.cases(w)
if (!is.null(by)) na.ok <- na.ok & complete.cases(by)

y  <- y[na.ok]
x  <- x[na.ok]
w  <- w[na.ok, , drop = F]
by <- by[na.ok]
xmin <- min(x); xmax <- max(x)

##################################################
######## Error Checking ##########################
##################################################
exit <- 0
if (deriv < 0) {
print("derivative incorrectly specified.")
exit <- 1
}
if (dotsgrid<0|linegrid<0|cigrid<0|cbgrid<0|polyreggrid<0|polyregcigrid<0) {
print("# of evaluation points incorrectly specified.")
exit <- 1
}
if (!is.null(nbins)) {
if (nbins < 0) {
print("# of bins incorrectly specified.")
exit <- 1
}
}
if (!is.character(binspos)) {
if (min(binspos)<=xmin|max(binspos)>=xmax) {
print("knots out of allowed range")
exit <- 1
}
} else {
if (binspos!="es" & binspos!="qs") {
print("binspos incorrectly specified.")
exit <- 1
}
}
if (length(dots)==2) if (dots[1] < dots[2]) {
print("p<s not allowed.")
exit <- 1
}
if (!is.null(line)) if (length(line)==2) if (line[1]<line[2]) {
print("p<s not allowed.")
exit <- 1
}
if (!is.null(ci)) if (length(ci)==2) if (ci[1]<ci[2]) {
print("p<s not allowed.")
exit <- 1
}
if (!is.null(cb)) if (length(cb)==2) if (cb[1]<cb[2]) {
print("p<s not allowed.")
exit <- 1
}
if (length(testshape)==2) if (testshape[1]<testshape[2]) {
print("p<s not allowed.")
exit <- 1
}
if (length(testmodel)==2) if (testmodel[1]<testmodel[2]) {
print("p<s not allowed.")
exit <- 1
}
if (dots[1] < deriv) {
print("p<deriv not allowed.")
exit <- 1
}
if (!is.null(line)) if (line[1] < deriv) {
print("p<deriv not allowed.")
exit <- 1
}
if (!is.null(ci)) if (ci[1] < deriv) {
print("p<deriv not allowed.")
exit <- 1
}
if (!is.null(cb)) if (cb[1] < deriv) {
print("p<deriv not allowed.")
exit <- 1
}
if (testshape[1]<deriv|testmodel[1]<deriv) {
print("p<deriv not allowed.")
exit <- 1
}
if (binsmethod!="dpi" & binsmethod!="rot") {
print("bin selection method incorrectly specified.")
exit <- 1
}
if (exit > 0) {
stop()
}

##################################################
# Prepare options
dots.p <- dots[1]; dots.s <- dots[2]
dotsmean <- 0
if (dotsgridmean) dotsmean <- 1

if (is.null(line)) {
linegrid <- 0
} else {
line.p <- line[1]
if (length(line)==1) {
line.s <- line.p
} else {
line.s <- line[2]
}
}

cimean <- 0
if (cigridmean) cimean <- 1
if (is.null(ci)) {
cigrid <- 0
cimean <- 0
} else {
ci.p <- ci[1]
if (length(ci)==1) {
ci.s <- ci.p
} else {
ci.s <- ci[2]
}
}

if (is.null(cb)) {
cbgrid <- 0
} else {
cb.p <- cb[1]
if (length(cb)==1) {
cb.s <- cb.p
} else {
cb.s <- cb[2]
}
}

if (is.null(polyreg)) {
polyreggrid   <- 0
polyregcigrid <- 0
}

tsha.p <- testshape[1]; tsha.s <- testshape[2]
tmod.p <- testmodel[1]; tmod.s <- testmodel[2]
nL <- length(testshapel); nR <- length(testshaper); nT <- length(testshape2)
ntestshape <- nL + nR + nT

localcheck <- massadj <- T; fewmasspoints <- F
if (masspoints=="on") {
localcheck <- T; massadj <- T
} else if (masspoints=="off") {
localcheck <- F; massadj <- F
localcheck <- T; massadj <- F
} else if (masspoints=="nolocalcheck") {
localcheck <- F; massadj <- T
} else if (masspoints=="veryfew") {
fewmasspoints <- T
}

#############################
# Extract byvals in by ######
if (!is.null(by)) {
byvals <- sort(unique(by))
ngroup <- length(byvals)
} else {
byvals <- "Full Sample"
ngroup <- 1
}

########################################
# Default plotting options
if (length(bycolors)==0) {
bycolors <- c("navy", "maroon", "forestgreen", "darkorange", "lavenderblush3",
"khaki", "sienna", "steelblue", "brown", "gold", "gray45")
bycolors <- rep(bycolors, length.out=ngroup)
} else {
bycolors <- rep(bycolors, length.out=ngroup)
}

if (length(bysymbols)==0) {
bysymbols <- c(19, 15:18, 0:14)
bysymbols <- rep(bysymbols, length.out=ngroup)
} else {
bysymbols <- rep(bysymbols, length.out=ngroup)
}

if (length(bylpatterns)==0) {
bylpatterns <- rep(1, ngroup)
} else {
bylpatterns <- rep(bylpatterns, length.out=ngroup)
}

# Legend
if (ngroup==1) {
legendoff <- T    # turn off legend when only one group is available
} else {
legendGroups <- paste(byvals)
if (length(legendTitle) == 0) {
legendTitle <- paste(byname, "=")
} else {
legendTitle <- legendTitle[1]
}
}

#########################################
if (binsmethod=="dpi") {
selectmethod <- "IMSE direct plug-in"
} else {
selectmethod <- "IMSE rule-of-thumb"
}
nbins_all <- nbins         # "nbins" is reserved for use within loop
if (!is.null(nbins)) {
selectmethod <- "User-specified"
}

knot <- NULL
knotlistON <- F
if (!is.character(binspos)) {
nbins <- length(binspos)+1
knot <- c(xmin, sort(binspos), xmax)
position <- "User-specified"
es <- F
selectmethod <- "User-specified"
knotlistON <- T
} else {
if (binspos == "es") {
es <- T
position <- "Evenly-spaced"
} else {
es <- F
position <- "Quantile-spaced"
}
}
Ndist <- Nclust <- NA

################################################
### Bin selection using full sample if needed ##
fullfewobs <- fewobs <- selectfullON <- F
if (fewmasspoints) fullfewobs <- fewobs <- T
if (!fullfewobs & is.null(nbins) & (is.null(by) | (!is.null(by) & samebinsby))) selectfullON <- T
if (selectfullON) {
# effective size
eN <- N <- length(x)

Ndist <- length(unique(x))
eN <- min(eN, Ndist)
}

if (!is.null(cluster)) {
Nclust <- length(unique(cluster))
eN <- min(eN, Nclust)
}

# check if rot can be implemented
if (is.null(nbinsrot)) {
if (eN <= dfcheck[1]+dots.p+1+qrot) {
warning("too small effective sample size for bin selection. # of mass of points or clusters used and by option ignored.")
fewobs <- fullfewobs <- T
byvals <- "Full Sample"
es <- F
position <- "Quantile-spaced"
}
}
if (!fewobs) {
if (is.na(Ndist))  {
Ndist.sel <- NULL
} else {
Ndist.sel <- Ndist
}
if (is.na(Nclust)) {
Nclust.sel <- NULL
} else {
Nclust.sel <- Nclust
}
binselect <- binsregselect(y, x, w, deriv=deriv,
bins=dots, binspos=binspos,
binsmethod=binsmethod, nbinsrot=nbinsrot,
vce=vce, cluster=cluster,
dfcheck=dfcheck, masspoints=masspoints, weights=weights,
numdist=Ndist.sel, numclust=Nclust.sel)
if (is.na(binselect$nbinsrot.regul)) { print("bin selection fails.") stop() } if (binsmethod == "rot") { nbins <- binselect$nbinsrot.regul
} else if (binsmethod == "dpi") {
nbins <- binselect$nbinsdpi if (is.na(nbins)) { warning("DPI selection fails. ROT choice used.") nbins <- binselect$nbinsrot.regul
}
}
}
}

# Generate knot using the full sample if needed

if ((selectfullON | (!is.null(nbins) & samebinsby)) & !fullfewobs) {
knotlistON <- T
if (es) {
knot <- genKnot.es(xmin, xmax, nbins)
} else {
knot <- genKnot.qs(x, nbins)
}
}

knot_all <- NULL
if (knotlistON) {
knot_all <- knot    # universal knot sequence
}

##########################################
if (!is.null(simsseed)) set.seed(simsseed)
alpha <- (100-(100 - level)/2)/100

##################################################################
N.by <- Ndist.by <- Nclust.by <- nbins.by <- cval.by <- NULL   # save results
data.plot <- list()     # list storing graph data

##################################################################
##################### ENTER the loop #############################
##################################################################
for (i in 1:ngroup) {
# Take subsample
if (ngroup != 1) {
sub <- (by == byvals[i])
y.sub <- y[sub]; x.sub <- x[sub]; w.sub <- w[sub, , drop = F]
cluster.sub <- cluster[sub]; weights.sub <- weights[sub]
} else {
y.sub <- y; x.sub <- x; w.sub <- w; cluster.sub <- cluster
weights.sub <- weights
}

# Effective size
xmin <- min(x.sub); xmax <- max(x.sub)
eN <- N <- length(x.sub)
N.by <- c(N.by, N)

Ndist <- NA
Ndist <- length(unique(x.sub))
eN <- min(eN, Ndist)
}
Ndist.by <- c(Ndist.by, Ndist)

Nclust <- NA
if (!is.null(cluster.sub)) {
Nclust <- length(unique(cluster.sub))
eN <- min(eN, Nclust)
}
Nclust.by <- c(Nclust.by, Nclust)

#######################################################
############### Bin selection if needed ###############
nbins <- NULL; knot <- NULL                       # initialize again
if (is.null(nbins_all) & !knotlistON & !fullfewobs) {
# check if rot can be implemented
if (is.null(nbinsrot)) if (eN <= dfcheck[1]+dots.p+1+qrot) {
warning("too small effective sample size for bin selection.
# of mass points or clusters used.")
fewobs <- T
nbins <- eN
es <- F
}
if (!fewobs) {
if (is.na(Ndist))  {
Ndist.sel <- NULL
} else {
Ndist.sel <- Ndist
}
if (is.na(Nclust)) {
Nclust.sel <- NULL
} else {
Nclust.sel <- Nclust
}
binselect <- binsregselect(y.sub, x.sub, w.sub, deriv=deriv,
bins=dots, binspos=binspos,
binsmethod=binsmethod, nbinsrot=nbinsrot,
vce=vce, cluster=cluster.sub,
dfcheck=dfcheck, masspoints=masspoints, weights=weights.sub,
numdist=Ndist.sel, numclust=Nclust.sel)
if (is.na(binselect$nbinsrot.regul)) { print("bin selection fails.") stop() } if (binsmethod == "rot") { nbins <- binselect$nbinsrot.regul
} else if (binsmethod == "dpi") {
nbins <- binselect$nbinsdpi if (is.na(nbins)) { warning("DPI selection fails. ROT choice used.") nbins <- binselect$nbinsrot.regul
}
}
}
}
if (!is.null(nbins_all)) nbins <- nbins_all
if (knotlistON) {
nbins <- length(knot_all)-1
knot  <- knot_all
}
if (fullfewobs) {
fewobs <- T
nbins <- eN
}

###########################################
# Checking for each case
dots.fewobs <- line.fewobs <- ci.fewobs <- cb.fewobs <- tsha.fewobs <- tmod.fewobs <- test.fewobs <- polyreg.fewobs <- F
if ((ntestshape!=0|!is.null(testmodelparfit)|!is.null(testmodelpoly)) & !is.null(by)) {
test.fewobs <- T
warning("tests cannot be implemented with by option.")
}
if (!fewobs) {
if ((nbins-1)*(dots.p-dots.s+1)+dots.p+1+dfcheck[2]>=eN) {
fewobs <- T
nbins <- eN
es <- F
warning("too small effective sample size for dots. # of mass points or clusters used.")
}
if (!is.null(line)) {
if ((nbins-1)*(line.p-line.s+1)+line.p+1+dfcheck[2]>=eN) {
line.fewobs <- T
warning("too small effective sample size for line.")
}
}
if (!is.null(ci)) {
if ((nbins-1)*(ci.p-ci.s+1)+ci.p+1+dfcheck[2]>=eN) {
ci.fewobs <- T
warning("too small effective sample size for ci.")
}
}
if (!is.null(cb)) {
if ((nbins-1)*(cb.p-cb.s+1)+cb.p+1+dfcheck[2]>=eN) {
cb.fewobs <- T
warning("too small effective sample size for line.")
}
}
if (ntestshape != 0 & is.null(by)) {
if ((nbins-1)*(tsha.p-tsha.s+1)+tsha.p+1+dfcheck[2]>=eN) {
tsha.fewobs <- T
test.fewobs <- T
warning("too small effective sample size for testing shape.")
}
}
if ((!is.null(testmodelparfit)|!is.null(testmodelpoly)) & is.null(by)) {
if ((nbins-1)*(tmod.p-tmod.s+1)+tmod.p+1+dfcheck[2]>=eN) {
tmod.fewobs <- T
test.fewobs <- T
warning("too small effective sample size for testing models.")
}
}
}

if (!is.null(polyreg)) if (polyreg+1>eN) {
polyreg.fewobs <- T
warning("too small effective sample size for polynomial fit.")
}

####################################
########### Generate knot ##########
####################################
fewmass <- F
if (fewobs) if (!is.na(Ndist)) if (eN==Ndist) fewmass <- T
if (fewmasspoints) fewmass <- T

if (is.null(knot)) {
if (!fewmass) {
if (es) knot <- genKnot.es(xmin, xmax, nbins)
else    knot <- genKnot.qs(x.sub, nbins)
}
} else {
if (fewobs) if (!is.na(Ndist)) if (eN!=Ndist) {
knot <- genKnot.qs(x.sub, nbins)
}
}

# knot for few mass points
if (fewmass) {
knot <- sort(unique(x.sub))
if (fewmasspoints) {
eN <- nbins <- Ndist <- length(knot)
}
}
else {
knot <- c(knot[1], unique(knot[-1]))
if (nbins!=length(knot)-1) {
warning("repeated knots. Some bins dropped.")
nbins <- length(knot)-1
}
}

# check local mass points
if (!fewobs & localcheck) {
uniqmin <- binsreg.checklocalmass(x.sub, nbins, es, knot=knot) # mimic STATA
if (!is.null(dots)) {
if (uniqmin < dots.p+1) {
dots.fewobs <- T
warning("some bins have too few distinct values of x for dots.")
}
}
if (!is.null(line)) {
if (uniqmin < line.p+1) {
line.fewobs <- T
warning("some bins have too few distinct values of x for line.")
}
}
if (!is.null(ci)) {
if (uniqmin < ci.p+1) {
ci.fewobs <- T
warning("some bins have too few distinct values of x for CI.")
}
}
if (!is.null(cb)) {
if (uniqmin < cb.p+1) {
cb.fewobs <- T
warning("some bins have too few distinct values of x for CB.")
}
}
if (ntestshape != 0 & is.null(by)) {
if (uniqmin < tsha.p+1) {
test.fewobs <- T
warning("some bins have too few distinct values of x for testing.")
}
}
if ((!is.null(testmodelparfit)|!is.null(testmodelpoly)) & is.null(by)) {
if (uniqmin < tmod.p+1) {
test.fewobs <- T
warning("some bins have too few distinct values of x for testing.")
}
}
}

# NOW, save nbins
nbins.by <- c(nbins.by, nbins)

#######################################
###### Prepare Plots ##################
#######################################
data.by <- list()    # initialize data list
# data.dots <- data.line <- data.ci<- data.cb <- data.poly <- data.polyci <- NULL

# Dots and CI for Small eN case
if (dotsmean+dotsgrid!=0 & !noplot & fewobs) {
warning("dots=c(0,0) used.")
k <- nbins
if (!is.na(Ndist)) {
if (eN == Ndist) {
dots.x <- knot
# RENEW knot, each value forms a category
xcat.few  <- findInterval(x.sub, knot)
} else {
dots.x <- (knot[-1]+knot[-length(knot)])/2
xcat.few <- findInterval(x.sub, knot, rightmost.closed = T, left.open = T)
}
} else {
dots.x <- (knot[-1]+knot[-length(knot)])/2
xcat.few  <- findInterval(x.sub, knot, rightmost.closed = T, left.open = T)
}

if (is.null(w.sub)) {
model <- lm(y.sub~-1+factor(xcat.few), weights=weights.sub)
} else {
model <- lm(y.sub~-1+factor(xcat.few)+w.sub, weights=weights.sub)
}
beta  <- model$coeff[1:k] pos <- !is.na(beta) k.new <- sum(pos) beta <- beta[pos] vcv <- binsreg.vcov(model, type=vce, cluster=cluster.sub)[1:k.new, 1:k.new] dots.fit <- beta data.dots <- data.frame(group=paste(byvals[i]), x=dots.x, fit=dots.fit) data.by$data.dots <- data.dots
if (cigrid+cimean!=0) {
warning("ci=c(0,0) used.")
dots.se  <- sqrt(diag(vcv))
ci.l <- dots.fit - dots.se * qnorm(alpha)
ci.r <- dots.fit + dots.se * qnorm(alpha)
data.ci <- data.frame(group=byvals[i], x=dots.x, ci.l=ci.l, ci.r=ci.r)
data.by$data.ci <- data.ci } } ########################################## ########## Usual Case #################### ########################################## dotsON <- lineON <- ciON <- cbON <- polyON <- F xmean <- NULL ################ Dots #################### if (dotsmean+dotsgrid !=0 & !noplot & !dots.fewobs & !fewobs) { dotsON <- T } if (dotsON) { dots.x <- dots.bin <- dots.isknot <- dots.mid <- c() if (dotsmean!=0) { xcat <- findInterval(x.sub, knot, rightmost.closed = T, left.open = T) xmean <- as.vector(tapply(x.sub, xcat, FUN=mean)) dots.x <- c(dots.x, xmean) dots.bin <- c(dots.bin, 1:nbins) dots.isknot <- c(dots.isknot, rep(0, nbins)) dots.mid <- c(dots.mid, rep(0, nbins)) } if (dotsgrid!=0) { grid <- binsreg.grid(knot, dotsgrid, addmore=T) dots.x <- c(dots.x, grid$eval); dots.bin <- c(dots.bin, grid$bin) dots.isknot <- c(dots.isknot, grid$isknot); dots.mid <- c(dots.mid, grid$mid) } B <- binsreg.spdes(eval=x.sub, p=dots.p, s=dots.s, deriv=0, knot=knot) P <- cbind(B, w.sub) # full design matrix model.dots <- lm(y.sub~-1+P, weights=weights.sub) check.drop(model.dots$coeff, ncol(B))

basis <- binsreg.spdes(eval=dots.x, p=dots.p, s=dots.s, knot=knot, deriv=deriv)
pred.dots  <- binsreg.pred(basis, model.dots, type = "xb", vce=vce, cluster=cluster.sub)
dots.fit <- pred.dots$fit dots.fit[dots.isknot==1] <- NA data.dots <- data.frame(group=paste(byvals[i]), x=dots.x, bin=dots.bin, isknot=dots.isknot, mid=dots.mid, fit=dots.fit) data.by$data.dots <- data.dots
}

################ Line ####################
if (linegrid !=0 & !noplot & !line.fewobs & !fewobs) {
lineON <- T
}
if (lineON) {
line.x <- grid$eval; line.bin <- grid$bin
line.isknot <- grid$isknot; line.mid <- grid$mid

line.reg.ON <- T
if (dotsON) if(line.p==dots.p & line.s==dots.s) {
model.line <- model.dots; line.reg.ON <- F
}
if (line.reg.ON) {
B     <- binsreg.spdes(eval=x.sub, p=line.p, s=line.s, deriv=0, knot=knot)
P     <- cbind(B, w.sub)         # full design matrix
model.line <- lm(y.sub~-1+P, weights=weights.sub)
check.drop(model.line$coeff, ncol(B)) } basis <- binsreg.spdes(eval=line.x, p=line.p, s=line.s, knot=knot, deriv=deriv) pred.line <- binsreg.pred(basis, model.line, type = "xb", vce=vce, cluster=cluster.sub) line.fit <- pred.line$fit
if (line.s == 0 | line.s-deriv <= 0) {
line.fit[line.isknot==1] <- NA
}
data.line <- data.frame(group=byvals[i], x=line.x, bin=line.bin, isknot=line.isknot,
mid=line.mid, fit=line.fit)
data.by$data.line <- data.line } ############### Poly fit ######################### if (polyreggrid!=0 & !noplot & !polyreg.fewobs) { polyON <- T } if (polyON) { grid <- binsreg.grid(knot, polyreggrid, addmore=T) poly.x <- grid$eval; poly.bin <- grid$bin poly.isknot <- grid$isknot; poly.mid <- grid$mid # Run a poly reg x.p <- matrix(NA, N, polyreg+1) for (j in 1:(polyreg+1)) x.p[,j] <- x.sub^(j-1) P.poly <- cbind(x.p, w.sub) model.poly <- lm(y.sub~-1+P.poly, weights=weights.sub) beta.poly <- model.poly$coefficients
poly.fit  <- 0
for (j in deriv:polyreg) {
poly.fit <- poly.fit + poly.x^(j-deriv)*beta.poly[j+1]*factorial(j)/factorial(j-deriv)
}

data.poly <- data.frame(group=byvals[i], x=poly.x, bin=poly.bin, isknot=poly.isknot,
mid=poly.mid, fit=poly.fit)
data.by$data.poly <- data.poly # add CI? if (polyregcigrid!=0) { grid <- binsreg.grid(knot, polyregcigrid, addmore=T) polyci.x <- grid$eval; polyci.bin <- grid$bin polyci.isknot <- grid$isknot; polyci.mid <- grid$mid npolyci.x <- length(polyci.x) basis.polyci <- matrix(NA, npolyci.x, polyreg+1) for (j in 1:(polyreg+1)) { if (j-1>=deriv) { basis.polyci[,j] <- polyci.x^(j-deriv-1)*factorial(j-1)/factorial(j-1-deriv) } else { basis.polyci[,j] <- rep(0, npolyci.x) } } polyci.pred <- binsreg.pred(basis.polyci, model=model.poly, type="all", vce=vce, cluster=cluster.sub) polyci.l <- polyci.pred$fit - qnorm(alpha) * polyci.pred$se polyci.r <- polyci.pred$fit + qnorm(alpha) * polyci.pred$se data.polyci <- data.frame(group=byvals[i], x=polyci.x, bin=polyci.bin, isknot=polyci.isknot, mid=polyci.mid, polyci.l=polyci.l, polyci.r=polyci.r) data.by$data.polyci <- data.polyci
}
}

################ CI ####################
if (cimean+cigrid !=0 & !noplot & !ci.fewobs & !fewobs) {
ciON <- T
}
if (ciON) {
ci.x <- ci.bin <- ci.isknot <- ci.mid <- c()
if (cimean!=0) {
if (!is.null(xmean)) {
ci.x <- c(ci.x, xmean)
} else {
xcat   <- findInterval(x.sub, knot, rightmost.closed = T, left.open = T)
ci.x <- c(ci.x, as.vector(tapply(x.sub, xcat, FUN=mean)))
}
ci.bin <- c(ci.bin, 1:nbins)
ci.isknot <- c(ci.isknot, rep(0, nbins))
ci.mid <- c(ci.mid, rep(0, nbins))
}
if (cigrid!=0) {
ci.x <- c(ci.x, grid$eval); ci.bin <- c(ci.bin, grid$bin)
ci.isknot <- c(ci.isknot, grid$isknot); ci.mid <- c(ci.mid, grid$mid)
}

ci.reg.ON <- T
if (lineON) if (ci.p==line.p & ci.s==line.s) {
model.ci <- model.line; ci.reg.ON <- F
}
if (ci.reg.ON) if (dotsON) if (ci.p==dots.p & ci.s==dots.s) {
model.ci <- model.dots; ci.reg.ON <- F
}
if (ci.reg.ON) {
B    <- binsreg.spdes(eval=x.sub, p=ci.p, s=ci.s, deriv=0, knot=knot)
P    <- cbind(B, w.sub)            # full design matrix
model.ci <- lm(y.sub~P-1, weights=weights.sub)
check.drop(model.ci$coeff, ncol(B)) } basis <- binsreg.spdes(eval=ci.x, p=ci.p, s=ci.s, knot=knot, deriv=deriv) ci.pred <- binsreg.pred(X=basis, model=model.ci, type="all", vce=vce, cluster=cluster.sub) ci.l <- ci.pred$fit - qnorm(alpha)*ci.pred$se ci.r <- ci.pred$fit + qnorm(alpha)*ci.pred$se ci.l[ci.isknot==1] <- NA ci.r[ci.isknot==1] <- NA data.ci <- data.frame(group=byvals[i], x=ci.x, bin=ci.bin, isknot=ci.isknot, mid=ci.mid, ci.l=ci.l, ci.r=ci.r) data.by$data.ci <- data.ci
}

################ CB ###############################
cval <- NA
if (cbgrid !=0 & !noplot & !cb.fewobs & !fewobs) {
cbON <- T
}
if (cbON) {
cb.x <- grid$eval; cb.bin <- grid$bin
cb.isknot <- grid$isknot; cb.mid <- grid$mid

cb.reg.ON <- T
if (ciON) if (cb.p==ci.p & cb.s==ci.s) {
model.cb <- model.ci; cb.reg.ON <- F
}
if (cb.reg.ON) if (lineON) if (cb.p==line.p & cb.s==line.s) {
model.cb <- model.line; cb.reg.ON <- F
}
if (cb.reg.ON) if (dotsON) if (cb.p==dots.p & cb.s==dots.s) {
model.cb <- model.dots; cb.reg.ON <- F
}

if (cb.reg.ON) {
B    <- binsreg.spdes(eval=x.sub, p=cb.p, s=cb.s, deriv=0, knot=knot)
P    <- cbind(B, w.sub)            # full design matrix
model.cb <- lm(y.sub~P-1, weights=weights.sub)
check.drop(model.cb$coeff, ncol(B)) } basis <- binsreg.spdes(eval=cb.x, p=cb.p, s=cb.s, knot=knot, deriv=deriv) pos <- !is.na(model.cb$coeff[1:ncol(B)])
k.new <- sum(pos)
cb.pred <- binsreg.pred(X=basis, model=model.cb, type="all",
vce=vce, cluster=cluster.sub)

### Compute cval ####
x.grid <- binsreg.grid(knot, simsgrid)$eval basis.sim <- binsreg.spdes(eval=x.grid, p=cb.p, s=cb.s, knot=knot, deriv=deriv) sim.pred <- binsreg.pred(X=basis.sim, model=model.cb, type="all", vce=vce, cluster=cluster.sub) vcv <- binsreg.vcov(model.cb, type=vce, cluster=cluster.sub)[1:k.new, 1:k.new] Sigma.root <- lssqrtm(vcv) num <- basis.sim[,pos,drop=F] %*% Sigma.root cval <- binsreg.pval(num, sim.pred$se, rep=nsims, tstat=NULL, side="two", alpha=level)$cval cb.l <- cb.pred$fit - cval*cb.pred$se cb.r <- cb.pred$fit + cval*cb.pred$se if (cb.s == 0 | cb.s - deriv <=0) { cb.l[cb.isknot==1] <- NA cb.r[cb.isknot==1] <- NA } data.cb <- data.frame(group=byvals[i], x=cb.x, bin=cb.bin, isknot=cb.isknot, mid=cb.mid, cb.l=cb.l, cb.r=cb.r) data.by$data.cb <- data.cb
}
cval.by <- c(cval.by, cval)

# Save all data for each group
data.plot[[i]] <- data.by
names(data.plot)[i] <- paste("Group", byvals[i], sep=" ")

}

########################################################
############ Hypothesis Testing ########################
########################################################
testON <- F
test <- NA
if ((ntestshape !=0 | !is.null(testmodelparfit) | !is.null(testmodelpoly)) &
!test.fewobs & !fewobs) {
testON <- T
}

if (testON) {
if (is.na(Ndist))  {
Ndist.sel <- NULL
} else {
Ndist.sel <- Ndist
}
if (is.na(Nclust)) {
Nclust.sel <- NULL
} else {
Nclust.sel <- Nclust
}
test <- binsregtest(y, x, w, deriv=deriv,
testmodel=testmodel, testmodelparfit=testmodelparfit,
testmodelpoly=testmodelpoly,
testshape=testshape, testshapel=testshapel,
testshaper=testshaper, testshape2=testshape2,
nbins=nbins, binspos=binspos,
nsims=nsims, simsgrid=simsgrid, simsseed=simsseed,
vce=vce, cluster=cluster,
dfcheck=dfcheck, masspoints=masspoints, weights=weights,
numdist=Ndist.sel, numclust=Nclust.sel)
}

########################################
############# Ploting ? ################
binsplot <- NULL
if (!noplot) {
binsplot <- ggplot() + theme_bw()
x <- fit <- ci.l <- ci.r <- cb.l <- cb.r <- polyci.l <- polyci.r <- group <- NULL
for (i in 1:ngroup) {
data.by <- data.plot[[i]]
if (!is.null(data.by$data.dots)) { if (!legendoff) { binsplot <- binsplot + geom_point(data=data.by$data.dots[complete.cases(data.by$data.dots[c("x", "fit")]),], aes(x=x, y=fit, colour=group), shape=bysymbols[i], size=2) } else { binsplot <- binsplot + geom_point(data=data.by$data.dots[complete.cases(data.by$data.dots[c("x", "fit")]),], aes(x=x, y=fit), col=bycolors[i], shape=bysymbols[i], size=2) } } if (!is.null(data.by$data.line)) {
binsplot <- binsplot +
geom_line(data=data.by$data.line, aes(x=x, y=fit), col=bycolors[i], linetype=bylpatterns[i]) } if (!is.null(data.by$data.poly)) {
binsplot <- binsplot +
geom_line(data=data.by$data.poly, aes(x=x, y=fit), col=bycolors[i], linetype=bylpatterns[i]) } if (!is.null(data.by$data.polyci)) {
binsplot <- binsplot +
geom_errorbar(data=data.by$data.polyci, aes(x=x, ymin=polyci.l, ymax=polyci.r), alpha=1, col=bycolors[i], linetype=bylpatterns[i]) } if (!is.null(data.by$data.ci)) {
binsplot <- binsplot +
geom_errorbar(data=data.by$data.ci[complete.cases(data.by$data.ci[c("x", "ci.l", "ci.r")]),],
aes(x=x, ymin=ci.l, ymax=ci.r),
alpha=1, col=bycolors[i], linetype=bylpatterns[i])
}
if (!is.null(data.by$data.cb)) { binsplot <- binsplot + geom_ribbon(data=data.by$data.cb, aes(x=x, ymin=cb.l, ymax=cb.r),
alpha=0.2, fill=bycolors[i])
}
}

if (!legendoff) {
binsplot <- binsplot +
scale_color_manual(name=legendTitle, values = bycolors[1:ngroup],
guide=guide_legend(override.aes = list(
linetype=rep(0, ngroup), shape=bysymbols[1:ngroup])))
} else {
binsplot <- binsplot + theme(legend.position="none")
}
binsplot <- binsplot + labs(x=paste(xname), y=paste(yname))
print(binsplot)
}

######################################
########### Output ###################
######################################
out <- list(bins_plot=binsplot, data.plot=data.plot, cval.by=cval.by,
test=test, opt=list(dots=dots, line=line, ci=ci, cb=cb,
polyreg=polyreg, deriv=deriv,
binspos=position, binsmethod=selectmethod,
N.by=N.by, Ndist.by=Ndist.by, Nclust.by=Nclust.by,
nbins.by=nbins.by, byvals=byvals))
out$call <- match.call() class(out) <- "CCFFbinsreg" return(out) } ########################################################################## #' Internal function. #' #' @param x Class \code{CCFFbinsreg} objects. #' #' @keywords internal #' @export #' print.CCFFbinsreg <- function(x, ...) { cat("Call: binsreg\n\n") cat("Binscatter Plot\n") cat(paste("Bin selection method (binsmethod) = ", x$opt$binsmethod, "\n", sep="")) cat(paste("Placement (binspos) = ", x$opt$binspos, "\n", sep="")) cat(paste("Derivative (deriv) = ", x$opt$deriv, "\n", sep="")) cat("\n") for (i in 1:length(x$opt$byvals)) { cat(paste("Group (by) = ", x$opt$byvals[i], "\n", sep="")) cat(paste("Sample size (n) = ", x$opt$N.by[i], "\n", sep="")) cat(paste("# of distinct values (Ndist) = ", x$opt$Ndist.by[i], "\n", sep="")) cat(paste("# of clusters (Nclust) = ", x$opt$Nclust.by[i], "\n", sep="")) cat(paste("dots, degree (p) = ", x$opt$dots[1], "\n", sep="")) cat(paste("dots, smooth (s) = ", x$opt$dots[2], "\n", sep="")) cat(paste("# of bins (nbins) = ", x$opt$nbins.by[i], "\n", sep="")) cat("\n") } } ################################################################################ #' Internal function. #' #' @param object Class \code{CCFFbinsreg} objects. #' #' @keywords internal #' @export summary.CCFFbinsreg <- function(object, ...) { x <- object args <- list(...) cat("Call: binsreg\n\n") cat("Binscatter Plot\n") cat(paste("Bin selection method (binsmethod) = ", x$opt$binsmethod, "\n", sep="")) cat(paste("Placement (binspos) = ", x$opt$binspos, "\n", sep="")) cat(paste("Derivative (deriv) = ", x$opt$deriv, "\n", sep="")) cat("\n") for (i in 1:length(x$opt$byvals)) { cat(paste("Group (by) = ", x$opt$byvals[i], "\n", sep="")) cat(paste("Sample size (n) = ", x$opt$N.by[i], "\n", sep="")) cat(paste("# of distinct values (Ndist) = ", x$opt$Ndist.by[i], "\n", sep="")) cat(paste("# of clusters (Nclust) = ", x$opt$Nclust.by[i], "\n", sep="")) cat(paste("dots, degree (p) = ", x$opt$dots[1], "\n", sep="")) cat(paste("dots, smooth (s) = ", x$opt$dots[2], "\n", sep="")) cat(paste("# of bins (nbins) = ", x$opt$nbins.by[i], "\n", sep="")) cat("\n") cat(paste(rep("=", 8 + 10 + 10 + 10), collapse="")); cat("\n") cat(format(" ", width= 8, justify="right")) cat(format("p", width=10, justify="right")) cat(format("s", width=10, justify="right")) cat(format("df", width=10, justify="right")) cat("\n") cat(paste(rep("-", 8 + 10 + 10 + 10), collapse="")); cat("\n") cat(format("dots", width= 8, justify="right")) cat(format(sprintf("%3.0f", x$opt$dots[1]), width=10 , justify="right")) cat(format(sprintf("%3.0f", x$opt$dots[2]), width=10 , justify="right")) dots.df <- x$opt$dots[1]+1+(x$opt$nbins.by[i]-1)*(x$opt$dots[1]-x$opt$dots[2]+1) cat(format(sprintf("%3.0f", dots.df), width=10 , justify="right")) if (!is.null(x$opt$line)) { cat("\n") cat(format("line", width= 8, justify="right")) cat(format(sprintf("%3.0f", x$opt$line[1]), width=10 , justify="right")) cat(format(sprintf("%3.0f", x$opt$line[2]), width=10 , justify="right")) line.df <- x$opt$line[1]+1+(x$opt$nbins.by[i]-1)*(x$opt$line[1]-x$opt$line[2]+1) cat(format(sprintf("%3.0f", line.df), width=10 , justify="right")) } if (!is.null(x$opt$ci)) { cat("\n") cat(format("CI", width= 8, justify="right")) cat(format(sprintf("%3.0f", x$opt$ci[1]), width=10 , justify="right")) cat(format(sprintf("%3.0f", x$opt$ci[2]), width=10 , justify="right")) ci.df <- x$opt$ci[1]+1+(x$opt$nbins.by[i]-1)*(x$opt$ci[1]-x$opt$ci[2]+1) cat(format(sprintf("%3.0f", ci.df), width=10 , justify="right")) } if (!is.null(x$opt$cb)) { cat("\n") cat(format("CB", width= 8, justify="right")) cat(format(sprintf("%3.0f", x$opt$cb[1]), width=10 , justify="right")) cat(format(sprintf("%3.0f", x$opt$cb[2]), width=10 , justify="right")) cb.df <- x$opt$cb[1]+1+(x$opt$nbins.by[i]-1)*(x$opt$cb[1]-x$opt$cb[2]+1) cat(format(sprintf("%3.0f", cb.df), width=10 , justify="right")) } if (!is.null(x$opt$polyreg)) { cat("\n") cat(format("polyreg", width= 8, justify="right")) cat(format(sprintf("%3.0f", x$opt$polyreg), width=10 , justify="right")) cat(format(NA, width=10 , justify="right")) cat(format(sprintf("%3.0f", x$opt$polyreg+1), width=10 , justify="right")) } cat("\n") cat(paste(rep("-", 8 + 10 + 10 + 10), collapse="")) cat("\n") cat("\n") } if (!all(is.na(x$test))) {
cat("\n")
x <- x$test if (!is.null(x$testshapeL$testvalL)|!is.null(x$testshapeR$testvalR)|!is.null(x$testshape2$testval2)) { cat(paste("Shape Restriction Tests:")); cat("\n") cat(paste("degree (p) = ", x$opt$testshape[1], sep="")); cat(paste(" ")) cat(paste("smooth (s) = ", x$opt$testshape[2], sep="")); cat("\n") if (!is.null(x$testshapeL$testvalL)) { cat(paste(rep("=", 15 + 12 + 12), collapse="")); cat("\n") cat(format("H0: sup mu <=", width = 15, justify = "left")) cat(format("sup T", width = 12, justify = "right")) cat(format("p value", width = 12, justify = "right")) cat("\n") cat(paste(rep("-", 15 + 12 + 12), collapse="")); cat("\n") cat("\n") for (i in 1:length(x$testshapeL$testvalL)) { cat(format(paste(x$testshapeL$testvalL[i]), width = 15, justify = "centre")) cat(format(sprintf("%3.3f", x$testshapeL$stat.shapeL[i]), width = 12, justify = "right")) cat(format(sprintf("%3.3f", x$testshapeL$pval.shapeL[i]), width = 12, justify = "right")) cat("\n") } cat(paste(rep("-", 15 + 12 + 12), collapse="")) cat("\n") cat("\n") } if (!is.null(x$testshapeR$testvalR)) { cat(paste(rep("=", 15 + 12 + 12), collapse="")); cat("\n") cat(format("H0: inf mu >=", width = 15, justify = "left")) cat(format("inf T", width = 12, justify = "right")) cat(format("p value", width = 12, justify = "right")) cat("\n") cat(paste(rep("-", 15 + 12 + 12), collapse="")); cat("\n") cat("\n") for (i in 1:length(x$testshapeR$testvalR)) { cat(format(paste(x$testshapeR$testvalR[i]), width = 15, justify = "centre")) cat(format(sprintf("%3.3f", x$testshapeR$stat.shapeR[i]), width = 12, justify = "right")) cat(format(sprintf("%3.3f", x$testshapeR$pval.shapeR[i]), width = 12, justify = "right")) cat("\n") } cat(paste(rep("-", 15 + 12 + 12), collapse="")) cat("\n") cat("\n") } if (!is.null(x$testshape2$testval2)) { cat(paste(rep("=", 15 + 12 + 12), collapse="")); cat("\n") cat(format("H0: mu =", width = 15, justify = "left")) cat(format("sup |T|", width = 12, justify = "right")) cat(format("p value", width = 12, justify = "right")) cat("\n") cat(paste(rep("-", 15 + 12 + 12), collapse="")); cat("\n") cat("\n") for (i in 1:length(x$testshape2$testval2)) { cat(format(paste(x$testshape2$testval2[i]), width = 15, justify = "centre")) cat(format(sprintf("%3.3f", x$testshape2$stat.shape2[i]), width = 12, justify = "right")) cat(format(sprintf("%3.3f", x$testshape2$pval.shape2[i]), width = 12, justify = "right")) cat("\n") } cat(paste(rep("-", 15 + 12 + 12), collapse="")) cat("\n") cat("\n") } } if (!is.null(x$testpoly$testpoly)|!all(is.na(x$testmodel$stat.model))) { cat(paste("Model Specification Tests:")); cat("\n") cat(paste("degree (p) = ", x$opt$testmodel[1], sep="")); cat(paste(" ")) cat(paste("smooth (s) = ", x$opt$testmodel[2], sep="")); cat("\n") if (!is.null(x$testpoly$testpoly)) { cat(paste(rep("=", 15 + 12 + 12), collapse="")); cat("\n") cat(format("H0: mu =", width = 15, justify = "left")) cat(format("sup |T|", width = 12, justify = "right")) cat(format("p value", width = 12, justify = "right")) cat("\n") cat(paste(rep("-", 15 + 12 + 12), collapse="")); cat("\n") cat("\n") cat(format(paste("poly. p=", x$testpoly$testpoly, sep=""), width = 15, justify = "left")) cat(format(sprintf("%3.3f", x$testpoly$stat.poly), width = 12, justify = "right")) cat(format(sprintf("%3.3f", x$testpoly$pval.poly), width = 12, justify = "right")) cat("\n") cat(paste(rep("-", 15 + 12 + 12), collapse="")) cat("\n") } if (!all(is.na(x$testmodel$stat.model))) { cat(paste(rep("=", 15 + 12 + 12), collapse="")); cat("\n") cat(format("H0: mu =", width = 15, justify = "left")) cat(format("sup |T|", width = 12, justify = "right")) cat(format("p value", width = 12, justify = "right")) cat("\n") cat(paste(rep("-", 15 + 12 + 12), collapse="")); cat("\n") cat("\n") for (i in 1:length(x$testmodel$stat.model)) { cat(format(paste("model ", i, sep=""), width = 15, justify = "centre")) cat(format(sprintf("%3.3f", x$testmodel$stat.model[i]), width = 12, justify = "right")) cat(format(sprintf("%3.3f", x$testmodel\$pval.model[i]),   width = 12, justify = "right"))
cat("\n")
}
cat(paste(rep("-", 15 + 12 + 12), collapse=""))
cat("\n")
}
}
}
cat("\n")
}


## Try the binsreg package in your browser

Any scripts or data that you put into this service are public.

binsreg documentation built on May 2, 2019, 12:19 p.m.