R/chngptm.R
In chngpt: Estimation and Hypothesis Testing for Threshold Regression

Documented in AIC.chngptm chngptm chngptm.xy chngptm.xy chngptm.xy coef.chngptm convert.coef lincomb logLik.chngptm plot.chngptm predictx print.chngptm residuals.chngptm summary.chngptm threshold.func vcov.chngptm

#family="gaussian"; type="M20"; threshold.type="M20"; formula.1=y ~ z; formula.2=~x; data=dat; formula.strat=NULL; subsampling=0; tol=1e-4; maxit=1e2; verbose=TRUE; est.method="fastgrid"; var.type="bootstrap"; lb.quantile=.1; ub.quantile=.9; grid.search.max=500; weights=NULL; chngpt.init=NULL; alpha=0.05; b.transition=Inf; search.bound=10; ci.bootstrap.size=10; m.out.of.n=0; aux.fit=NULL; offset=NULL; bootstrap.type="nonparametric"; order.max=10; boot.test.inv.ci=F; save.boot=T; library(kyotil)
chngptm = function(formula.1, formula.2, family, data, 
  type=c(
      "hinge","M01","M02","M03","M04", # hinge = M01
      "upperhinge","M10","M20","M30","M40",# upperhinge = M10
      "M21","M12","M21c","M12c","M22","M22c","M31","M13","M33c", # segmented models with higher order trends
      "segmented","M11","segmented2", # segmented = M11
      "M111",# three phase segmented
      "step","stegmented"), # segmented2 is the model studied in Cheng (2008)
  formula.strat=NULL, weights=NULL, offset=NULL,  
  # lmer options
  REML=TRUE, re.choose.by.loglik=FALSE,
  # estimation
  est.method=c("default","fastgrid2","fastgrid","grid","smoothapprox"), 
  var.type=c("default","none","robust","model","bootstrap","all"), aux.fit=NULL,  # robusttruth is for development only
  lb.quantile=.05, ub.quantile=.95, grid.search.max=Inf,# chngpts=NULL,
  test.inv.ci=TRUE, boot.test.inv.ci=FALSE, # test.inv.ci is passed to local functions, boot.test.inv.ci is global within this function
  # bootstrap arguments
  bootstrap.type=c("nonparametric","wild","sieve","wildsieve","awb"), 
  m.out.of.n=0, subsampling=0, 
  order.max=10, # for autoregressive
  ci.bootstrap.size=1000, alpha=0.05, save.boot=TRUE, 
  # others
  b.transition=Inf,# controls whether threshold model or smooth transition model
  tol=1e-4, maxit=1e2, chngpt.init=NULL, search.bound=10,
  keep.best.fit=TRUE, # best.fit is needed for making prediction and plotting
  ncpus=1, # for multicore bootstrap
  verbose=FALSE, ...) 
  
{   
    if (missing(type)) stop("type missing")    
    type<-match.arg(type)  # change name from type to threshold.type
    var.type<-match.arg(var.type)    
    est.method<-match.arg(est.method)    
    bootstrap.type<-match.arg(bootstrap.type)    
    
    # disable lower level parallelization in favor of higher level of parallelization
    # without this, code may hang when ncpus>1
    RhpcBLASctl::blas_get_num_procs()
    RhpcBLASctl::blas_set_num_threads(1L)
    #stopifnot(blas_get_num_procs() == 1L) # Commented this out as it does not work as expected any more!
    RhpcBLASctl::omp_set_num_threads(1L)

    
    if (var.type=="bootstrap" & ci.bootstrap.size %% ncpus != 0) stop("ci.bootstrap.size needs to be a multiple of ncpus")
    
    threshold.type=type
    if (est.method=="fastgrid") est.method="fastgrid2" # keep fastgrid only for backward compatibility
    if(threshold.type=="M01") threshold.type="hinge"
    if(threshold.type=="M10") threshold.type="upperhinge"
    if(threshold.type=="M11") threshold.type="segmented"
    
    if (m.out.of.n>0 & subsampling>0) stop("Only one of m.out.of.n and subsampling can be greater than 0. subsampling is without replacement, and m.out.of.n is with replacement.")
    n.sub=m.out.of.n+subsampling
    
    if(!is.character(family)) stop("Please enter a string as family, e.g. \"gaussian\"")
    family=tolower(family)
        
    # set b.search
    stopifnot(b.transition>0)
    b.search=if(b.transition==Inf) 30 else b.transition
    if(threshold.type=="segmented2" & b.transition==Inf) stop("for segmented2, b.transition should not be Inf") # limited implementation for segmented2
    
    #### var.type
    if(var.type=="default") {
        if(family=="gaussian") {
            var.type="bootstrap"
        } else if (!is.null(aux.fit)) {
            var.type="robust"
        } else if (family %in% c("gaussian","binomial") & threshold.type %in% c("hinge","upperhinge","segmented") ) {
            var.type="model"
        } else  {
            # model-based variance only implemented for gaussian and binomial
            var.type="none"
        }
        
        # once we support fast grid search for discontinuous models, the following line will be changed
        if(threshold.type %in% c("stegmented")) var.type="none" 
    }    
    if(!family %in% c("gaussian","binomial") & !var.type %in% c("bootstrap","none")) stop("no analytical variance estimates are provided for this family: "%.%family)
    if (var.type %in% c("robust","robusttruth","all") & is.null(aux.fit)) stop("need an aux.fit for robust variance estimate")   
    
    if(ci.bootstrap.size==0 & var.type=="bootstrap") var.type="none"     
    
    # formula.1 may have random effects
    anyBars=function(term) any(c("|", "||") %in% all.names(term)) # same as lme4:::anyBars
    has.re=anyBars(formula.1)
    if (has.re) {
        # remove missing observations
        subset. = complete.cases(model.frame(lme4::nobars(formula.1), data, na.action=na.pass)) & complete.cases(model.frame(formula.2, data, na.action=na.pass))
        data=data[subset.,,drop=FALSE]
        
        y=model.frame(lme4::nobars(formula.1), data)[,1] #if lhs of formula.1 is cbind(,), y is a matrix
        Z=model.matrix(lme4::nobars(formula.1), data)        
    } else {
        # remove missing observations
        subset. = complete.cases(model.frame(formula.1, data, na.action=na.pass)) & complete.cases(model.frame(formula.2, data, na.action=na.pass))
        data=data[subset.,,drop=FALSE]
        
        y=model.frame(formula.1, data)[,1] #if lhs of formula.1 is cbind(,), y is a matrix
        Z=model.matrix(formula.1, data)        
    }
    
    # extract design matrix, x, and y
    formula.2.dat=model.matrix(formula.2, data)[,-1,drop=F]
    # figuring out what is the covariate to be thresholded
    chngpt.var.name=setdiff(colnames(formula.2.dat), colnames(Z))[1];     
    if(is.na(chngpt.var.name)) stop("Something is wrong. Check the formula.")
    
    # interaction
    z.1.name=intersect(colnames(formula.2.dat), colnames(Z))
    has.itxn = length(z.1.name)>0
    z.1 = formula.2.dat[,z.1.name] # if the intersection is a null set, z.1 is a matrix of n x 0 dimension
    
    fastgrid.ok = family %in% c("gaussian") & !has.itxn & !has.re & threshold.type %in% c("hinge","upperhinge","segmented","M20","M02","M30","M03","M21","M12","M21c","M12c","M22","M22c","M31","M13","M33c","step","M111")
                  
                  
    #### est.method
    # smoothapprox cannot work when lhs of formula.1 is cbind() 
    if (est.method=="smoothapprox" & ((!family%in%c("binomial","gaussian")) | !threshold.type %in% c("hinge","step","upperhinge","segmented") | has.re)) {
        stop ("smoothapprox only implemented for this, please try grid")
    }
    if(!fastgrid.ok & est.method%in%c("fastgrid","fastgrid2")) {
        warning ("Switching to grid search since fast grid search not implemented for this ...")
        est.method="grid"
    }
    if (est.method=="default") {
        if(fastgrid.ok) { 
            if(family=="binomial") est.method="fastgrid" else est.method="fastgrid2" 
        } else if(family=="binomial") { 
            if (has.re | !threshold.type %in% c("hinge","step","upperhinge","segmented")) est.method="smoothapprox" else est.method="grid"
        } else { 
            est.method="grid"
        }        
    }
    #if (fastgrid.ok & !est.method%in%c("fastgrid","fastgrid2") & var.type!="none") cat("Note that for linear models, fastgrid2 is the best est.method.\n") # var.type condition is added so that this is not printed during bootstrap
    
    if (est.method=="fastgrid2" & grid.search.max!=Inf) {
        grid.search.max=Inf
        warning("When doing fast grid search, grid.search.max is automatically set to Inf because it does not take more time to examine all potential thresholds")
    }
    
    # convert hinge models (M0x) to upperhinge models (Mx0)
    # var.model and var.robust has not been fixed and won't work when hinge is converted to upper hinge
    hinge.to.upperhinge=FALSE
    if(!var.type %in% c("all","robust","model") & est.method!="smoothapprox") {
        if (threshold.type == "hinge") {hinge.to.upperhinge=TRUE; threshold.type= "upperhinge"}
        if (threshold.type == "M02")   {hinge.to.upperhinge=TRUE; threshold.type= "M20"}
        if (threshold.type == "M03")   {hinge.to.upperhinge=TRUE; threshold.type= "M30"}
        if (threshold.type == "M04")   {hinge.to.upperhinge=TRUE; threshold.type= "M40"}
        if (threshold.type == "M12")   {hinge.to.upperhinge=TRUE; threshold.type= "M21"}
        if (threshold.type == "M12c")  {hinge.to.upperhinge=TRUE; threshold.type= "M21c"}
        if (threshold.type == "M13")   {hinge.to.upperhinge=TRUE; threshold.type= "M31"}        
    }
    
    if(hinge.to.upperhinge) {
        data[[chngpt.var.name]]=-data[[chngpt.var.name]]    
        tmp=lb.quantile
        lb.quantile=1-ub.quantile
        ub.quantile=1-tmp
    }
    
    if(fastgrid.ok) {
        imodel=switch(threshold.type, upperhinge=10, segmented=10, M20=20, M21=20, M21c=204, M22=22, M22c=224, M30=30, M31=30, M33c=334, M111=111,
                                  step=5, M40=NA, stop("wrong imodel"))
    } else imodel=NA
                
    chngpt.var = data[[chngpt.var.name]]
    # note that in chngpt.test, Z may already include chngptvar if threshold.type is segmented
    
    # higher order polynomial 
    has.cubic=startsWith(threshold.type,"cubic") | contain(threshold.type, "3") 
    has.quad =startsWith(threshold.type,"quad")  | contain(threshold.type, "2") | has.cubic
        
    # stratification, a.k.a. covariate-dependent threshold regression
    if (is.null(formula.strat)) {
        stratified.by=NULL
        stratified=FALSE
    } else {
        # note that we have not actually checked stratification and upper hinge
        if(!threshold.type %in% c("hinge","upperhinge","segmented")) stop("stratification only implemented for hinge, upper hinge, and segmented threshold effects")
        if(!inherits(formula.strat, "formula")) stop("formula.strat needs to be a formula")
        strat.dat=model.matrix(formula.strat, data)[,-1,drop=F]
        if (ncol(strat.dat)!=1) {str(strat.dat); stop("something is wrong with formula.strat") }
        stratified.by=strat.dat[,1]
        if (!all(sort(unique(stratified.by))==c(0,1))) {stop("Only 0/1 or T/F stratification variables are supported for now.") }
        stratified=TRUE
        if(!est.method %in% c("default","fastgrid2","grid")) stop("est.method not supported for stratified")
        if(!family %in% c("gaussian")) stop("family not supported for stratified")
        n.0=sum(stratified.by==0)
        n.1=sum(stratified.by==1)
    }
    
    # dimensions
    n=nrow(Z)
    p.z=ncol(Z)
    p.2=switch(threshold.type, step=1, hinge=1, upperhinge=1, segmented=2, segmented2=2, stegmented=3)
    p.2.itxn=p.2*ifelse(has.itxn,2,1)
    p=p.z+p.2.itxn+1 #total number of paramters, including threshold
    if(stratified) {
        # if these dimensions are needed, e.g. in smoothapprox, they need to be updated
    }
        
    #### formula.new is the model to fit in grid search, data is made by make.chngpt.var
    # start with the components not dependent on threshold
    formula.new = formula.1
    # add the non-thresholded version of x if needed
    # include.x works through formula.new in grid search and through design matrix in fast grid search
    include.x=threshold.type %in% c("segmented","segmented2","stegmented","M21","M21c","M12","M31","M13","M111")
    if(include.x) formula.new = update(formula.new, as.formula("~.+"%.%chngpt.var.name)) 
    # add the components dependent on threshold
    if (!has.itxn) {
        formula.new=update(formula.new, get.f.alt(threshold.type, chngpt.var.name, modified.by=NULL, stratified=stratified, has.quad=has.quad, has.cubic=has.cubic))
    } else {
        for (each in z.1.name) {
            formula.new=update(formula.new, get.f.alt(threshold.type, chngpt.var.name, modified.by=each, stratified=stratified, has.quad=has.quad, has.cubic=has.cubic))
        }
    }    
    
    # weights
    # chngpt.glm.weights goes with the data because glm() is incredible that if the weights arg does not have global scope, it needs to be a variable name in the data frame
    # chngpt.glm.weights and does not get updated in the next block while weights are updated
    if (is.null(weights)) weights=rep(1,nrow(data)) 
    if (!is.numeric(weights)) stop("If weights is specified, it needs to be a numeric vector")
    data$chngpt.glm.weights=weights
    # create a copy of chngpt.glm.weights in the function scope so that do.regression works
    chngpt.glm.weights=data$chngpt.glm.weights  
    # offset, a hack to make glm call work
    if (is.null(offset)) offset=rep(0,nrow(data)) 
    if (!is.numeric(offset)) {
        str(offset)
        stop("If offset is specified, it needs to be a numeric vector")
    }
    data$chngpt.glm.offset=offset    
    chngpt.glm.offset=data$chngpt.glm.offset  
    do.regression=function(formula.new, data, family){
        if (family=="coxph") {
            fit = keepWarnings(survival::coxph(formula.new, data=data, weights=chngpt.glm.weights)) # interestingly and I don't understand why weights=data$chngpt.glm.weights fails
            
        } else if (has.re) {
            fit = keepWarnings(lme4::lmer(formula.new, data=data, REML=REML) )
            
        } else {
            #str(formula.new); str(data); str(chngpt.glm.weights); str(family)
            fit = keepWarnings(glm(formula.new, data=data, weights=chngpt.glm.weights, family=family, offset=chngpt.glm.offset) )
        }
        fit
    }
    
    # deal with cbind() in lhs in logistic regression 
    # convert cbind() to single column and update weights (but not chngpt.glm.weights, b/c formula is still cbind())
    if(is.matrix(y) & family=="binomial") {
        n.tmp <- y[,1]+y[,2]
        y <- ifelse(n.tmp == 0, 0, y[,1]/n.tmp)        
        weights <- n.tmp*weights
    }
    
    
    # sort data
    if(!stratified) {
        order.=order(chngpt.var)
        stratified.by.sorted=NULL
    } else {
        order.=order(stratified.by, chngpt.var)
        stratified.by.sorted=stratified.by[order.]
    }
    chngpt.var.sorted=chngpt.var[order.]
    Z.sorted=Z[order.,,drop=FALSE]
    y.sorted=y[order.]
    w.sorted=weights[order.]
    data.sorted=data[order.,]
    o.sorted = offset[order.]
    
    # print model info
    
    if (verbose) {
        print(formula.new)
        myprint(stratified)
        myprint(family)
        myprint(threshold.type, imodel, hinge.to.upperhinge)
        myprint(est.method, fastgrid.ok, var.type)
        myprint(has.quad, has.cubic, has.itxn)
        if (verbose>=2) myprint(p.z, p.2.itxn, p)
        if (var.type=="bootstrap") myprint(var.type, m.out.of.n, subsampling, bootstrap.type) 
        if(verbose>=3) cat("variable to be thresholded: ", chngpt.var.name, " \n")
    }
        
    # threshold candidates
#    if(is.null(chngpts)) 
        chngpts=get.chngpts(chngpt.var.sorted,lb.quantile,ub.quantile,n.chngpts=grid.search.max, stratified.by.sorted=stratified.by.sorted)
#    }
    if(verbose) myprint(chngpts)


    ###############################################################################################
    # estimation
    
    grid.search=function(){        
        if(fastgrid.ok & est.method%in%c("gridC", "fastgrid", "fastgrid2")) {
            #################################
            # fastgrid search
            #################################
        
#            # for fastgrid2 debugging
#            e=chngpts[1]; e2=chngpts[2]            
#            ve=(chngpt.var.sorted-e)*(chngpt.var.sorted<e)
#            ue=(chngpt.var.sorted-e)*(chngpt.var.sorted>e)
#            Ve1=cbind(ve**2) #(chngpt.var.sorted-e), (chngpt.var.sorted-e)**2, ve**3, ue**3)
#            print(t(Ve1)%*%Ve1)
##            e=e2
##            ve=(chngpt.var.sorted-e)*(chngpt.var.sorted<e)
##            ue=(chngpt.var.sorted-e)*(chngpt.var.sorted>e)
##            Ve2=cbind((chngpt.var.sorted-e), (chngpt.var.sorted-e)**2, ve**3, ue**3)
##            print(t(Ve2)%*%Ve2)
#            
#            desg=cbind(Z.sorted, if(include.x) chngpt.var.sorted)
#            #str(desg);# stop("debugging")
#            A <- solve(t(desg) %*% desg)
#            H <- desg %*% A %*% t(desg)
#            r <- as.numeric((diag(n)-H) %*% y.sorted)
#            print(t(Ve1) %*% r)
#            
#            # find sqrt of A
#            if (ncol(A)==1) {
#                A.sqrt=sqrt(A)
#            } else {
#                a.eig <- eigen(A)   
#                A.sqrt <- a.eig$vectors %*% diag(sqrt(a.eig$values)) %*% t(a.eig$vectors) # solve can be replaced by t b/c A is symmetrical
#            }
#            B = desg %*% A.sqrt                                    
#            
#            #r %*% Ve1 %*% solve(t(Ve1)%*%Ve1 - t(Ve1)%*%B%*%t(B)%*%Ve1) %*% t(r %*% Ve1) - r %*% Ve2 %*% solve(t(Ve2)%*%Ve2 - t(Ve2)%*%B%*%t(B)%*%Ve2) %*% t(r %*% Ve2)
#            
#            print(t(Ve1)%*%B)
                
    
#            # gridC:     Only used for a fair comparison of speed with fastgrid. It is not as robust as grid, which is an R implementation that handles weights more easily
#            # for upperhinge fastgrid2, get.liks.upperhinge is an R implementation for a simple case: linear, without weights. It is even slower than grid search, 
#            # but it implements a good algorithm and can be used to debug fastgrid2
#            logliks=get.liks.upperhinge (y.sorted,Z.sorted,chngpt.var.sorted,chngpts)   
        
#                # for fastgrid2 development, the first two argments may be replaced by:
#                cbind(B, chngpt.var.sorted), #design matrix, the last column is to be used as (x-e)+ in the C program
#                as.double(r), 
    
            # computes Y' H_e Y
            if(!stratified) {
                # weirdly, f.name cannot be put inline b/c rcmdcheck throws an error otherwise
                #f.name=est.method %.% ifelse(threshold.type %in% c("M22","M22c","step"), threshold.type, ifelse(has.cubic,"cubic", ifelse(has.quad,"quad",""))) %.%  "_" %.% family
                f.name=paste0(est.method, "_", family)
                if(verbose) cat(paste0("making call to ", f.name, " ...\n"))
                logliks = .Call(f.name
                    , as.integer(imodel)
                    , cbind(Z.sorted, chngpt.var.sorted, if(include.x) chngpt.var.sorted) #design matrix, the last column is to be used as (x-e)+ in the C program
                    , as.double(y.sorted-o.sorted) # note that this way of handling offset only works for linear regression
                    , as.double(w.sorted)
                    , as.integer(attr(chngpts,"index"))
                    , as.integer(verbose)
                    , 0 # bootstrap size
                    , 0 # subsampling or m.out.of.n sample size
                    , 0 # false
                    , 0 # sieve.dat
                )  
                # fastgrid is more sensitive to singularity of design matrix (A <- solve(t(desg) %*% desg) can lead to trouble)
                # the following check handles this exception more gracefully
                if(all(is.nan(logliks))) stop("fastgrid fails, probably due to singularity")
                #print(logliks)
            } else {
                #cbind(chngpt.var.sorted*c(rep.int(0,n.0),rep.int(1,n.1)), chngpt.var.sorted*c(rep.int(1,n.0),rep.int(0,n.1))), # added col
                f.name="twoD_"%.%family
                if(verbose) cat(paste0("making call to ", f.name, " ...\n"))
                #print(cbind(Z.sorted, chngpt.var.sorted))
                logliks = .Call(f.name 
                    , cbind(Z.sorted, if(include.x) chngpt.var.sorted) # X
                    , as.double(chngpt.var.sorted) # threshold variable
                    , as.double(y.sorted-o.sorted) # note that this way of handling offset only works for linear regression
                    , as.double(w.sorted) 
                    , as.integer(stratified.by.sorted)
                    , as.double(lb.quantile)
                    , as.double(ub.quantile)
                    , 0 # bootstrap size
                    , as.integer(verbose)
                )
                logliks=matrix(logliks, nrow=length(chngpts[[1]]), ncol=length(chngpts[[2]]))
            }
            # remember to to put as.double around y.sorted since if y.sort is integer, this throws an error b/c the cpp function expects real 
            # don't put as.double around a matrix since changes the matrix into a vector, which affects the ordering of elements
            
            # end fastgrid search
            
        } else {
            #################################
            # brute force grid search
            #################################
            
            logliks = if(!stratified & threshold.type!="M111") {
                # single threshold
                sapply (chngpts, function(e) {
                    data=make.chngpt.var(chngpt.var, e, threshold.type, data, b.transition, stratified.by)
                    #myprint(e); tmp=data[,c("x","x.mod.e")]; tmp=tmp[order(tmp[,"x"]),]; print(tmp); stop("debug")
                    #if(e==chngpts[1]) {tmp=model.matrix(formula.new, data); print(solve(t(tmp)%*%tmp))} #check for singularity
                    
                    fit = do.regression (formula.new, data, family)     
                    if(length(fit$warning)!=0 & verbose>=3) {myprint(e); print(fit$warning); print(coef(fit$value)); print((logLik(fit$value)))}                 
#                    #cat(sum(resid(fit$value)**2), " ") # to compare with fastgrid
#                    if (chngpts[2]==e) {# the index can be changed at different stages of debugging
#                        #print(model.matrix(formula.new, data))
#                        Ve=model.matrix(formula.new, data)[,3:4]
#                        tmp=cbind(1, data$z)
#                        H=tmp%*%solve(t(tmp)%*%tmp)%*%t(tmp)
#                        print(t(Ve)%*%H%*%Ve)
#                        print(t(Ve)%*%Ve-t(Ve)%*%H%*%Ve)
#                        #print(t(data$y) %*% H %*% data$y)
#                    }
                    if(has.re) {
                        if (re.choose.by.loglik) {
                            as.numeric(logLik(fit$value))
                        } else {
                            sum(w.sorted*(y.sorted-o.sorted)**2)-sum(resid(fit$value)**2)
                        }
                    } else {
                        if(family=="gaussian") {
                            sum(w.sorted*(y.sorted-o.sorted)**2)-sum(resid(fit$value)**2)
                        } else if(family=="quasipoisson") {
                            -deviance(fit$value)
                        } else {
                            as.numeric(logLik(fit$value))
                        }   
                    }
                    
                })
    
            } else if (threshold.type=="M111") {
                # two thresholds on x
                n.chngpts=length(chngpts)
                tmp=matrix(NA,nrow=n.chngpts,ncol=n.chngpts)
                for (i in 1:(n.chngpts-1)) {
                    for (j in (i+1):n.chngpts) {
                        data=make.chngpt.var(chngpt.var, chngpts[c(i,j)], threshold.type, data, b.transition, stratified.by)
                        fit = do.regression (formula.new, data, family)       
#                        if (i==2 & j==3) {
#                            #print(model.matrix(formula.new, data))
#                            Ve=model.matrix(formula.new, data)[,4:5]
#                            tmptmp=cbind(1, data$z, dat$x)
#                            H=tmptmp%*%solve(t(tmptmp)%*%tmptmp)%*%t(tmptmp)                            
#                            r=dat$y-H%*%dat$y
#                            myprint(t(Ve)%*%Ve, digits=10)
#                            #print(t(Ve)%*%H%*%Ve)
#                            myprint(t(Ve)%*%Ve-t(Ve)%*%H%*%Ve, digits=10)
#                            myprint(t(Ve)%*%r, digits=10)
#                            myprint(t(t(Ve)%*%r) %*% solve(t(Ve)%*%Ve-t(Ve)%*%H%*%Ve, t(Ve)%*%r), digits=10)
#                        }     
                        if(length(fit$warning)!=0 & verbose>=3) {print(fit$warning); print(coef(fit$value)); print((logLik(fit$value)))}                 
                        tmp[i,j] = if(family=="gaussian" & !has.re) sum(w.sorted*(y.sorted-o.sorted)**2)-sum(resid(fit$value)**2) else as.numeric(logLik(fit$value))                                
                    }
                }
                #print(tmp);# stop("debug")
                tmp
                
            } else {
                # two thresholds in two different strata defined by z
                if (verbose) { myprint(chngpts[[1]]); myprint(chngpts[[2]]) }                
                sapply (chngpts[[2]], simplify="array", function(f) {
                sapply (chngpts[[1]], simplify="array", function(e) {
                    data=make.chngpt.var(chngpt.var, c(e,f), threshold.type, data, b.transition, stratified.by)
                    #myprint(e,f); print(formula.new); print(chngpts); print(data[order.,c("z","x","x.mod.e","x.mod.f")]); #stop("rest")
                    fit = do.regression (formula.new, data, family)            
#                    cat(sum(resid(fit$value)**2), " ") # to compare with fastgrid
#                    if (chngpts[[1]][2]==e & chngpts[[2]][1]==f) {# the index can be changed at different stages of debugging
#                        #str(formula.new); #print(model.matrix(formula.new, data))
#                        Ve=model.matrix(formula.new, data)[,3:4]# 3:4 for upperhinge
#                        #print(Ve)
#                        tmp=cbind(1, data$z); H=tmp%*%solve(t(tmp)%*%tmp)%*%t(tmp)
#                        #print("t(Ve)%*%Ve"); print(t(Ve)%*%Ve)
#                        #print("t(Ve)%*%H%*%Ve"); print(t(Ve)%*%H%*%Ve)
#                        #print("t(Ve)%*%Ve-t(Ve)%*%H%*%Ve"); print(t(Ve)%*%Ve-t(Ve)%*%H%*%Ve)
#                        #stop("debug")
#                        #print(t(data$y) %*% H %*% data$y)
#                    }
                    if(length(fit$warning)!=0 & verbose>=3) {myprint(e); print(fit$warning); print(coef(fit$value)); print((logLik(fit$value)))}                 
                    if(family=="gaussian" & !has.re) sum(w.sorted*(y.sorted-o.sorted)**2)-sum(resid(fit$value)**2) else as.numeric(logLik(fit$value))            
                })
                })
            } # end if stratified
            
            if (verbose) myprint(logliks, digits=10)       
        } 
        
        #myprint(stratified, threshold.type, fastgrid.ok, est.method); str(chngpts)
        if (stratified) {
            e.final=arrayInd(which.max(logliks), .dim=dim(logliks))
            e.final=c(chngpts[[1]][e.final[1,1]], chngpts[[2]][e.final[1,2]]);  
                   
        } else if (!stratified & threshold.type!="M111") {
            e.final=chngpts[which.max(logliks)]
        
        } else if (!stratified & threshold.type=="M111" & fastgrid.ok & est.method%in%c("gridC", "fastgrid", "fastgrid2")) {
            # logliks actually holds estimated threshold
            e.final=logliks 
            logliks=NULL
        
        } else {
            e.final=arrayInd(which.max(logliks), .dim=dim(logliks)); 
            e.final=c(chngpts[e.final[1,1]], chngpts[e.final[1,2]]); 
        }
        #print(e.final)
        #attr(e.final, "glm.warn")=any(is.na(logliks))
        if(verbose>=2) {
            if(!stratified & threshold.type!="M111") {
                plot(chngpts, logliks, type="b", xlab="threshold")
                abline(v=e.final)
            } else {
                # comment out, to be moved a function, not good to have to import rgl because of error msg on linux about x11
#                # cannot use points3d to add a red point after plot
#                col=0*logliks; col[which.max(logliks)]=1; col=col+1
#                plot3d(cbind(expand.grid(chngpts[[1]],chngpts[[2]]), c(logliks)), xlab="f", ylab="e", zlab="loglik", type="p", col=c(col)) # from rgl package
            }
        }
        
        #for segmented, grid search is done based on upper hinge, but fit at the final e is based on hinge
        
        # fit glm using e.final
        data = make.chngpt.var(chngpt.var, e.final, threshold.type, data, b.transition, stratified.by) # note that b.transition is used here instead of b.search
        #if (hinge.to.upperhinge) data[[chngpt.var.name]]=-data[[chngpt.var.name]] #this line shouldn't be here
        fit = do.regression (formula.new, data, family)$value
        # return e.final and logliks through attr. If we return them as elements of fit, upon removing these elements, the type of fit changes to a list, not glm anymore
        attr(fit,"e.final")=e.final
        attr(fit,"logliks")=logliks
        fit
    
    } # end grid.search function
    
    
    
    if (verbose>=2) cat("find e.final\n")
    if (est.method %in% c("grid","gridC","fastgrid","fastgrid2")) {
    #### grid search
        best.fit=grid.search()   
        e.final=attr(best.fit,"e.final")        
        logliks=attr(best.fit,"logliks")
        attr(best.fit,"logliks")<-attr(best.fit,"e.final")<-NULL
        if (has.re) {
            coef.hat=c(lme4::fixef(best.fit), e.final)
        } else {
            coef.hat=c(coef(best.fit), e.final)
        }
        
        glm.warn=attr(e.final,"glm.warn")  
        
    } else if (est.method=="smoothapprox") {
    #### Newton-Raphson
        
        if(verbose>1) cat("smoothapprox search\n")
        e.init=chngpt.init
        if (is.null(e.init)) {
            if(verbose>1) cat("initializing through coarse grid search\n")
            # est.method has to be grid in the following, otherwise it will create an infinite loop
            tmp=chngptm (formula.1, formula.2, family=family, data, type=threshold.type, weights=weights, offset=offset, est.method="grid", lb.quantile=lb.quantile, ub.quantile=ub.quantile, grid.search.max=50, 
                        b.transition=b.transition, search.bound=search.bound, keep.best.fit=FALSE, var.type="none", verbose=ifelse(verbose>3,verbose-3,0))# pass verbose-3 directly seems to fail to do the job 
            e.init=tmp$chngpt
        } 
        
        if(length(e.init)==0) {
            # if there are no initial values for some reason, just do grid search
            e.final=attr(grid.search(), "e.final")
            glm.warn=attr(e.final,"glm.warn")
            
        } else {
            # smooth approximation 
            names(e.init)="e"
            if (verbose>1) cat("init e: ", e.init, " ...\n")
            glm.warn=FALSE
            
            coef.hat=rep(0, 1+ncol(Z)+p.2.itxn)
            n.iter=0
            converged=TRUE   
            e.effective.maxit=numeric(maxit)     
            while(TRUE){    
            
                n.iter=n.iter+1
                if (n.iter>maxit) {converged=FALSE; break}
                if (verbose>1) cat("iter ", n.iter, "\t")
                
                # remake the binary change point variable in every iteration based on the change point estimate from last iteration
                data = make.chngpt.var(chngpt.var, e.init, threshold.type, data, b.search) # b.search is used here to be consistent with optim which uses b.search as well
                fit.0 = do.regression(formula.new, data, family)$value
                
                # update threshold and associated coefficients
                beta.init=coef(fit.0)[p.z+1:p.2.itxn]; #names(beta.init)=c("beta1","beta2")# we may need this for variance calculation to work
                alpha.hat=coef(fit.0)[1:p.z]
                stopifnot(all(!is.na(alpha.hat)))
                alpha.z = c(Z %*% alpha.hat)
    
                # search for better e and slopes associated with x and thresholded x
                optim.out = try(optim(par=c(beta.init, e.init), 
                      fn = get("dev."%.%threshold.type%.%"."%.%ifelse(has.itxn,"itxn.","")%.%"f"), 
                      gr = NULL,
                      #gr = get("dev."%.%threshold.type%.%"."%.%ifelse(has.itxn,"itxn.","")%.%"deriv.f"), 
    #                  # if we use analytical gradient function by deriv3 in optim, we can get situations like exp(100), which will be Inf, and Inf/Inf will be NaN
    #                  fn = function(theta,...) sum(dev.step.itxn.deriv(theta[1],theta[2],theta[3],...)), 
    #                  gr = function(theta,...) colSums(attr(dev.step.itxn.deriv(theta[1],theta[2],theta[3],...), "gradient")), 
                      chngpt.var, y, b.search, alpha.z, z.1, family, weights,
                      lower = c(rep(-search.bound,length(beta.init)), quantile(chngpt.var, lb.quantile)), 
                      upper = c(rep( search.bound,length(beta.init)), quantile(chngpt.var, ub.quantile)), 
                      method="L-BFGS-B", control = list(), hessian = TRUE))
                      
                if(inherits(optim.out,"try-error")) {
                    if (verbose) cat("error doing smoothapprox search, switch to grid search\n")
                    e.final=attr(grid.search(), "e.final")
                    glm.warn=attr(e.final,"glm.warn")
                    break;
                }
                            
                e.init=optim.out$par["e"]
                # to avoid algorithm ocillation due to artifact
                e.effective.maxit[n.iter]=which(chngpt.var.sorted>=e.init)[1]
    #            # set the "effective" change point to the data point that is just to the right of the optim output. This leads to more clear indications when perfect separate happens
                e.init=mean(chngpt.var.sorted[e.effective.maxit[n.iter]])
                # set the "effective" change point to the mid point between the two data points that sandwich the optim output. 
    #            e.init=mean(chngpt.var.sorted[e.effective.maxit[n.iter]+(-1):0])
                names(e.init)="e" # this is needed for optim to work
                
                coef.tmp=c(alpha.hat, optim.out$par)
                coef.tmp[length(coef.tmp)]=e.init # set to effective change point
                if (verbose>1) cat(coef.tmp, "\n")
                if (max(abs(coef.tmp - coef.hat)) < tol) {
                    coef.hat = coef.tmp
                    e.final=last(coef.hat)
                    break
                } else {
                    coef.hat = coef.tmp
                    e.final=last(coef.hat)
                }
                
            } # end while 
        } # end if else
    
        # fit glm using e.final
        data = make.chngpt.var(chngpt.var, e.final, threshold.type, data, b.transition) # note that b.transition is used here instead of b.search
        #if (hinge.to.upperhinge) data[[chngpt.var.name]]=-data[[chngpt.var.name]] # this line probably shouldn't be here
        best.fit = do.regression (formula.new, data, family)$value
        coef.hat=c(coef(best.fit), "chngpt"=e.final)
        
    } else stop("wrong est.method") # end if grid/smoothapprox
    
    
    if(!stratified) {
        if(threshold.type=="M111") {
            names(coef.hat)[length(coef.hat)]  ="chngpt.2"
            names(coef.hat)[length(coef.hat)-1]="chngpt.1"
        } else names(coef.hat)[length(coef.hat)]="chngpt"
        new.names=name.conversion(threshold.type, chngpt.var.name, names(coef.hat), hinge.to.upperhinge)    
    } else {
        names(coef.hat)[length(coef.hat)-1:0]=c("chngpt.0","chngpt.1")
        if (threshold.type %in% c("hinge","segmented","segmented2")) {
            new.names=sub("x.mod.e", "("%.%chngpt.var.name%.%".0-chngpt.0)+", names(coef.hat))    
            new.names=sub("x.mod.f", "("%.%chngpt.var.name%.%".1-chngpt.1)+", new.names)    
        } else if (threshold.type %in% c("upperhinge")) {
            new.names=sub("x.mod.e", "("%.%chngpt.var.name%.%".0-chngpt.0)-", names(coef.hat))    
            new.names=sub("x.mod.f", "("%.%chngpt.var.name%.%".1-chngpt.1)-", new.names)    
        } else stop("wrong threshold type")
    }
    if (verbose>1) cat(new.names,"\n")
    names(coef.hat)=new.names
    
    # convert now, otherwise there may be trouble with bootstrap
    if (hinge.to.upperhinge) {
        if(!stratified) {
            coef.hat["chngpt"]=-coef.hat["chngpt"]
        } else {
            # assuming two strata
            coef.hat["chngpt.0"]=-coef.hat["chngpt.0"]
            coef.hat["chngpt.1"]=-coef.hat["chngpt.1"]
        }
        tmp=chngpt.var.name;                           if(!is.na(coef.hat[tmp])) coef.hat[tmp]=-coef.hat[tmp]
        tmp=paste0("(",chngpt.var.name,"-chngpt)+");   if(!is.na(coef.hat[tmp])) coef.hat[tmp]=-coef.hat[tmp]
        tmp=paste0("(",chngpt.var.name,"-chngpt)+^3"); if(!is.na(coef.hat[tmp])) coef.hat[tmp]=-coef.hat[tmp]
    }
    

    ###############################################################################################
    # variance estimate
    
    warn.var=NULL
    
    # only implemented when there is no interaction and for linear and logistic regression
    if (var.type!="bootstrap" & (has.itxn | !family %in% c("binomial","gaussian"))) {
        var.est=NULL
    } else {
    
        ci.bootstrap=function(){
            if(verbose>1) cat("\nin ci.bootstrap\n")            
            
            # save rng state before set.seed in order to restore before exiting this function
            save.seed <- try(get(".Random.seed", .GlobalEnv), silent=TRUE) 
            if (inherits(save.seed,"try-error")) {set.seed(1); save.seed <- get(".Random.seed", .GlobalEnv) }      
            
            if (fastgrid.ok & est.method%in%c("fastgrid","fastgrid2","gridC")) {
                if (bootstrap.type=="nonparametric") {
                    sieve.y=NULL
                } else {
                    # the functions surrogate.AR, awb, etc are defined at the end of this file
                    # this assumes that the ordering of observations is based on the chngpt variable
                    
                    # because sieve.y is a matrix of ci.bootstrap.size columns, we have to do this
                    if(ncpus>1) stop("when bootstrap.type is not nonparametric, ncpus has to be set to 1 for now")
                    
                    set.seed(1) # only in effect for this data generation because the seed is set again before the bootstrap 
                    
                    sieve.y = predict(best.fit) + 
                        if (bootstrap.type=="sieve") {
                            surrogate.AR (resid(best.fit), order.max=order.max, nsurr=ci.bootstrap.size, wild=FALSE)$surr 
                        } else if (bootstrap.type=="wildsieve") {
                            surrogate.AR (resid(best.fit), order.max=order.max, nsurr=ci.bootstrap.size, wild=TRUE)$surr 
                        } else if (bootstrap.type=="wild") {
                            replicate(ci.bootstrap.size, resid(best.fit) * rnorm(n))
                        } else if (bootstrap.type=="awb") {
                            replicate(ci.bootstrap.size, awb(resid(best.fit), chngpt.var.sorted))
                        } else stop("wrong value for bootstrap.type: "%.%bootstrap.type)
                        
                }
            
                # call a c function that implements bootstrap
                boot.out=list()
                class(boot.out)="boot"
                boot.out$t0=coef.hat
                boot.out$R=ci.bootstrap.size
                boot.out$call=c("boot")
                boot.out$sim="ordinary"
                boot.out$stype="i"
                boot.out$fake=TRUE
                
                # give the bootstrap to multiple cores
                # each core gets a seed to differentiate it from others
                tmp=mclapply(1:ncpus, mc.cores = ncpus, FUN=function(seed) {     
                    set.seed(seed)   
                               
                    if(!stratified) {
                        f.name=paste0(est.method, "_", family)
                        if(verbose) cat(paste0("making call to ", f.name, "...\n"))
                        .Call(f.name
                            , as.integer(imodel)
                            , cbind(Z.sorted, chngpt.var.sorted, if(include.x) chngpt.var.sorted) #design matrix, the last column is to be used as (x-e)+ in the C program
                            , as.double(y.sorted-o.sorted) # note that this way of handling offset only works for linear regression
                            , as.double(w.sorted)
                            , as.integer(attr(chngpts,"index"))
                            , as.integer(verbose)
                            , as.integer(ci.bootstrap.size/ncpus)
                            , n.sub # subsampling or m.out.of.n sample size
                            , m.out.of.n>0 # with replacement
                            , sieve.y
                        )     
                    } else {
                        f.name="twoD_"%.%family
                        if(verbose) cat(paste0("making call to ", f.name, "...\n"))
                        .Call(f.name
                            , cbind(Z.sorted, if(include.x) chngpt.var.sorted) # X
                            , as.double(chngpt.var.sorted) # threshold variable
                            , as.double(y.sorted-o.sorted) # note that this way of handling offset only works for linear regression 
                            , as.double(w.sorted)
                            , as.integer(stratified.by.sorted)
        #                    as.integer(attr(chngpts[[1]],"index")), # potential thresholds 
        #                    as.integer(attr(chngpts[[2]],"index")), # potential thresholds
                            , as.double(lb.quantile)
                            , as.double(ub.quantile)
                            , as.integer(ci.bootstrap.size/ncpus) # bootstrap size
                            , as.integer(verbose)
                        )
                    }
                    
                }) # end mclapply
                boot.out$t=do.call(cbind, tmp) # cbind b/c because tmp is a list of vectors
                #str(boot.out$t); print(coef.hat); 
        
                tmp=t(matrix(boot.out$t, ncol=ci.bootstrap.size, dimnames=list(names(coef.hat), NULL)))                
                
                # for segmented, need to reparameterize b/c .C implements upperhinge based parameterization
                if (threshold.type=="segmented") {
                    tmp[,1] = tmp[,1] - tmp[,ncol(tmp)-1] * tmp[,ncol(tmp)]
                    tmp[,ncol(tmp)-2] = tmp[,ncol(tmp)-2] + tmp[,ncol(tmp)-1]
                    tmp[,ncol(tmp)-1] = -tmp[,ncol(tmp)-1]
                }
                boot.out$t=tmp                                        
    
            } else {
            # grid search
                set.seed(1)     
                
                boot.out=boot::boot(data.sorted, R=ci.bootstrap.size, sim = "ordinary", stype = "i", parallel = ifelse(ncpus==1,"no","multicore"), ncpus=ncpus, statistic=function(dat, ii){
                    # this function is run R+1 times, the first time is the data itself without resampling
                    #print(ii); print(dat[sort(ii),c("z","x","y")])
                    if (m.out.of.n>0) ii=ii[1:m.out.of.n] # m out of n bootstrap with replacement, one rule of thumb 4*sqrt(n)
                    # use chngpt.init so that the mle will be less likely to be inferior to the model conditional on e.hat, but it does not always work
                    # using chngpt.init makes bootstrap much faster though
                    # for studentized bootstrap interval, need variance estimates as well, however, stud performs pretty badly
                    fit.ii=try(chngptm (formula.1, formula.2, family, dat[ii,], 
                        type=threshold.type, 
                        formula.strat=formula.strat,
                        weights=w.sorted[ii], offset=o.sorted[ii], 
                        REML=REML, re.choose.by.loglik=re.choose.by.loglik,
                        est.method=est.method, var.type="none", 
                        b.transition=b.transition, verbose=ifelse(verbose>1, verbose-1, FALSE), keep.best.fit=TRUE, lb.quantile=lb.quantile, ub.quantile=ub.quantile, 
                        chngpt.init=coef.hat["chngpt"], 
                        grid.search.max=grid.search.max))
                    tmp=length(coef.hat)*1+ifelse(!boot.test.inv.ci, 0, ifelse(family=="gaussian",2,1)) # need to adjust depending on the last else clause
                    out = if(inherits(fit.ii,"try-error")) {
                        rep(NA,tmp) 
                    } else if (any(is.na(fit.ii$coefficients))) {
                        rep(NA,tmp) 
#                        # the next if is controversial, but at least has no impact on qudratic/250/gaussian
#                        } else if (any(abs(fit.ii$coefficients[1:(length(fit.ii$coefficients)-1)])>search.bound)) {
#                            rep(NA,tmp)
                    } else {
                        if(!boot.test.inv.ci) {
                            # bootstrap datasets model fits may not have all the factor levels as the original dataset, that is why we cannot just return and neeed to select here
                            if(hinge.to.upperhinge) {
                            # if it is a converted problem, names are different
                                tmp.name=sub("-chngpt)\\+","-chngpt)-",new.names)
                                fit.ii$coefficients[tmp.name]
                            } else {
                                fit.ii$coefficients[new.names]
                            }
                        } else {
                            # compute profile likelihood ratio
                            # pl can be negative even when grid search is used in both point estimate and bootstrap
                            # this is because the estimated chngpt may not be in the {x} of the bootstrapped dataset, and a value in between can be better than {x}
                            dat.tmp=make.chngpt.var(chngpt.var[ii], coef.hat["chngpt"], threshold.type, data[ii,], b.transition) # don't forget to do chngpt.var[ii]!
                            fit.ii.ehat=do.regression (formula.new, data=dat.tmp, family); if(length(fit.ii.ehat$warning)!=0 & verbose) print(fit.ii.ehat$warning)
                            pl= 2*(as.numeric(logLik(fit.ii$best.fit)) - as.numeric(logLik(fit.ii.ehat$value)))
                            Fs=n*(sigma(fit.ii.ehat$value)^2/sigma(fit.ii$best.fit)^2-1) # F statistic, an approximation of pl, but may work better for linear regression
                            if(pl<0 & verbose) {
                                print(summary(fit.ii$best.fit))
                                print("------------------------------------------------------------")
                                print(summary(fit.ii.ehat$value))
                                print("------------------------------------------------------------")
                                print(summary(fit.ii))
                                print(sort(dat[ii,"x"]))
                                #stop("debugging")
                            }
                            c(fit.ii$coefficients, pl, if(family=="gaussian") Fs) # for Gaussian, return both pl and Fs
                        }  
                    }
                    out                        
                }) # end boot call
                #str(boot.out$call)
            }
            # restore rng state 
            assign(".Random.seed", save.seed, .GlobalEnv)     
            
            if (hinge.to.upperhinge) {
            # switch signs of some coefficients as needed
                tmp1=which(paste0("(",chngpt.var.name,"-chngpt)+")==names(coef.hat))
                boot.out$t[,tmp1]=(-boot.out$t[,tmp1])
                boot.out$t0[tmp1]=(-boot.out$t0[tmp1])
                
                tmp1=which(paste0("(",chngpt.var.name,"-chngpt)+^3")==names(coef.hat))
                boot.out$t[,tmp1]=(-boot.out$t[,tmp1])
                boot.out$t0[tmp1]=(-boot.out$t0[tmp1])
                
                tmp1=which("chngpt"==names(coef.hat))
                boot.out$t0[tmp1]=(-boot.out$t0[tmp1])
                boot.out$t[,tmp1]=(-boot.out$t[,tmp1])
                
                tmp1=which(chngpt.var.name==names(coef.hat))
                boot.out$t[,tmp1]=(-boot.out$t[,tmp1])
                boot.out$t0[tmp1]=(-boot.out$t0[tmp1])
            }
    
            boot.samples=boot.out$t            
            if(is.null(colnames(boot.samples))) colnames(boot.samples)=c(names(coef.hat), rep("",ncol(boot.samples)-length(coef.hat)))
                        
            # use boot.ci to get CI has a problem when hinge.to.upperhinge is on
            outs=list(perc=NULL, basic=NULL, bc=NULL) # this fixes the order of the list components
#            for (i in 1:length(coef.hat)){
#                # for stud, index needs to be of length 2 and the second element needs to be var. If the second element is not present, will have warnings: index out of bounds; minimum index only used
#                #ci.out=try(boot.ci(boot.out, type=c("perc","basic",if(ci.bootstrap.size>n) "bca","stud"), index=c(i, i+length(coef.hat))))
#                # invisible is used here because boot.ci prints messages about having only 1 bootstrap replicate when running unit testing code
#                capture.output({
#                  ci.out=suppressWarnings({
#                    #try(boot.ci(boot.out, type=c("perc","basic",if(ci.bootstrap.size>n & is.null(boot.out$fake)) "bca"), index=i), silent=TRUE)# bca requires a true boot.out object
#                    try(boot::boot.ci(boot.out, type=c("perc","basic"), index=i), silent=TRUE)# bca requires a true boot.out object, changed to this for performance timing to be fair to true boot call
#                  })
#                })
#                # suppressWarnings changes the return type
#                outs$perc= cbind(outs$perc,  if(!inherits(ci.out,"bootci")) c(NA,NA) else ci.out$percent[1,4:5])
#                outs$basic=cbind(outs$basic, if(!inherits(ci.out,"bootci")) c(NA,NA) else ci.out$basic[1,4:5])  
#                #outs$abc=  cbind(outs$abc,   if(!inherits(ci.out,"bootci")) c(NA,NA) else if(ci.bootstrap.size>n) ci.out$bca[1,4:5] else c(NA,NA) ) # for bca to work, the number of bootstrap replicates needs to be greater than sample size
#                #outs$stud= cbind(outs$stud,  if(!inherits(ci.out,"bootci")) c(NA,NA) else ci.out$student[1,4:5])
#            }
#            colnames(outs$perc)<-colnames(outs$basic)<-names(coef.hat) # <-colnames(outs$abc)
    
            outs$perc=sapply (1:length(coef.hat), function(i) {
                quantile(boot.samples[,i], c(alpha/2,1-alpha/2), na.rm=TRUE)
            })
            colnames(outs$perc)<-names(coef.hat) 
            outs$basic=sapply (1:length(coef.hat), function(i) {
                tmp=quantile(boot.samples[,i], c(alpha/2,1-alpha/2), na.rm=TRUE)
                c(2*coef.hat[i]-tmp[2], 2*coef.hat[i]-tmp[1])
            })
            colnames(outs$basic)<-names(coef.hat) 
            # find bc CI from boot.out$t as ci.out does not provide bc
            outs$bc=sapply (1:length(coef.hat), function(i) {
                z.0=qnorm(mean(boot.samples[,i]<coef.hat[i], na.rm=TRUE))                    
                quantile(boot.samples[,i], c(pnorm(2*z.0+qnorm(alpha/2)), pnorm(2*z.0+qnorm(1-alpha/2))), na.rm=TRUE)
            })
            colnames(outs$bc)<-names(coef.hat) 
            # symmetric percentile CI as defined in Hansen (2017)
            outs$symm=sapply (1:length(coef.hat), function(i) {
                q.1=quantile(abs(boot.samples[,i]-coef.hat[i]), 1-alpha, na.rm=TRUE)
                coef.hat[i]+c(-q.1, q.1)
            })                
            colnames(outs$symm)<-names(coef.hat) 
            rownames(outs$symm)<-c((alpha/2*100)%.%"%",(100-alpha/2*100)%.%"%") 
            
            # compute test inversion CI for threshold based on bootstrap critical value
            if(boot.test.inv.ci) {
                outs$testinv=matrix(NA,nrow=2,ncol=length(coef.hat))
                colnames(outs$testinv)<-names(coef.hat)
                if(verbose>=2) print(summary(boot.samples[,1+length(coef.hat)]))
                if (family=="gaussian") {
                    # print both c.alpha for comparison, but use Fs-based, because it seems to work better
                    c.alpha=quantile(boot.samples[,1+length(coef.hat)], 1-alpha, na.rm=TRUE); if(verbose) myprint(c.alpha)
                    #c.alpha=quantile(boot.samples[,2+length(coef.hat)], 1-alpha, na.rm=TRUE); if(verbose) myprint(c.alpha)
                    if (!is.na(c.alpha)) outs$testinv[,length(coef.hat)]= ci.test.inv.F(c.alpha) 
                } else {
                    c.alpha=quantile(boot.samples[,1+length(coef.hat)], 1-alpha, na.rm=TRUE); if(verbose) myprint(c.alpha)
                    if (!is.na(c.alpha)) outs$testinv[,length(coef.hat)]= ci.test.inv(c.alpha)                     
                }
            }
     
            # the following percentile CI is numerically different from boot.ci results because the latter use norm.inter to do interpolation on the normal quantile scale
            #ci.perc=apply(boot.out$t, 2, function (x) quantile(x, c(alpha/2, 1-alpha/2), na.rm=TRUE))   
                            
#                # this takes a long time and returns non-sensible result
#                ci.abc=abc.ci(data, statistic=function(dat, ww){
#                        fit.ii=try(chngptm (formula.1, formula.2, family, dat, threshold.type, est.method="smoothapprox", var.type="none"))
#                        # note that we cannot use | in the following line
#                        if(inherits(fit.ii,"try-error")) 
#                            rep(NA,length(coef.hat)) 
#                        else if (any(abs(fit.ii$coefficients[1:(length(fit.ii$coefficients)-1)])>search.bound)) 
#                            rep(NA,length(coef.hat)) 
#                        else fit.ii$coefficients
#                    }, 
#                    index=4, conf=1-alpha
#                )
#                myprint(ci.abc)
            
            if(save.boot) outs[["boot.samples"]]=boot.samples            
            outs                
        }# end ci.bootstrap
    
            
        # model-based
        var.est.model=function(test.inv.ci, robust=FALSE){
            if(verbose>1) cat("in var.est.model\n")
    
            # set up some variables
            e=coef.hat["chngpt"]
            beta.4=coef.hat["("%.%chngpt.var.name%.%"-chngpt)"%.%ifelse(threshold.type=="upperhinge","-","+")]
    
            # x.tilda
            if(threshold.type %in% c("hinge","segmented")) {
                x.gt.e=chngpt.var>e
                if (b.transition==Inf) {
                    x.tilda=cbind(Z, if(threshold.type=="segmented") chngpt.var, (chngpt.var-e)*x.gt.e, -beta.4*x.gt.e)
                } else {
                    x.tilda=cbind(Z, if(threshold.type=="segmented") chngpt.var, (chngpt.var-e)*expit(b.transition*(chngpt.var-e)), -beta.4*expit(b.transition*(chngpt.var-e))*(1+b.transition*(chngpt.var-e)*(1-expit(b.transition*(chngpt.var-e)))))
                }               
            } else if(threshold.type %in% c("upperhinge")) {
                x.lt.e=chngpt.var<e
                if (b.transition==Inf) {
                    x.tilda=cbind(Z, (chngpt.var-e)*x.lt.e, -beta.4*x.lt.e)
                } else {
                    stop("var.est.model: not yet implemented")
                    #x.tilda=cbind(Z, (chngpt.var-e)*expit(b.transition*(chngpt.var-e)), -beta.4*expit(b.transition*(chngpt.var-e))*(1+b.transition*(chngpt.var-e)*(1-expit(b.transition*(chngpt.var-e)))))
                }               
            } else if (threshold.type=="segmented2") {
                # earlier we checked that b.transition should not be Inf
                x.tilda=cbind(Z, chngpt.var, chngpt.var*expit(b.transition*(chngpt.var-e)),                               -beta.4*expit(b.transition*(chngpt.var-e))        *b.transition*chngpt.var*(1-expit(b.transition*(chngpt.var-e))) )
            } else stop("threshold.type not recognized in var.est.model")
            
            if (family=="binomial") {
                p.est=drop(expit(x.tilda[,-p] %*% coef.hat[-p]))                                                  
                if(!robust) {
                    V=-tXDX(x.tilda, p.est*(1-p.est))/n    
                    var.est=solve(-V)/n                        
                } else {
                    # sandwich estimation
                    B.inv=solve(tXDX(x.tilda, p.est*(1-p.est))/n)
                    M=tXDX(x.tilda, resid(best.fit,type="response")**2)/n    
                    var.est=B.inv %*% M %*% B.inv/n                        
                }
            } else if (family=="gaussian") {
                if(!robust) {
                    V=-t(x.tilda) %*% (x.tilda)/n/sigma(best.fit)^2
                    var.est=solve(-V)/n                        
                } else {
                    # sandwich estimation
                    B.inv=solve(t(x.tilda) %*% (x.tilda)/n)
                    M=tXDX(x.tilda, resid(best.fit,type="response")**2)/n    
                    var.est=B.inv %*% M %*% B.inv/n                        
                }
            }
            #myprint(e); print(eigen(V)$values); print(x.tilda[order(chngpt.var),])
            
            rownames(var.est) <- colnames(var.est) <- names(coef.hat)                
            
            # profile likelihood ratio test inversion CI
            if(test.inv.ci) {
                chngpt.ci=ci.test.inv(qchisq(1-alpha, df=1) * 1)
                attr(var.est,"chngpt.ci")=chngpt.ci
            }
            
            var.est
        }
    
        var.est.robust=function(aux.fit, test.inv.ci, true.param=FALSE){
            if(verbose>1) cat("in var.est.robust\n")
                            
            # compute density of X at change point. have to use coef.hat["chngpt"] for change point here b/c e is defined later
            den=density(chngpt.var)
            pt1=which(den$x>=coef.hat["chngpt"])[1]
            f.x.e = interpolate(c(den$x[pt1],den$y[pt1]), c(den$x[pt1-1],den$y[pt1-1]), coef.hat["chngpt"])   # interploate to find the density at change point
            
            # debugging only: use population parameter values to calculate variance
            if (true.param) {
                # quadratic model                    
                coef.hat=c("(Intercept)"=-2.8297600157,   "z"=0.3378716621,   "x"=0.5526491719,   "(x-chngpt)+"=1.3409043525,   "chngpt"=3.7541306365) 
                f.x.e=dnorm(3.75, 4.7, 1.4) #0.22
                ## sigmoid2, in sim studies, we don't need robust variance estimate for sigmoid2
                #coef.hat=c("(Intercept)"=-0.09428639728, "z"=log(1.4), "x"=-log(0.67), "(x-chngpt)+"=log(0.4), "chngpt"=4.5) 
                #f.x.e=dnorm(4.5, 4.7, 1.6) 
            }
            
            # set up some variables
            e=coef.hat["chngpt"]
            if (threshold.type=="upperhinge") {
                x.lt.e=chngpt.var<e
                beta.4=coef.hat["("%.%chngpt.var.name%.%"-chngpt)-"]                    
                # x.tilda
                x.tilda=cbind(Z, (chngpt.var-e)*x.lt.e, -beta.4*x.lt.e)
            } else {
                x.gt.e=chngpt.var>e
                beta.4=coef.hat["("%.%chngpt.var.name%.%"-chngpt)+"]                    
                # x.tilda
                x.tilda=cbind(Z, if (threshold.type=="segmented") chngpt.var, (chngpt.var-e)*x.gt.e, -beta.4*x.gt.e)
            }
            
            antilink=get(family)()$linkinv
            mu.est=drop(antilink(x.tilda[,-p] %*% coef.hat[-p]))        
            M=tXDX(x.tilda, resid(best.fit,"response")^2)/n # has to use the robust, sandwich estimation version. Note the threshold.type needs to be response for glm
            # non-robust version is not justified because it depends on the assumption that mean model is correct
            #M=t(x.tilda)%*%x.tilda *sigma(best.fit)^2 /n 
            
            if (family=="binomial") {
                V.1=-tXDX(x.tilda, mu.est*(1-mu.est))/n    
            } else if (family=="gaussian") {
                V.1=-t(x.tilda) %*% (x.tilda)/n                    
            }
            
            # V.2 is the same between binomial and gaussian families
            V.2=matrix(0,nrow=p,ncol=p)
            # there are two ways to compute the off-diagonal element. when use true coefficients, it works better to use predicted response by true model; when use estimated, using y works better
            #V.2[p,p-1]<-V.2[p-1,p]<- if(is.null(attr(aux.fit,"truemodel"))) -mean((y-mu.est)*x.gt.e) else -mean((predict(aux.fit, data, "response")-mu.est)*x.gt.e)
            V.2[p,p-1]<-V.2[p-1,p]<- -mean((y-mu.est)*if(threshold.type=="upperhinge") x.lt.e else x.gt.e) 
            # compute V.2[p, p]
            newdata=data; newdata[[chngpt.var.name]]=e
            m.0=mean(predict(aux.fit, newdata, "response"))
            p.e.z.0=mean(antilink(cbind(Z, if (threshold.type=="segmented") e) %*% coef.hat[1:(p-2)]))                
            V.2[p, p]= ifelse(threshold.type=="upperhinge",-1,1)* beta.4 * f.x.e * (m.0-p.e.z.0)
            V=V.1+V.2    
            V.inv=solve(V) 
            
            var.est=V.inv%*%M%*%V.inv/n       
            rownames(var.est) <- colnames(var.est) <- names(coef.hat)                
            
            if(verbose==2){
                print(V.1)
                print(V.2)
                myprint(diag(V))
                myprint(diag(V.inv))
                cat("eigen values: \n")
                myprint(round(eigen(V.1)$values, 5))
                myprint(round(eigen(V.2)$values, 5))
                myprint(round(eigen(V)$values, 5))
#                    myprint(beta.4, f.x.e, m.0, p.e.z.0, m.0-p.e.z.0)
#                    myprint(mean(y*x.gt.e), mean(p.est*x.gt.e))
#                    myprint(mean(predict(aux.fit, data, "response") * x.gt.e))
            }    
            
            # profile likelihood ratio test inversion CI
            if(test.inv.ci) {
                scale.chisq= last(diag(var.est))/(-last(diag(V.inv))/n) 
                if (verbose>=2) {
                    myprint(last(diag(var.est)), -last(diag(V.inv))/n, scale.chisq)
                }
                if (scale.chisq<0) {
                    cat("scale.chisq is negative\n")
                    scale.chisq=abs(scale.chisq)
                }                     
                if (family=="binomial") {
                    # no need to do anything
                } else if (family=="gaussian") {
                    scale.chisq=scale.chisq/sigma(best.fit)^2
                }
                chngpt.ci=ci.test.inv(qchisq(1-alpha, df=1) * scale.chisq)
                attr(var.est,"chngpt.ci")=chngpt.ci
            }
    
            var.est
        }
        
        # c.alpha is the critical value for diff in deviance
        ci.test.inv=function(c.alpha){
            if (verbose>1) cat("in ci.test.inv\n")
            
            # TODO: we do a better job when it is grid-based search since we have already computed the profile likelihood 
#                if (est.method %in% c("grid","fastgrid","fastgrid2")) {
#                    c(NA,NA)# todo
#                } else if (est.method=="smoothapprox") {
                
                idx.chngpt=which(chngpt.var.sorted>=coef.hat["chngpt"])[1]
                data = make.chngpt.var(chngpt.var, chngpt.var.sorted[idx.chngpt], threshold.type, data, b.transition)
                fit=do.regression (formula.new, data, family)
                lik.max = as.numeric(logLik(fit$value))
                
                profile.liks=rep(NA, length(chngpt.var.sorted))
                
                # go left
                lik=lik.max
                idx=idx.chngpt
                #myprint(idx, lik, c.alpha)
                while (2*(lik.max-lik)<c.alpha) {
                    profile.liks[idx]=lik
                    idx=idx-1
                    if(idx==0) {
                        #if (verbose) warning("no more data on the left")
                        lik=NA; break
                    }
                    data=make.chngpt.var(chngpt.var, chngpt.var.sorted[idx], threshold.type, data, b.transition)
                    fit = do.regression (formula.new, data, family)
                    if(length(fit$warning)!=0) {
                        #if(verbose) print(fit$warning)
                        lik = NA; break
                    } else lik = as.numeric(logLik(fit$value))                             
                } 
                #lb=if(is.na(lik)) NA else chngpt.var.sorted[idx+1] 
                lb=chngpt.var.sorted[idx+1] 
                
                # go right
                lik=lik.max
                idx=idx.chngpt
                while (2*(lik.max-lik)<c.alpha) {
                    profile.liks[idx]=lik
                    idx=idx+1
                    if(idx>n) {
                        #if (verbose) warning("no more data on the right")
                        lik=NA; break
                    }
                    data=make.chngpt.var(chngpt.var, chngpt.var.sorted[idx], threshold.type, data, b.transition)
                    fit = do.regression (formula.new, data, family)
                    if(length(fit$warning)!=0) {
                        #if(verbose) print(fit$warning)
                        lik = NA; break
                    } else lik = as.numeric(logLik(fit$value))     
                } 
                #ub=if(is.na(lik)) NA else chngpt.var.sorted[idx-1]
                ub=chngpt.var.sorted[idx-1]
                
                if (verbose==2) {
                    plot(chngpt.var.sorted, 2*profile.liks, xlab="change points", ylab="deviance relative to null model", main="Test Inversion CI")
                }
                
                c(lb,ub)
#                } else stop("wrong est.method")
            
        } 
    
        # use F statistics to find test-inversion CI
        ci.test.inv.F=function(c.alpha){
            if (verbose>1) cat("in ci.test.inv.F\n")
            
            if (est.method %in% c("grid","fastgrid","fastgrid2")) {
                c(NA,NA)# todo
            } else if (est.method=="smoothapprox") {
                
                idx.chngpt=which(chngpt.var.sorted>=coef.hat["chngpt"])[1]
                data = make.chngpt.var(chngpt.var, chngpt.var.sorted[idx.chngpt], threshold.type, data, b.transition)# it is ok that data is assigned to b/c the function adds columns
                fit=do.regression (formula.new, data, family)
                sigsq.max = (sigma(fit$value))**2; #myprint(sigsq.max)
                #sigsq.max = (sigma(best.fit))**2; myprint(sigsq.max)
                
                profile.sigsqs=rep(NA, length(chngpt.var.sorted))
                
                # go left
                sigsq=sigsq.max
                idx=idx.chngpt
                #myprint(idx, sigsq, c.alpha)
                while (n*(sigsq/sigsq.max-1)<c.alpha) {
                    profile.sigsqs[idx]=sigsq
                    idx=idx-1
                    if(idx==0) {
                        if (verbose) warning("no more data on the left")
                        sigsq=NA; break
                    }
                    data=make.chngpt.var(chngpt.var, chngpt.var.sorted[idx], threshold.type, data, b.transition)
                    fit = do.regression (formula.new, data, family)
                    if(length(fit$warning)!=0) {
                        #if(verbose) print(fit$warning)
                        sigsq = NA; break
                    } else sigsq = (sigma(fit$value))**2
                    if(verbose>=2) print(sigsq)
                } 
                #lb=if(is.na(sigsq)) NA else chngpt.var.sorted[idx+1] 
                lb=chngpt.var.sorted[idx+1] 
                
                # go right
                sigsq=sigsq.max
                idx=idx.chngpt
                while (n*(sigsq/sigsq.max-1)<c.alpha) {
                    profile.sigsqs[idx]=sigsq
                    idx=idx+1
                    if(idx>n) {
                        if (verbose) warning("no more data on the right")
                        sigsq=NA; break
                    }
                    data=make.chngpt.var(chngpt.var, chngpt.var.sorted[idx], threshold.type, data, b.transition)
                    fit = do.regression (formula.new, data, family)
                    if(length(fit$warning)!=0) {
                        #if(verbose) print(fit$warning)
                        sigsq = NA; break
                    } else sigsq = (sigma(fit$value))**2
                } 
                #ub=if(is.na(sigsq)) NA else chngpt.var.sorted[idx-1]
                ub=chngpt.var.sorted[idx-1]
                
                c(lb,ub)
                
            } else stop("wrong est.method")
            
        } 
                    
        
        # if perfect segregation happens, cann't compute variance estimate
        # what to do with glm.warn?
        if (family=="binomial" & any(abs(coef.hat[-c(1,length(coef.hat))])>=search.bound, na.rm=T)) {
            warn.var="Point estimate over search bound. Not computing variance estimate."
            cat(warn.var, "\n")
            var.est=NULL
        } else {
            if (var.type=="none") {
                var.est=NULL
            } else if (var.type=="bootstrap") {
                var.est=ci.bootstrap()
            } else {                
                var.est=switch(var.type, 
#                        smooth=var.est.smooth(), 
                    model=var.est.model(test.inv.ci), 
                    robust=     var.est.robust(aux.fit, test.inv.ci), 
                    robusttruth=var.est.robust(aux.fit, test.inv.ci, true.param=TRUE), 
                    all=list(#"smooth"=var.est.smooth(), 
                              "model"=var.est.model(test.inv.ci), 
                             "robust"=var.est.robust(aux.fit, test.inv.ci), 
                           "sandwich"=var.est.model(test.inv.ci=FALSE,robust=TRUE) )
                ) 
            }
        }
        
    } # end variance estimate

    
    
    # cannot move this to before variance estimation for some reason
    # convert M20 problem back to M02 problem
    # slope for (x-chngpt)-^2 need not be changed
    # boot.samples is already transformed inside ci.bootstrap()
    # best.fit is already transformed inside grid.search() and 
    if (hinge.to.upperhinge) {
        if (threshold.type == "upperhinge") threshold.type= "hinge"
        if (threshold.type == "M20") threshold.type= "M02"
        if (threshold.type == "M30") threshold.type= "M03"
        if (threshold.type == "M40") threshold.type= "M04"
        if (threshold.type == "M21") threshold.type= "M12"
        if (threshold.type == "M21c") threshold.type= "M12c"
        if (threshold.type == "M31") threshold.type= "M13"
        
#        coef.hat["chngpt"]=-coef.hat["chngpt"]
##        tmp=chngpt.var.name;                           if(!is.na(coef.hat[tmp])) coef.hat[tmp]=-coef.hat[tmp]
#        tmp=paste0("(",chngpt.var.name,"-chngpt)+");   if(!is.na(coef.hat[tmp])) coef.hat[tmp]=-coef.hat[tmp]
#        tmp=paste0("(",chngpt.var.name,"-chngpt)+^3"); if(!is.na(coef.hat[tmp])) coef.hat[tmp]=-coef.hat[tmp]
    
        e.final= - e.final
        
        # when stratified, chngpts is a list
        if (is.null(stratified.by.sorted)) {
            chngpts=-chngpts
        } else {
            for (tmp in 1:length(chngpts)) chngpts[[tmp]]=-chngpts[[tmp]]
        }
    }
    
    res=list(
          coefficients=coef.hat
        , vcov=var.est
#        , formula.1=formula.1
#        , formula.2=formula.2
        , formula.new=formula.new
        , chngpt.var=chngpt.var.name
        , chngpt=e.final
        , est.method = est.method
        , b.transition=b.transition
        , threshold.type = threshold.type
        , glm.warn = glm.warn
        , family=family    
        , var.type=var.type
        , n=n # after removing rows with missing values
        , hinge.to.upperhinge=hinge.to.upperhinge
    )
    
    if (est.method=="smoothapprox") {
        res=c(res, list(
            converged=converged, 
            iter=n.iter))
    } else if (est.method %in% c("grid","fastgrid","fastgrid2","gridC")) {
        res=c(res, list(
            chngpts=chngpts, 
            logliks=logliks))
#        # good.soln needs to take as input a chngptm object that has chngpts etc
#        class(res)=c("chngptm", class(res)) 
#        tmp=good.soln(res, plot=verbose==3)
#        res=c(res, list(good.soln=tmp) )
    }
    if (!is.null(warn.var)) res[["warning"]]=warn.var
    if(keep.best.fit) res=append(res, list(best.fit=best.fit), 0)
    #if (!stratified & threshold.type!="M111") names(res$chngpt) = round(100*mean(chngpt.var<res$chngpt),1) %.% "%" else res$chngpt=unname(res$chngpt)
    
#    # If lmerMod fit@frame does not have enough data, then we need this
#    if(has.re) {
#        if(hinge.to.upperhinge) data[[chngpt.var.name]]=-data[[chngpt.var.name]]    
#        res[["data"]]=data 
#    }
    
    if(verbose) cat("\n") # it helps if we fit multiple models
    
    class(res)=c("chngptm", threshold.type, class(res))
    res    
    
}


# for linear only
chngptm.xy = function(x, y, type=c("step","hinge","segmented","segmented2","stegmented"),  ...)  {
    tmp.dat=data.frame(x=x, y=y)
    chngptm(y~1, ~x, family="gaussian", tmp.dat, type=type, ...)
}


get.chngpts=function (chngpt.var.sorted, lb.quantile, ub.quantile, n.chngpts, stratified.by.sorted=NULL, min.max.not.allowed=FALSE) {    
    if (is.null(stratified.by.sorted)) {
        n=length(chngpt.var.sorted)
        nLower=round(n*lb.quantile)+1; nUpper=round(n*ub.quantile)
        chngpts=chngpt.var.sorted[nLower:nUpper]
        attr(chngpts, "index")=nLower:nUpper
        attr(chngpts, "skipping")=FALSE
        
        # subset chngpts if needed
        if (length(chngpts)>n.chngpts) {
            ind=round(seq(1,length(chngpts),length=n.chngpts))
            chngpts=chngpts[ind]
            attr(chngpts, "index")=(nLower:nUpper)[ind]
            attr(chngpts, "skipping")=TRUE
        } 
        
    } else {
        # stratified
        chngpts = lapply (0:1, function(v) {
            n=sum(stratified.by.sorted==v)
            nLower=round(n*lb.quantile)+1; nUpper=round(n*ub.quantile)
            chngpts=chngpt.var.sorted[stratified.by.sorted==v][nLower:nUpper]
            attr(chngpts, "index")=nLower:nUpper + if (v==1) sum(stratified.by.sorted==0) else 0 # when v is 1, add a base so that the index correspond to the chngpt.var.sorted
            chngpts
        })
        
        if (length(chngpts)>n.chngpts) {
            stop("subsetting chngpts not supported for now for stratified")
        } 
    }
    
    # if there are many duplicated values, the min and max may be part of chngpts. For testing, that could be a problem
    if(min.max.not.allowed) {
        if(chngpts[1]==chngpt.var.sorted[1]) {
            chngpts=chngpts[chngpts!=chngpt.var.sorted[1]]
        } 
        if (chngpts[length(chngpts)]==last(chngpt.var.sorted)) {
            chngpts=chngpts[chngpts!=last(chngpt.var.sorted)]
        }
    }
    
    chngpts
}
# an alternative and equivalent way to define nLower/nUpper: nLower=sum(chngpt.var.sorted<quantile(chngpt.var.sorted, lb.quantile))+1; nUpper=sum(chngpt.var.sorted<=quantile(chngpt.var.sorted, ub.quantile));myprint(nLower, nUpper)
#    # old from chngptm, take the mid points between data points as change point candidates. For step models, use any point in between does not change likelihood, but it does change likelihood for segmented model
#    chngpts=(chngpts[1:(length(chngpts)-1)]+chngpts[-1])/2
#    # old from chngpt.test, chngpts are mostly between observed values
#    chngpts=quantile(chngpt.var, seq(lb.quantile,ub.quantile,length=chngpts.cnt)) 
#get.chngpts (c(1:5,5:10), lb.quantile=.1, ub.quantile=.9, n.chngpts=Inf)


## make.chngpt.var creates thresholded covariates
## For some threshold effects such as segmented, the original covariate (not thresholded) is also part of the model and that is coded by include.x.
## This function is used in both testing and estimation
## in estimation by grid search, b.transition is set to null when this function is called
## in estimation by smooth approx, b.transition value is saved to object
make.chngpt.var=function(x, e, threshold.type, data=NULL, b.transition=Inf, stratified.by=NULL) {    
    
    if (is.null(stratified.by) & length(e)==1) {
        transition=expit(b.transition*(x-e))
        # if this is not here, there will be NaN and that messes up things
        # at e, the value is 0
        if(b.transition==Inf) transition[is.nan(transition)]=0 
#        # 20200613 experimenting with 1
#        if(b.transition==Inf) transition[is.nan(transition)]=1
        
        if(threshold.type=="step") {
            out=transition
        } else if (threshold.type %in% c("upperhinge", "M20", "M30", "M40", "M21", "M21c", "M31")) {
            out=(1-transition)*(x-e)
        } else if (threshold.type %in% c("segmented", "hinge", "M02", "M03", "M04", "M12", "M12c", "M13")) {
            # these models are needed here for predict and for use by chngpt.test and for segmented
            out=transition*(x-e)
        } else if (threshold.type %in% c("M22","M22c","M33c")) {
            out=cbind((1-transition)*(x-e), transition*(x-e), x-e )
        } else if (threshold.type=="segmented2") {
            out=transition*x 
        } else if (threshold.type=="stegmented") {
            out=cbind(transition, transition*(x-e)) 
    
        } else stop("wrong threshold.type")
        
        if (is.null(data)) {
            out    
        } else {
            if (threshold.type=="stegmented") {
                data$x.mod.e   = out[,1]
                data$x.mod.e.2 = out[,2]
            } else if (threshold.type %in% c("M22","M22c","M33c")) {
                data$x   = x
                data$x.lt.e   = out[,1]
                data$x.gt.e   = out[,2]
                data$x.mi.e   = out[,3]
            } else if (threshold.type %in% c("M21c","M12c")) {# this is a little weird for M12c, but seems to work
                data$x.lt.e   = out
                
            } else if (threshold.type %in% c("M111")) {
                data$x   = x
                data$x.lt.e   = out[,1]
                data$x.lt.f   = out[,2]
                
            } else {
                data$x.mod.e = out
            }
            data
        }
        
    } else if (threshold.type %in% c("M111")) {    
        # three phases
        transition=expit(b.transition*(x-e[1]))
        if(b.transition==Inf) transition[is.nan(transition)]=0 
        transition.2=expit(b.transition*(x-e[2]))
        if(b.transition==Inf) transition.2[is.nan(transition.2)]=0 
        out=cbind((1-transition)*(x-e[1]), (1-transition.2)*(x-e[2]) )
    
        if (is.null(data)) {
            out    
        } else {
            if (threshold.type %in% c("M111")) {
                data$x   = x
                data$x.lt.e   = out[,1]
                data$x.lt.f   = out[,2]
                
            } else stop("wrong threshold type")
            data
        }
    
    } else {
        if (length(e)!=2) stop("make.chngpt.var needs a vector of length two for the argument e when stratified.by is not null")
        if (length(stratified.by)!=length(x)) stop("make.chngpt.var: stratified.by needs to be a vector of 0/1 of the same length as x")
        # create thresholded var within each stratum
        # first col of out is for stratified.by==0, second for stratified.by==1
        out = cbind(1-stratified.by, stratified.by)
        out[stratified.by==0,1] = make.chngpt.var (x[stratified.by==0], e[1], threshold.type, b.transition=b.transition) 
        out[stratified.by==1,2] = make.chngpt.var (x[stratified.by==1], e[2], threshold.type, b.transition=b.transition)
        if (is.null(data)) {
            colnames(out)=c("x.mod.e","x.mod.f")
            out    
        } else {
            # assuming only segmented/hinge are supported
            data$x.mod.e = out[,1]
            data$x.mod.f = out[,2]
            data
        }
        
    }    
    
}
#
# The difference between the next two versions is that when b.transition is not infinity, if x<e, x is 0 in the older version but only close to 0 in the newer version
## before May 13, 2017
#make.chngpt.var=function(x, e, threshold.type, data=NULL, b.transition=NULL) {    
#    if(threshold.type=="step") {
#        out=ifelse(x>=e, 1, 0)
#    } else if (threshold.type=="hinge") {
#        out=ifelse(x>=e, x-e, 0)
#    } else if (threshold.type=="segmented") {
#        out=ifelse(x>=e, x-e, 0) # x also becomes part of the null model
#    } else if (threshold.type=="stegmented") {
#        out=cbind(ifelse(x>=e, 1, 0), ifelse(x>=e, x-e, 0))  # x also becomes part of the null model
#    }    
#    if (!is.null(b.transition)) out=out * 1/(1+exp(b.transition*(x-e))) 
#


# the added terms will be named as, say x.mod.e. these variables will be created by make.chngpt.var
get.f.alt=function(threshold.type, chngpt.var.name, modified.by=NULL, stratified=FALSE, has.quad=FALSE, has.cubic=FALSE) {
    
    if (is.null(modified.by) & !stratified) {
        if (threshold.type=="M22") {
            f.alt="~.+x.lt.e+I(x.lt.e^2)+x.gt.e+I(x.gt.e^2)"
        } else if (threshold.type=="M22c") {
            f.alt="~.+x+I(x.lt.e^2)+I(x.gt.e^2)"
        } else if (threshold.type=="M21c") {
            f.alt="~.+I(x.lt.e^2)" # x is already added outside get.f.alt
        } else if (threshold.type=="M33c") {
            f.alt="~.+x+I(x.mi.e^2)+I(x.lt.e^3)+I(x.gt.e^3)"
        } else if (threshold.type %in% c("M40")) {
        #} else if (threshold.type %in% c("M04", "M40")) {
            f.alt="~.+x.mod.e+I(x.mod.e^2)+I(x.mod.e^3)+I(x.mod.e^4)"
        } else if (threshold.type=="M111") {
            f.alt="~.+I(x.lt.e)+I(x.lt.f)"
        } else {
            f.alt = "~.+x.mod.e"
            if (threshold.type=="stegmented") f.alt=f.alt %.% "+x.mod.e.2" 
            if (has.quad) f.alt=f.alt %.% "+I(x.mod.e^2)"
            if (has.cubic) f.alt=f.alt %.% "+I(x.mod.e^2)" %.% "+I(x.mod.e^3)"
        }
        
    } else if (!is.null(modified.by) & !stratified) {
        f.alt = if (threshold.type=="stegmented") {
            "~.+(x.mod.e+x.mod.e.2)*"%.%modified.by%.%"+"%.%chngpt.var.name%.%":"%.%modified.by 
        } else if (threshold.type %in% c("segmented","segmented2")) {
            "~."%.%"+"%.%chngpt.var.name%.%":"%.%modified.by%.%"+x.mod.e*"%.%modified.by
        } else if (threshold.type %in% c("step","hinge","upperhinge")) {
            "~.+x.mod.e*"%.%modified.by
        } else stop("wrong threshold.type within get.f.alt")
         
    } else if (stratified & is.null(modified.by)) {
        f.alt = if (threshold.type %in% c("segmented","hinge","upperhinge")) {
            "~.+x.mod.e+x.mod.f"
        } else stop("wrong threshold.type within get.f.alt")
        
    } else stop("modified.by and stratified.by cannot both be there")
    as.formula(f.alt)
}
#get.f.alt("step", "x", modified.by=c("a","b"), stratified=FALSE, has.quad=FALSE) 
        
#deviance.3pl <- expression( (1-y) * ( c + (d-c)/(1+exp(b*(t-e))) )  +  log( 1 + exp( -c - (d-c)/(1+exp(b*(t-e))) ) ) )
#deviance.3pl.deriv=deriv3(deviance.3pl, c("c","d","e"), c("c","d","e","t","y","b"))


model.frame.chngptm=function(formula, ...) model.frame(formula$best.fit, ...)
print.chngptm=function(x, ...) {
    if (x$est.method=="smoothapprox") {
        if (!x$converged) cat("Warning: not converged\n")
    }
    print(x$coefficients)
}
coef.chngptm=function(object, ...) {
    object$coefficients[-length(object$coefficients)] # not return the chngpoint estimate
}
residuals.chngptm=function(object, ...) {
    residuals(object$best.fit, ...)
}
vcov.chngptm=function(object, var.type=NULL, ...) {
#    if(object$threshold.type %in% c("hinge","segmented")) {
#        object$vcov[-length(object$coefficients),-length(object$coefficients)] # not return the chngpoint estimate
#    } else  {
#        object$vcov
#    }
    boot.conf=FALSE        
    if(!is.null(var.type)) {
        if (var.type %in% c("perc","basic","bc","bcabc","symm")) {
            if (is.null(object$boot.samples)) vcov=NULL else vcov=cov(object$boot.samples)
            boot.conf=TRUE
        }
        vcov=object$vcov[[var.type]]
    
    } else {            
        if(is.list(object$vcov)){
            if (!is.null(object$vcov$robust)) {
                vcov=object$vcov$robust
            } else if (!is.null(object$vcov$boot.samples)){
                if (is.null(object$vcov$boot.samples)) vcov=NULL else vcov=cov(object$vcov$boot.samples, use="pairwise.complete.obs")
                boot.conf=TRUE
                #vcov=object$vcov$symm;
                #if (all(is.na(vcov))) vcov=object$vcov$bc # if the bootstrap samples have too many NA, only bc quantiles can be estimated. However, bc quantiles may be misleading
            }
            
        } else {
            vcov=object$vcov
        }
    } 
    attr(vcov, "boot.conf")=boot.conf
    vcov
}
lincomb=function(object, comb, alpha=0.05, boot.type="perc"){
    if(!is.matrix(comb)) comb=matrix(comb, ncol=1)
    est=c(coef(object)%*%comb)
    
    if (is.matrix(object$vcov)) {
        # analytical
        if(length(comb)==ncol(object$vcov)-1) {
            # vcov has chngpt
            comb=c(comb, 0)
        }
        se=sqrt(comb%*%object$vcov%*%comb)
        c(est, est-1.96*se, est+1.96*se)
    } else {
        # bootstrap
        samples=object$vcov$boot.samples[,1:length(comb)] %*% comb        
        if(boot.type=="perc") {
            ci=quantile(samples, c(alpha/2,1-alpha/2))
        } else if (boot.type=="symmetric") {
            q.1=quantile(abs(samples-est), 1-alpha, na.rm=TRUE)
            ci=est+c(-q.1, q.1)
        } else stop("boot.type not supported: "%.%boot.type)
        res=c(est, ci)    
        names(res)=c("est", "lb","ub")
        res
    }
}

logLik.chngptm=function(object,...) {
    res=logLik(object$best.fit)
    attr(res,"df")=1+attr(res,"df")
    res
}

# which=1: scatterplot with fitted line, only works for simple regression
plot.chngptm=function(x, which=NULL, xlim=NULL, ylim=NULL, lwd=2, lcol="red", lty=1, add=FALSE, add.points=TRUE, add.ci=TRUE, breaks=20, mark.chngpt=TRUE, xlab=NULL, ylab=NULL, 
    plot.individual.line=FALSE, main="", y.adj=NULL, auto.adj.y=FALSE, transform=NULL, ...) {
    
    has.boot.samples=FALSE
    if(is.list(x$vcov)) if(!is.null(x$vcov$boot.samples)) has.boot.samples=TRUE 
    has.submodel.lik=FALSE
    if(!is.null(x$logliks)) has.submodel.lik=TRUE 
    if(x$threshold.type=="M111") has.submodel.lik=FALSE # not plotting this for now
    
    if(is.null(which)) {
        par(mfrow=c(1+has.submodel.lik+has.boot.samples,1))
                              plot(x,1,xlim=xlim,lwd=lwd,lcol=lcol,mark.chngpt=mark.chngpt,y.adj=y.adj, auto.adj.y=auto.adj.y,...)
        if (has.submodel.lik) plot(x,2,xlim=xlim,lwd=lwd,lcol=lcol,...)
        if (has.boot.samples) plot(x,3,xlim=xlim,lwd=lwd,lcol=lcol,...)
        return(invisible())
    }    
    
    fit=x
    linkinv=if (is.null(transform)) get(fit$family)()$linkinv else transform
    
    has.re=inherits(fit$best.fit, "lmerMod")
    
    if(has.re) {
        data=fit$best.fit@frame
        offset=0#fit$best.fit@resp$offset # this has length n
        y=fit$best.fit@resp$y
        yname=names(fit$best.fit@frame)[1]    
    } else {
        data=fit$best.fit$data
        offset=ifelse(!is.null(fit$best.fit$offset), fit$best.fit$offset, 0)
        y=fit$best.fit$y
        yname=names(fit$best.fit$model)[1]
    }
    
    if (fit$hinge.to.upperhinge) data[[fit$chngpt.var]]=-data[[fit$chngpt.var]]
    
    if(is.null(xlim)) xlim=range(data[[fit$chngpt.var]])
        
    out=list()
    out[[1]]=NULL
    if(which==1) {
    # scatterplot with lines
        chngpt.est=fit$chngpt        
        
        # random effect intercepts
        if (has.re & plot.individual.line) {
            intercepts=coef(fit$best.fit)[[1]][,"(Intercept)"]
        } else {
            intercepts=ifelse(names(coef(fit))[1]=="(Intercept)", coef(fit)[1], 0)
        }        
        for (intercept in intercepts) {
            if (fit$threshold.type=="M111") {
                # three phase
                slope=coef(fit)[fit$chngpt.var]; if (is.na(slope)) slope=0
                pre.f   =coef(fit)["("%.%fit$chngpt.var%.%"-chngpt2)-"]; if (is.na(pre.f))    pre.f=0
                pre.e   =coef(fit)["("%.%fit$chngpt.var%.%"-chngpt1)-"]; if (is.na(pre.e))    pre.e=0
                
                xx=seq(xlim[1],chngpt.est[1],length=100)
                yy = offset + intercept + slope*xx + pre.e*(xx-chngpt.est[1]) + pre.f*(xx-chngpt.est[2])
                yy=linkinv(yy)
                
                # may need to adjust y if there are other covariates
                if(auto.adj.y) {
                    y.adj=median(y)-last(yy)
                } else if(is.null(y.adj)) y.adj=0
                
                yy=yy+y.adj
                out[[1]]=cbind(xx,yy)
                #lines(xx, yy, lwd=lwd, col=lcol, lty=lty)
                #str(xx); str(yy); str(chngpt.est); str(pre.slope); str(intercept); str(linkinv)
                
                xx=seq(chngpt.est[1], chngpt.est[2], length=100)
                yy = offset + intercept + slope*xx + pre.f*(xx-chngpt.est[2])
                yy=linkinv(yy)+y.adj
                out[[1]]=rbind(out[[1]], cbind(xx,yy))
#                lines(xx, yy, lwd=lwd, col=lcol, lty=lty)
    
                #if(mark.chngpt) points(chngpt.est, yy[c(1,length(yy))], pch=19, col=lcol, cex=1.5)
               
                xx=seq(chngpt.est[2], xlim[2], length=100)
                yy = offset + intercept + slope*xx
                yy=linkinv(yy)+y.adj
                out[[1]]=rbind(out[[1]], cbind(xx,yy))
#                lines(xx, yy, lwd=lwd, col=lcol, lty=lty)
                
            } else {
                # two phase
                slope=coef(fit)[fit$chngpt.var]; if (is.na(slope)) slope=0
                #
                pre.slope   =coef(fit)[  "("%.%fit$chngpt.var%.%"-chngpt)-"]; if (is.na(pre.slope))    pre.slope=0
                pre.slope.2 =coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)-^2"]; if (is.na(pre.slope.2))  pre.slope.2=0 # quad
                pre.slope.3 =coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)-^3"]; if (is.na(pre.slope.3))  pre.slope.3=0 # cubic
                pre.slope.4 =coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)-^4"]; if (is.na(pre.slope.4))  pre.slope.4=0 # 4
                #
                post.slope  =coef(fit)[  "("%.%fit$chngpt.var%.%"-chngpt)+"]; if (is.na(post.slope))   post.slope=0
                post.slope.2=coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)+^2"]; if (is.na(post.slope.2)) post.slope.2=0 # quad
                post.slope.3=coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)+^3"]; if (is.na(post.slope.3)) post.slope.3=0 # cubic
                post.slope.4=coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)+^4"]; if (is.na(post.slope.4)) post.slope.4=0 # 4
                post.jump   =coef(fit)[ fit$chngpt.var%.%">chngpt"]; if (is.na(post.jump)) post.jump=0 # step
                # M22c and M33c have x-chngpt and (x-chngpt)^2 
        #        M3bslope=coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)"]; if (is.na(M3bslope)) M3bslope=0
                M6bquad =coef(fit)["("%.%fit$chngpt.var%.%"-chngpt)^2"]; if (is.na(M6bquad)) M6bquad=0
                
#                myprint(slope)
#                myprint(pre.slope, pre.slope.2, pre.slope.3, pre.slope.4)
#                myprint(post.slope, post.slope.2, post.slope.3, post.slope.4)
#                myprint(post.jump, M6bquad)
                
                # pre
                xx=seq(xlim[1],chngpt.est,length=100)
                yy = offset + intercept + slope*xx + (pre.slope)*(xx-chngpt.est) + (pre.slope.2+M6bquad)*(xx-chngpt.est)^2 + pre.slope.3*(xx-chngpt.est)^3  + pre.slope.4*(xx-chngpt.est)^4 
                yy=linkinv(yy)
                
                # may need to adjust y if there are other covariates
                if(auto.adj.y) {
                    y.adj=median(y)-last(yy)
                } else if(is.null(y.adj)) y.adj=0
                
                yy=yy+y.adj
                
                out[[1]]=cbind(xx,yy)
#                lines(xx, yy, lwd=lwd, col=lcol, lty=lty)
                #str(xx); str(yy); str(chngpt.est); str(pre.slope); str(intercept); str(linkinv)
                
                # post
                xx=seq(chngpt.est, xlim[2], length=100)
                yy = offset + intercept + slope*xx + (post.slope)*(xx-chngpt.est) + (post.slope.2+M6bquad)*(xx-chngpt.est)^2 + post.slope.3*(xx-chngpt.est)^3 + post.slope.4*(xx-chngpt.est)^4 + post.jump
                yy=linkinv(yy)+y.adj
                out[[1]]=rbind(out[[1]], cbind(xx,yy))
#                lines(xx, yy, lwd=lwd, col=lcol, lty=lty)
        
            } # end twophase
        }# end for intercept
        
        if (is.null(ylim)) ylim=range(out[[1]][,2], if(add.points) y)
        if(!add) plot(data[[fit$chngpt.var]], y, xlim=xlim, xlab=ifelse(is.null(xlab),fit$chngpt.var,xlab), ylab=ifelse(is.null(ylab),yname,ylab), type="n", main=main, ..., ylim=ylim)
        if(add.points) points(data[[fit$chngpt.var]], y, ...)
        lines(out[[1]][,1], out[[1]][,2], lwd=lwd, col=lcol, lty=lty)
        #if(mark.chngpt) points(chngpt.est, yy[1], pch=19, col=lcol, cex=1.5)

        #myprint(chngpt.est, yy[1])
        #myprint(mark.chngpt)
        
    } else if (which==2) {
    # loglik vs threshold
        if (!has.submodel.lik) stop("no submodel likelihoods")
        if (fit$threshold.type=="M111") {
            empty.plot()
            title(main="profile likelihood plot not yet implemented for M111")            
        } else {
            plot(fit$chngpts, fit$logliks, xlim=xlim, xlab="threshold", ylab="log likelihood", type="b", ...)
            abline(v=fit$chngpt, lty=c(1), col=2) # point est 
        }
        
    } else if (which==3) {
    # histograms of threshold estimates from bootstrap. the first bar of the second threshold histogram may not overlap with one of the bars from the first threshold histogram because of the way histogram works
        if (!has.boot.samples) stop("boot samples not saved")
        chngpt.cols=which(startsWith(colnames(fit$vcov$boot.samples), "chngpt"))
        if(length(chngpt.cols)>1) {
            mycol=col2rgb(2); mycol <- rgb(mycol[1,1], mycol[2,1], mycol[3,1], maxColorValue = 255, alpha = 125)        
            tmp=hist(fit$vcov$boot.samples[,chngpt.cols[1]], breaks=breaks, xlim=xlim, xlab="bootstrap threshold estimate", add=add, main=main, col=mycol, ...)
            for (i in 2:length(chngpt.cols)) {
                mycol=col2rgb(i+1); mycol <- rgb(mycol[1,1], mycol[2,1], mycol[3,1], maxColorValue = 255, alpha = 125)        
                tmp=hist(fit$vcov$boot.samples[,chngpt.cols[i]], breaks=breaks, add=T, col=mycol, ...)
            }
                        
            for (i in 1:length(chngpt.cols)) abline(v=fit$chngpt[i], col=i+1) 
            if(add.ci) {
                ci=summary(fit)$chngpt[,c("(lower","upper)")]
                for (i in 1:length(chngpt.cols)) abline(v=ci[i,], lty=c(2,2), col=i+1) 
            }
            
        } else {
            hist(fit$vcov$boot.samples[,chngpt.cols[1]], breaks=breaks, xlim=xlim, xlab="bootstrap threshold estimate", add=add, main=main, ...)
            if(add.ci) {
                ci=summary(fit)$chngpt[c("(lower","upper)")]
                abline(v=ci, lty=c(2,2)) 
            }
            abline(v=fit$chngpt, col=2) 
        }
        
    } else stop("wrong which")
    
    invisible(out)
}

summary.chngptm=function(object, var.type=NULL, expo=FALSE, show.slope.post.threshold=FALSE, verbose=FALSE, boot.type="perc", ...) {    
    # "Estimate" "Std. Error" "t value" "Pr(>|t|)"        
    fit=object
    p.z=length(fit$coefficients) # this count includes threshold parameter(s)
    n=fit$n
    transf=if(fit$family=="binomial" & expo) exp else identity
    
    if (!is.null(fit$threshold.type)) threshold.type=fit$threshold.type else threshold.type=fit$type
    
    ncut=ifelse(threshold.type=="M111",2,1)
    
    res=list()
    
    if (is.null(fit$vcov)) {
        cat("No variance estimate available.\n\n")
        #print(fit)
        res$coefficients = cbind(transf(fit$coefficients[1:(p.z-ncut)]))
        colnames(res$coefficients)[1]=if(fit$family=="binomial" & expo) "OR" else "est"
        res$chngpt=c("est" = fit$chngpt)
        return (res)
    } else {    
        vcov=vcov(fit, var.type)
        boot.conf= attr(vcov,"boot.conf")
        if (boot.conf) vcov=fit$vcov[[boot.type]]
        if(is.null(vcov)) {
            cat("No variance estimate available.\n\n")
            res$coefficients = cbind(transf(fit$coefficients[1:(p.z-ncut)]))
            colnames(res$coefficients)[1]=if(fit$family=="binomial" & expo) "OR" else "est"
            res$chngpt=c("est" = fit$chngpt)
            return (res)
        }             
    }    
    if(threshold.type %in% c("hinge","segmented","upperhinge") & !boot.conf) {
        vcov.t=vcov[p.z,p.z]
        tmp=vcov[-p.z,-p.z] # not return the chngpoint estimate
        # the last line lost the attr chngpt.ci, need to make a copy
        if(!is.null(attr(vcov,"chngpt.ci"))) attr(tmp,"chngpt.ci")=attr(vcov,"chngpt.ci")
        vcov=tmp
    } 
    if(verbose) str(vcov)
    
    # assuming the last of coefficients is always the change point
    # deal with coefficients and change point separately
    
    if (boot.conf){
        lb=transf(vcov[1,])
        ub=transf(vcov[2,])
        # an approximate p value, prompted by comment about it being NA from Katie Chartrand, James Cook University
        sd=(ub-lb)/1.96/2
        pval=pnorm(abs(fit$coefficients/sd), lower.tail=F)*2
    } else  {
        lb=transf(unname(fit$coefficients[1:(p.z-ncut)] - sqrt(diag(vcov)) * qt(0.975, df=n-p.z, lower.tail=TRUE)))
        ub=transf(unname(fit$coefficients[1:(p.z-ncut)] + sqrt(diag(vcov)) * qt(0.975, df=n-p.z, lower.tail=TRUE)))
        sd = sqrt(diag(vcov))
        pval=unname(pt(abs(fit$coefficients[1:(p.z-ncut)] / sqrt(diag(vcov))), df=n-p.z, lower.tail=FALSE)) *2 # *2 is added on 8/2/2016
    }
    
    res$coefficients=mysapply(1:(p.z-ncut), function (i) {
        c(
              transf(unname(fit$coefficients[i]))
            , "Std. Error" = unname(sd[i])
            , "(lower" = unname(lb[i])
            , "upper)" = unname(ub[i])
            , "p.value" = unname(pval[i])
#              "Estimate"=unname(fit$coefficients[i])
#            , "t value" = unname(fit$coefficients[i] / sqrt(vcov[i,i]))
#            , "Pr(>|t|)" = unname(pt(abs(fit$coefficients[i] / sqrt(vcov[i,i])), df=n-p.z, lower.tail=FALSE))
        )
    })
    rownames(res$coefficients)=names(fit$coefficients)[-(p.z-(ncut-1):0)]
    colnames(res$coefficients)[1]=if(fit$family=="binomial" & expo) "OR" else "est"
    if(boot.conf) colnames(res$coefficients)[c(2,5)]=colnames(res$coefficients)[c(2,5)]%.%"*"
    
    # add a column to indicate whether CI includes 0
    
    # may need to find linear combination
    if(show.slope.post.threshold){
        if(ncut==1) {
            comb=rep(0,length=p.z-1)
            comb[p.z-2:1]=1
            tmp=lincomb(object, comb, alpha=0.05)
            res$coefficients[p.z-1,]=c(tmp[1],NA,tmp[2],tmp[3],NA)
            rownames(res$coefficients)[p.z-1]=sub("\\+",">",rownames(res$coefficients)[p.z-1])
        } else {
            warning("show.slope.post.threshold not yet implemented for more than 1 threshold")
        }
    }
    
    # change point
    lb<-ub<-NULL
    if (ncut>=2) {
        i=p.z-(ncut-1):0
        if (boot.conf){
            lb=vcov[1,i]
            ub=vcov[2,i]
            sd=(ub-lb)/1.96/2
        } else {
            lb=(unname(fit$coefficients[i] - sqrt(vcov.t) * qt(0.975, df=n-p.z, lower.tail=TRUE)))
            ub=(unname(fit$coefficients[i] + sqrt(vcov.t) * qt(0.975, df=n-p.z, lower.tail=TRUE)))
            sd=sqrt(vcov.t)
        }
        res$chngpt=cbind(
                  "est" = fit$chngpt
                , "Std. Error" = sd
                , "(lower" = lb
                , "upper)" = ub
#                , "p.value" = NA # makes no sense to have a pvalue for chngpt
        )
        
    } else {
        i=p.z
        if (boot.conf){
            lb=vcov[1,p.z]
            ub=vcov[2,p.z]
            sd=(ub-lb)/1.96/2
        } else if(!is.null(attr(vcov,"chngpt.ci"))) {
            if(verbose) print("get test inversion CI for threshold")
            lb=unname(attr(vcov,"chngpt.ci")[1])
            ub=unname(attr(vcov,"chngpt.ci")[2])
            sd=(ub-lb)/1.96/2
        } else {
            lb=(unname(fit$coefficients[i] - sqrt(vcov.t) * qt(0.975, df=n-p.z, lower.tail=TRUE)))
            ub=(unname(fit$coefficients[i] + sqrt(vcov.t) * qt(0.975, df=n-p.z, lower.tail=TRUE)))
            sd=sqrt(vcov.t)
        }
        res$chngpt=c(
                  "est" = fit$chngpt
                , "Std. Error" = sd
                , "(lower" = lb
                , "upper)" = ub
#                , "p.value" = NA # makes no sense to have a pvalue for chngpt
        )
    }
    
    res$threshold.type=fit$threshold.type
    
    class(res) <- "summary.chngptm"
    res
}
print.summary.chngptm=function(x,...) {
    cat("Change point model threshold.type: ",x$threshold.type,"\n\n")
    cat("Coefficients:\n")
    print(x$coefficients)
    cat("\nThreshold:\n")
    print(x$chngpt)    
}
getFixedEf.chngptm=function(object, exp=FALSE, show.slope.post.threshold=FALSE, exclude.chngpt=FALSE, ...) {
    capture.output({
        res=summary(object, expo=exp, show.slope.post.threshold=show.slope.post.threshold, ...)
    })
    if (exclude.chngpt) {
        res$coefficients
    } else {
        rbind(res$coefficients, chngpt=c(res$chngpt,if(ncol(res$coefficients)>1) NA))
    }
}


# the following functions are used as objective functions in smoothapprox, b/c we don't supply derivative to optim anymore. 
.lik.f=function(linear, y, family, weights) {
    tmp=if(family=="binomial") {
        (1-y)*linear + log(1+exp(-linear)) 
    } else if(family=="gaussian") {
        (linear-y)**2
    }
    sum(tmp*weights)
}
# step change point model
dev.step.f <- function(theta,x,y,b,alpha.z,z.1,family,weights)  {
    beta=theta[1]; e=theta[2]
    eta=beta/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
}
dev.step.itxn.f <- function(theta,x,y,b,alpha.z,z.1,family,weights)  {
    beta1=theta[1]; beta2=theta[2]; e=theta[3]
    eta=(beta1+beta2*z.1)/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
}
# hinge change point model
dev.hinge.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta=theta[1]; e=theta[2]
    eta=(x-e)*beta/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
dev.M01.f=dev.hinge.f
# upperhinge model
dev.upperhinge.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta=theta[1]; e=theta[2]
    eta=(x-e)*beta*(1-1/(1+exp(-b*(x-e))))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
dev.hinge.itxn.f <- function(theta,x,y,b,alpha.z,z.1,family,weights)  {
    beta1=theta[1]; beta2=theta[2]; e=theta[3]
    eta=(x-e)*(beta1+beta2*z.1)/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
}
# segmented change point model
dev.segmented.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta1=theta[1]; beta2=theta[2]; e=theta[3]
    eta=beta1*x + (x-e)*beta2/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
dev.segmented2.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta1=theta[1]; beta2=theta[2]; e=theta[3]
    eta=beta1*x + x*beta2/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
dev.segmented.itxn.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta1=theta[1]; beta2=theta[3]; beta3=theta[2]; beta4=theta[4]; e=theta[5]# note the order change between beta and theta
    eta=(beta1+beta2*z.1)*x + (beta3+beta4*z.1)*(x-e)/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
dev.segmented2.itxn.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta1=theta[1]; beta2=theta[3]; beta3=theta[2]; beta4=theta[4]; e=theta[5]# note the order change between beta and theta
    eta=(beta1+beta2*z.1)*x + (beta3+beta4*z.1)*x/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
# stegmented change point model
dev.stegmented.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta1=theta[1]; beta2=theta[2]; beta3=theta[3]; e=theta[4]
    eta=beta1*x + ((x-e)*beta3 + beta2)/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
# the order of parameters in the following needs to be fixed
dev.stegmented.itxn.f <- function(theta,x,y,b,alpha.z,z.1,family,weights) {
    beta1=theta[1]; beta2=theta[3]; beta3=theta[2]; beta4=theta[4]; beta5=theta[5]; beta6=theta[6]; e=theta[5]# note the order change between beta and theta
    eta=(beta1+beta2*z.1)*x + ((beta3+beta4*z.1)*(x-e) + beta4+beta5*z.1)/(1+exp(-b*(x-e)))
    linear=alpha.z+eta
    .lik.f(linear,y,family,weights)
} 
# dev.step.deriv.f is experimental, not really used, does not seem to bring improvement when compared to gr=NULL
dev.step.deriv.f <- function(theta,x,y,b,alpha.z,z.1,family)  {
    if(family!="binomial") stop("only binomial supported")
    beta=theta[1]; e=theta[2]
    approxi=expit(b*(x-e))
    dBde=b*approxi*(1-approxi)
    pi=expit(alpha.z+beta*approxi)
    colSums((y-pi)*cbind(x>e, beta*dBde))
}
# expressions, binomial only
# step change point model
dev.step <- expression( (1-y) * ( alpha.z + beta/(1+exp(b*(x-e))) )  +  log( 1 + exp( -alpha.z - beta/(1+exp(-b*(x-e))) ) ) )
dev.step.deriv=deriv3(dev.step, c("beta","e"), c("beta","e","x","y","b","alpha.z"))
dev.step.itxn <- expression( (1-y) * ( alpha.z + (beta1+beta2*z.1)/(1+exp(-b*(x-e))) )  +  log( 1 + exp( -alpha.z - (beta1+beta2*z.1)/(1+exp(-b*(x-e))) ) ) )
dev.step.itxn.deriv=deriv3(dev.step.itxn, c("beta1","beta2","e"), c("beta1","beta2","e","x","y","b","alpha.z","z.1"))
# hinge change point model
dev.hinge <- expression( (1-y) * ( alpha.z + (x-e)*beta/(1+exp(-b*(x-e))) )  +  log( 1 + exp( -alpha.z - (x-e)*beta/(1+exp(-b*(x-e))) ) ) )
dev.hinge.deriv=deriv3(dev.hinge, c("beta","e"), c("beta","e","x","y","b","alpha.z"))
dev.hinge.itxn <- expression( (1-y) * ( alpha.z + (x-e)*(beta1+beta2*z.1)/(1+exp(-b*(x-e))) )  +  log( 1 + exp( -alpha.z - (x-e)*(beta1+beta2*z.1)/(1+exp(-b*(x-e))) ) ) )
dev.hinge.itxn.deriv=deriv3(dev.hinge.itxn, c("beta1","beta2","e"), c("beta1","beta2","e","x","y","b","alpha.z","z.1"))
# upper hinge model
dev.upperhinge <- expression( (1-y) * ( alpha.z + (x-e)*beta*(1-1/(1+exp(-b*(x-e)))) )  +  log( 1 + exp( -alpha.z - (x-e)*beta * (1-1/(1+exp(-b*(x-e)))) ) ) )


# check whether the MLE is considered good, return Boolean
good.soln=function(fit, df=7, plot=FALSE) {
    if (!inherits(fit, "chngptm")) {
        warning("fit has to be of class chngptm")
        return (NA)
    }
    
    # fit a smoothing spline
    complete=!is.na(fit$logliks)
    fm1 <- smooth.spline(x=fit$chngpts[complete], y=fit$logliks[complete], df=df)
    # compute locations where the first derivatives are 0
    deriv.fm1=predict(fm1,deriv=1)
    tmp=deriv.fm1$y>0
    k=length(tmp)
    tmp.2=xor(tmp[1:(k-1)], tmp[2:k])
    optimum=which(tmp.2)
    optimum.1=ifelse(abs(deriv.fm1$y[optimum])<abs(deriv.fm1$y[optimum+1]), optimum, optimum+1)
    
    if(plot) {
        par(mfrow=c(2,1))
        plot(fit$chngpts, fit$logliks)
        lines(fm1, col=2, type="l")        
        abline(v=fit$chngpts[optimum.1])
        plot(deriv.fm1$x, deriv.fm1$y); abline(h=0)
        abline(v=fit$chngpts[optimum.1])
    }    
    
    if(length(optimum.1)!=1) return (FALSE) else {
        if (abs(match(fit$chngpt,fit$chngpts)-optimum.1)<=length(fit$chngpts)/10) return (TRUE) else return (FALSE) # 20 is hardcoded
    }        
}


# Adam Elder
# assum input are sorted
# was useful for debugging C implementation
get.liks.upperhinge=function(y,Z,x,cand_e){    
    n=length(y)
    
    ## Solving for the quantities that only need to be found once
    A <- solve(t(Z) %*% Z)
    H <- Z %*% A %*% t(Z)
    U <- as.numeric(t(y) %*% (H - diag(n)))
    M <- diag(n) - H
    
    ## change to pass this info in
#    ## Selecting the possible cutoff values
#    cand_e <- x[floor(n * 0.05):ceiling(n*0.95)]
    sudo_liks = rep(NA,length(cand_e))
    
    ## Creating a variable that stores the best cuttoff point, as
    ## a proxy for the maximum likelihood conditional on this cuttoff
    best_e <- c(cand_e[1], 0)
    for(i in 1:length(cand_e)){
        e=cand_e[i]
        nu_e <- pmax(e - x, 0) #the last row vector of our design matrix for a given cuttoff value
        #if(sum(nu_e) != 0){
        sudo_liks[i] <- as.numeric((t(U) %*% nu_e))**2/as.numeric((t(nu_e) %*% M %*% nu_e))
      #}
    }
    
    sudo_liks
}


name.conversion=function(threshold.type, chngpt.var.name, new.names, hinge.to.upperhinge=FALSE){
    if (threshold.type=="stegmented") {
        new.names=sub("x.mod.e.2", "("%.%chngpt.var.name%.%"-chngpt)+", new.names)    
        new.names=sub("x.mod.e", "I("%.%chngpt.var.name%.%">chngpt)", new.names)    
        
    } else if (threshold.type == "M21c" & hinge.to.upperhinge) {    
        new.names=sub("x.lt.e",   "("%.%chngpt.var.name%.%"-chngpt)+",   new.names)    
    
    } else if (threshold.type == "M111") {    
        new.names=sub("x.lt.e",   "("%.%chngpt.var.name%.%"-chngpt1)-",   new.names)    
        new.names=sub("x.lt.f",   "("%.%chngpt.var.name%.%"-chngpt2)-",   new.names)    
    
    } else if (threshold.type %in% c("M22","M22c","M33c","M12c","M21c")) {
        new.names=sub("x.lt.e.3", "("%.%chngpt.var.name%.%"-chngpt)-^3", new.names)    
        new.names=sub("x.lt.e.2", "("%.%chngpt.var.name%.%"-chngpt)-^2", new.names)    
        new.names=sub("x.lt.e",   "("%.%chngpt.var.name%.%"-chngpt)-",   new.names)    
        new.names=sub("x.gt.e.2", "("%.%chngpt.var.name%.%"-chngpt)+^2", new.names)    
        new.names=sub("x.gt.e.3", "("%.%chngpt.var.name%.%"-chngpt)+^3", new.names)    
        new.names=sub("x.gt.e",   "("%.%chngpt.var.name%.%"-chngpt)+",   new.names)    
        new.names=sub("x.mi.e",   "("%.%chngpt.var.name%.%"-chngpt)",    new.names)    
        new.names=sub("x",        chngpt.var.name,                       new.names)    
        
    } else if (threshold.type=="step") {
            new.names=sub("x.mod.e", "I("%.%chngpt.var.name%.%">chngpt)", new.names)
            
    } else if (threshold.type %in% c("hinge","M02","M03","M04","M12","M13","segmented","segmented2") | hinge.to.upperhinge) {
        new.names=sub("x.mod.e", "("%.%chngpt.var.name%.%"-chngpt)+", new.names)
        
    } else if (threshold.type %in% c("upperhinge",  "M20","M30","M40","M21","M31")) {
        new.names=sub("x.mod.e", "("%.%chngpt.var.name%.%"-chngpt)-", new.names)
        
    } else stop("wrong threshold type")
                    
    new.names=sub("^I\\((.+))$", "\\1", new.names)    
    new.names    
}


name.conversion.2=function(new.names, chngpt.var.name="x"){
    new.names=sub("x.uhinge.cubic", "("%.%chngpt.var.name%.%"-chngpt)-^3", new.names)    
    new.names=sub("x.uhinge.quad", "("%.%chngpt.var.name%.%"-chngpt)-^2", new.names)    
    new.names=sub("x.uhinge",   "("%.%chngpt.var.name%.%"-chngpt)-",   new.names)    
    new.names=sub("x.hinge.cubic", "("%.%chngpt.var.name%.%"-chngpt)+^3", new.names)    
    new.names=sub("x.hinge.quad", "("%.%chngpt.var.name%.%"-chngpt)+^2", new.names)    
    new.names=sub("x.hinge",   "("%.%chngpt.var.name%.%"-chngpt)+",   new.names)    
    new.names=sub("x.mi.e",   "("%.%chngpt.var.name%.%"-chngpt)",   new.names)    
    new.names=sub("intercept",   "(Intercept)",   new.names)    
    new.names    
}


# converting from parameterization used in sim.chngpt to that used in chngptm
convert.coef=function(coef.0, threshold.type) {
           
    if (threshold.type=="M12")  {
        coef.0["x"]=coef.0["(x-chngpt)-"]
        coef.0["(x-chngpt)+"]=coef.0["(x-chngpt)+"] - coef.0["x"]
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    } else if (threshold.type=="M13")  {
        coef.0["x"]=coef.0["(x-chngpt)-"]
        coef.0["(x-chngpt)+"]=coef.0["(x-chngpt)+"] - coef.0["x"]
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    } else if (threshold.type=="M21")  {
        coef.0["x"]=coef.0["(x-chngpt)+"]
        coef.0["(x-chngpt)-"]=coef.0["(x-chngpt)-"] - coef.0["x"]
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    } else if (threshold.type=="M21c" | threshold.type=="M12c")  {
        coef.0["x"]=coef.0["(x-chngpt)+"]
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    } else if (threshold.type=="M31")  {
        coef.0["x"]=coef.0["(x-chngpt)+"]
        coef.0["(x-chngpt)-"]=coef.0["(x-chngpt)-"] - coef.0["x"]
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    } else if (threshold.type=="M22c") {
        coef.0["x"]=coef.0["(x-chngpt)+"]
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    } else if (threshold.type=="M33c") {
        coef.0["x"]=coef.0["(x-chngpt)+"]
        coef.0=c(coef.0, "(x-chngpt)^2"=unname(coef.0["(x-chngpt)-^2"]))
        coef.0["(Intercept)"]= coef.0["(Intercept)"] - coef.0["x"]*coef.0["chngpt"]
    
    }
    coef.0
}


# important to include intercept it seems
threshold.func=function(threshold.type, coef, xx, x.name, include.intercept=FALSE) { 
  with(as.list(coef), switch(threshold.type 
  , segmented = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) + get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) 
  , M111      = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) + get("("%.%x.name%.%"-chngpt2)-")*(xx<=chngpt.2)*(xx-chngpt.2) + get("("%.%x.name%.%"-chngpt1)-")*(xx<=chngpt.1)*(xx-chngpt.1) 
  , hinge     = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) 
  , M02       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2 
  , M03       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)+^3")*(xx>chngpt)*(xx-chngpt)^3 
  , M04       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)+^3")*(xx>chngpt)*(xx-chngpt)^3 + get("("%.%x.name%.%"-chngpt)+^4")*(xx>chngpt)*(xx-chngpt)^4 
  , upperhinge= ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) 
  , M20       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2
  , M30       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)-^3")*(xx<chngpt)*(xx-chngpt)^3
  , M40       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)-^3")*(xx<chngpt)*(xx-chngpt)^3 + get("("%.%x.name%.%"-chngpt)-^4")*(xx<chngpt)*(xx-chngpt)^4
  , M21       = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) + get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2
  , M21c      = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) +                                                           get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2
  , M12       = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) + get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2
  , M12c      = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) +                                                           get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2
  , M22       = ifelse(include.intercept,get("(Intercept)"),0) +                  get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2
  , M22c      = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) +                                                           get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2
  , M31       = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) + get("("%.%x.name%.%"-chngpt)-")*(xx<chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)-^2")*(xx<chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)-^3")*(xx<chngpt)*(xx-chngpt)^3
  , M13       = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) + get("("%.%x.name%.%"-chngpt)+")*(xx>chngpt)*(xx-chngpt) + get("("%.%x.name%.%"-chngpt)+^2")*(xx>chngpt)*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)+^3")*(xx>chngpt)*(xx-chngpt)^3
  , M33c      = ifelse(include.intercept,get("(Intercept)"),0) + xx*get(x.name) +                                                           get("("%.%x.name%.%"-chngpt)^2")*(xx-chngpt)^2 + get("("%.%x.name%.%"-chngpt)+^3")*(xx>chngpt)*(xx-chngpt)^3 + get("("%.%x.name%.%"-chngpt)-^3")*(xx<chngpt)*(xx-chngpt)^3 
  , step      = ifelse(include.intercept,get("(Intercept)"),0) +                  get(""%.%x.name%.%">chngpt")  *(xx>chngpt)
  ))
}


predict.chngptm=function (object, newdata = NULL, type = c("link", "response", "terms"), ...){    
    
    if (is.null(object$best.fit)) stop("To make predictions, chngptm fit needs to have keep.best.fit=TRUE in the option.")
    
    type=match.arg(type)
    
    if (is.null(newdata)) {
        if (inherits(object$best.fit, "lmerMod")) {
            newdata=attr(object$best.fit, "frame")
            if(type=="terms") stop("terms not a supported type for lmer fit")
        } else {
            newdata=object$best.fit$data
        }
    } else {
        if (object$hinge.to.upperhinge) newdata[[object$chngpt.var]]=-newdata[[object$chngpt.var]]
        newdata = make.chngpt.var(newdata[[object$chngpt.var]], object$chngpt, object$threshold.type, newdata, object$b.transition)    
        newdata$chngpt.glm.offset=rep(0, nrow(newdata))
    }
        
    predict(object$best.fit, newdata, type=type, ...)
}



predictx=function(fit, boot.ci.type=c("perc","basic","symm"), alpha=0.05, xx=NULL, verbose=FALSE, return.boot=FALSE, include.intercept=FALSE, get.simultaneous=TRUE) {
    
    boot.ci.type<-match.arg(boot.ci.type)  
    
    threshold.type=fit$threshold.type
    
    if (is.null(xx)) {
        xx=fit$best.fit$data[[fit$chngpt.var]]
        xx=seq(min(xx), max(xx), length=100)
        xx=sort(c(xx, fit$chngpt))
    }
    
    yy=threshold.func(threshold.type, fit$coefficients, xx, fit$chngpt.var, include.intercept=include.intercept)
    
    if (is.null(fit$vcov$boot.samples)) return (list(xx=xx, yy=yy)    )
    
    yy.boot=apply(fit$vcov$boot.samples, 1, function(coef) threshold.func(threshold.type, coef, xx, fit$chngpt.var, include.intercept=include.intercept) )
    get.pointwise.ci=function(yy, yy.boot, boot.ci.type, alpha) {
        if (boot.ci.type=="perc") {
            point.ci=apply(yy.boot, 1, function(x) quantile(x, c(alpha/2,1-alpha/2)))
        } else if (boot.ci.type=="basic") {
            point.ci=apply(yy.boot, 1, function(x) quantile(x, c(alpha/2,1-alpha/2)))
            point.ci=rbind(2*yy-point.ci[2,], 2*yy-point.ci[1,])
        } else if (boot.ci.type=="symm") {
            q.1=apply(abs(yy.boot-yy), 1, function(x) quantile(x, 1-alpha) )        
            point.ci=rbind(yy-q.1, yy+q.1)
        } 
        point.ci
    }
        
    point.ci=get.pointwise.ci(yy, yy.boot, boot.ci.type, alpha)
    out = list(xx=xx, yy=yy, point.ci=point.ci, simul.ci=NULL)    
        
    # get simultaneous CI
    if (get.simultaneous) {
        alpha.cpy=alpha
        while(TRUE) {
            # compute simultaneous coverage
            simul.cvg=mean(sapply (1:ncol(yy.boot), function(j) {
                all(yy.boot[,j] >= point.ci[1,] & yy.boot[,j] <= point.ci[2,])
            }))
            if (verbose) {
                point.cvg=rowMeans(sapply (1:ncol(yy.boot), function(j) {
                    yy.boot[,j] >= point.ci[1,] & yy.boot[,j] <= point.ci[2,]
                }))        
                myprint(alpha, mean(point.cvg), simul.cvg)
            }
            
            if (round(simul.cvg,2)<1-alpha.cpy) {
                alpha=alpha-0.001
                if(abs(alpha)<1e-6) break
                point.ci=get.pointwise.ci(yy, yy.boot, boot.ci.type, alpha)
            } else {
                break
            }
        }
        if (abs(alpha)<1e-6) {
            warning("no simultaneous CI found")
        } else {
            out$simul.ci=point.ci
        }
    }
    
    if(return.boot) out$boot=yy.boot
    
    out
}

AIC.chngptm <- function(object, ...) {
    2+ AIC(object$best.fit)
}

# copied from tseriesEntropy package by Simone Giannerini<simone.giannerini@unibo.it>
surrogate.AR <- function(x, order.max=10, fit.method=c("yule-walker", "burg", "ols", "mle", "yw"), nsurr, wild=FALSE){
    fit.method <- match.arg(fit.method)
    n         <- length(x);
    x.ar      <- ar(x,method=fit.method,order.max = order.max, na.action=na.exclude);
    x.res.ar  <- x.ar$resid;
    ind       <- is.na(x.res.ar); x.res.ar[ind] <- median(x.res.ar,na.rm=T); # impute missing data with median
    if(!wild) x.res.ar  <- scale(x.res.ar,center=TRUE,scale=FALSE) # Recenters the residuals
    x.par.ar  <- x.ar$ar;             # Estimated parameters AR
    le.ar     <- x.ar$order;          # Estimated order of the AR model (AIC)
    x.surr    <- matrix(0,nrow=n,ncol=nsurr) # Matrix of the bootstrap replications
    for(i in (1:nsurr)) {
        if(wild) {
            #X=mysapply((le.ar+1):n, function (t) x[t-le.ar:1])
            #f=sqrt(1-diag(X%*%solve(t(X)%*%X, t(X))))
            x.resb <- abs(x.res.ar) * rnorm(n) #/ c(mean(f),f)
        } else {
            x.resb <- sample(x.res.ar,replace=TRUE)
        }
        x.surr[,i] <- arima.sim(n = n, list(ar = x.par.ar),innov=x.resb,n.start=le.ar+1);
    }
    return(list(surr=x.surr,call=match.call()));
}

#x=rnorm.ar(n=100, sd=1, rho=.9)
##x=rnorm(n=100)


# my implementation of awb
awb=function (resid, time, rho=.5) {
    n=length(time)
    rnorm.ar(n, 1, rho=rho) * abs(resid)
}

## the following function is modified from code downloaded at https://www.stephansmeekes.nl/code and published as part of friedrich2020statistical
### awb: autoregressive wild bootstrap
#awb <- function(resid,time,l=NULL){
#  n <- length(time)
#  theta <- 0.1; 
#  
#  # large l means large gamma, gamma is the autocorrelation coefficient
#  if (is.null(l)) {
#      # the following line corresponds to the downloaded code. note that it differs from the rule specified in friedrich2020statistical
#      #l = 1.75*(n)^(1/3) /365.25 /2
#      
#      # the following line corresponds to the rule specified in friedrich2020statistical
#      l = 1.75*(n)^(1/3)      
#  }
#    
#  L <- cholesky.decomp(time,theta^(1/l))
#  nu <- L%*%rnorm(n,mean=0, sd=1)
#  eps.star <- nu*resid
#  return(eps.star)
#}
### generate matrix L to get the right dependence structure of the bootstrap errors with missing data
#cholesky.decomp <- function(time,gamma){
#  n <- length(time)
#  omega <- matrix(data=0, nrow=n, ncol=n)
#  for (i in 1:n){
#    for (j in i:n){
#      if (i==j){
#        omega[i,j] <- 0.5
#      } else {
#        omega[i,j] <- gamma^(time[j]-time[i])
#      }
#    }
#  }
#  omega <- omega + t(omega)
#  L <- chol(omega)
#  return(L)
#}




## R code for validating 2D search
#X=cbind(1, dat$z, dat$x)
#A <- solve(t(X) %*% X)
#H <- X %*% A %*% t(X)
#dat$y %*% H %*% dat$y
#Y=dat$y
#r=Y-H%*%Y
#
#chngpt.var=dat$x
#stratified.by=dat$z>0
#order.=order(stratified.by, chngpt.var)
#chngpt.var.sorted=chngpt.var[order.]
#r=r[order.]
#v=cbind(chngpt.var.sorted,chngpt.var.sorted)
#v[12:20,1]=0
#v[1:11,1]=v[1:11,1]-v[2,1]
#v[1:2,1]=0
#v[1:11,2]=0
#v[12:20,2]=v[12:20,2]-v[13,2]
#v[11+1,2]=0
#v
#
#X1=X[order.,]
#
#t(v)%*%r
#t(v)%*%v
#t(t(v)%*%r) %*% solve(t(v)%*%v-t(v)%*%X1 %*% solve(t(X1) %*% X1) %*% t(X1)%*%v) %*% t(v)%*%r + dat$y %*% H %*% dat$y
#
#
#Xe=cbind(X[order.,],v)
#Y[order.] %*% Xe %*% solve(t(Xe) %*% Xe) %*% t(Xe) %*% Y[order.]
Any scripts or data that you put into this service are public.
chngpt documentation built on April 12, 2025, 2:27 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
chngpt
Estimation and Hypothesis Testing for Threshold Regression

R/chngptm.R
In chngpt: Estimation and Hypothesis Testing for Threshold Regression

Documented in AIC.chngptm chngptm chngptm.xy chngptm.xy chngptm.xy coef.chngptm convert.coef lincomb logLik.chngptm plot.chngptm predictx print.chngptm residuals.chngptm summary.chngptm threshold.func vcov.chngptm

Try the chngpt package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chngpt Estimation and Hypothesis Testing for Threshold Regression

R/chngptm.R In chngpt: Estimation and Hypothesis Testing for Threshold Regression

Documented in AIC.chngptm chngptm chngptm.xy chngptm.xy chngptm.xy coef.chngptm convert.coef lincomb logLik.chngptm plot.chngptm predictx print.chngptm residuals.chngptm summary.chngptm threshold.func vcov.chngptm

Try the chngpt package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chngpt
Estimation and Hypothesis Testing for Threshold Regression

R/chngptm.R
In chngpt: Estimation and Hypothesis Testing for Threshold Regression