lfe: Linear Group Fixed Effects

# $Id: oldfelm.R 1942 2016-04-07 21:25:18Z sgaure $
# Author: Simen Gaure
# Copyright: 2011, Simen Gaure
# Licence: Artistic 2.0




#   Some things in this file is done in a weird way.
#   In some cases there are efficiency reasons for this, e.g. because
#   the "standard" way of doing things may result in a copy which is costly
#   when the problem is *large*.
#   In other cases it may simply be due to the author's unfamiliarity with how
#   things should be done in R
 
# parse our formula
oldparseformula <- function(formula,data) {

  trm <- terms(formula,specials=c('G'))
  feidx <- attr(trm,'specials')$G+1
  va <- attr(trm,'variables')
  festr <- paste(sapply(feidx,function(i) deparse(va[[i]])),collapse='+')

  if(festr != '') { 
    .Deprecated(msg="The G() syntax is deprecated, please use multipart formulas instead")
    # remove the G-terms from formula
    formula <- update(formula,paste('. ~ . -(',festr,') - 1'))
    
    # then make a list of them, and find their names
    felist <- parse(text=paste('list(',gsub('+',',',festr,fixed=TRUE),')',sep=''))
    nm <- eval(felist,list(G=function(arg) deparse(substitute(arg))))
    
    # replace G with factor, eval with this, and the parent frame, or with data
    # allow interaction factors with '*' (dropped, never documented, use ':')
    Gfunc <- function(f) if(is.null(attr(f,'xnam'))) factor(f) else f
    Ginfunc <- function(x,f) {
      if(is.factor(x)) {
        structure(interaction(factor(f),factor(x),drop=TRUE),xnam=deparse(substitute(x)),fnam=deparse(substitute(f)))
      } else {
        structure(factor(f),x=x,xnam=deparse(substitute(x)), fnam=deparse(substitute(f)))
      }
    }
    
    if(is.environment(data)) {
      fl <- eval(felist,list(G=Gfunc, ':'=Ginfunc),data)
    } else {
      fl <- local({eval(felist,data)},list(G=Gfunc, ':'=Ginfunc))
    }
    names(fl) <- nm
    gpart <- eval(parse(text=paste('~',paste(nm,collapse='+'))))

    if(is.null(names(fl))) names(fl) <- paste('fe',1:length(fl),sep='')
  } else {
    fl <- NULL
    gpart <- ~0
  }
  return(list(formula=formula, fl=fl, gpart=gpart,ivpart=~0,cpart=~0))
}

# parse 
# use 2-part Formulas without G() syntax, like
# y ~ x1 + x2 | f1+f2
# or 3-part or more Formulas with iv-specification like
# y ~ x1 + x2 | f1+f2 | (q|w ~ x3+x4) | c1+c2
# returns a list containing
# formula=y~x1+x2
# fl = list(f1,f2)
# ivpart = list(q ~x3+x4, w ~x3+x4)
# cluster=list(c1,c2)
nopart <- function(x) length(all.vars(x))==0

parseformula <- function(form, data, noexpand=FALSE) {
  f <- as.Formula(form)
  len <- length(f)[[2]]
  if(len == 1) return(oldparseformula(form,data))
  opart <- formula(f,lhs=NULL,rhs=1)
  if(len == 1) return(list(formula=opart,gpart=~0,ivpart=~0,cpart=~0))

  # the factor part
  gpart <- formula(f,lhs=0, rhs=2)
  if(!nopart(gpart)) {
    tm <- terms(gpart, keep.order=TRUE)
    ft <- attr(tm,'factors')
    var <- eval(attr(tm,'variables'),data)
    varnames <- rownames(ft)
    names(var) <- varnames
    fl <- apply(ft, 2, function(v) {
      nonz <- sum(v > 0)
      vnam <- varnames[which(v > 0)]
      if(nonz > 2) stop('Interaction only supported for two variables')
      if(nonz == 1) {
#        if(!is.factor(var[[vnam]])) warning('non-factor ',vnam, ' coerced to factor')
        res <- list(factor(var[[vnam]]))
        names(res) <- vnam
      } else {
        xnam <- vnam[[1]]
        fnam <- vnam[[2]]
        x <- var[[xnam]]
        f <- var[[fnam]]
        if(!is.factor(f) && !is.factor(x)) {
          stop('interaction between ', xnam, ' and ', fnam, ', none of which are factors')
        }
        if(!is.factor(f) && is.factor(x)) {
          tmp <- x
          x <- f
          f <- tmp
          tmp <- xnam
          xnam <- fnam
          fnam <- tmp
        }
        if(is.factor(x)) {
          res <- list(structure(interaction(factor(f),factor(x),drop=TRUE),xnam=xnam,fnam=fnam))
        } else {
          res <- list(structure(factor(f),x=x,xnam=xnam, fnam=fnam))
        }
        names(res) <- paste(xnam,fnam,sep=':')
      }
      res
    })
    nm <- names(fl)
    fl <- unlist(fl, recursive=FALSE)
    names(fl) <- nm
  } else {
    fl <- NULL
  }
  
  if(len == 2) return(list(formula=opart,fl=fl,gpart=gpart,ivpart=~0,cpart=~0))

  # Then the iv-part
  ivparts <- formula(f,lhs=0,rhs=3, drop=TRUE)
  if(!nopart(ivparts) && length(ivparts[[2]])>1 && ivparts[[2]][[1]]=='(') {
      # Now, make a list of the iv-formulas where we split the lhs in each
      # to obtain q ~ x3+x4, w ~x3+x4
      ivspec <- as.Formula(ivparts[[2]][[2]])   # it's now q|w ~ x3+x4
      lhs <- formula(ivspec,rhs=0)
      ivpart <- lapply(seq_along(all.vars(lhs)),function(i) formula(ivspec,lhs=i))
  } else {
      ivpart <- NULL
  }

  if(len == 3 && !is.null(ivpart)) return(list(formula=opart,fl=fl,iv=ivpart,gpart=gpart,ivpart=ivparts,cpart=~0))
  
  # The cluster part, this could be the third part if there are no parentheses
  if(len == 3 && is.null(ivpart)) {
    cpart <- ivparts
    ivparts <- NULL
  } else {
    cpart <- formula(f,lhs=0,rhs=4,drop=TRUE)
  }
  if(!nopart(cpart)) {
      # handle the same way as the factors, but without the covariate interaction
      tm <- terms(cpart, keep.order=TRUE)
      nm <- parts <- attr(tm,'term.labels')
      clist <- lapply(paste('factor(',parts,')',sep=''),function(e) parse(text=e))
      cluster <- lapply(clist,eval,data)
      names(cluster) <- nm
      
  } else {
      cluster <- NULL
  }
  list(formula=opart,fl=fl,iv=ivpart,cluster=cluster,gpart=gpart,ivpart=ivparts,cpart=cpart)
}


# ivresid is optional, used in 2. stage of 2sls to pass
# the difference between the original endogenous variable and the prediction
# for the purpose of computing sum of square residuals
doprojols <- function(psys, ivresid=NULL, exactDOF=FALSE, keepX=FALSE, nostats=FALSE) {
  if(is.numeric(exactDOF)) {
    df <- exactDOF
    totvar <- length(psys$y) - df
  } else {
    # numrefs is also used later
    numrefs <- nrefs(psys$fl, compfactor(psys$fl), exactDOF) 
    totvar <- totalpvar(psys$fl)-numrefs
    df <- length(psys$y)-totvar
  }
  if(is.null(psys$yxz$x)) {
    # No covariates
        z <- list(N=psys$N, r.residuals=psys$y,fe=psys$fl,p=totvar,Pp=0,cfactor=compfactor(psys$fl),
                  na.action=psys$na.action, contrasts=psys$contrasts,
                  fitted.values=psys$y - psys$yxz$y,
                  coefficients=matrix(double(0),psys$N,0),
                  df=df,
                  nostats=FALSE,
                  model.assign=psys$assign,
                  model.labels=psys$model.labels,
                  residuals=psys$yxz$y,clustervar=psys$clustervar, call=match.call())
        z$df.residual <- z$df
        class(z) <- 'felm'
        return(z)
    }
  yz <- psys$yxz$y
  xz <- psys$yxz$x
  y <- psys$y
  x <- psys$x
  fl <- psys$fl
  icpt <- psys$icpt
# here we just do an lm.fit, however lm.fit is quite slow since
# it doesn't use blas (in particular it can't use e.g. threaded blas in acml)
# so we have rolled our own.

# we really don't return an 'lm' object or other similar stuff, so
# we should consider using more elementary operations which map to blas-3
# eg. solve(crossprod(xz),t(xz) %*% yz)
# Or, even invert by solve(crossprod(xz)) since we need
# the diagonal for standard errors.  We could use the cholesky inversion
# chol2inv(chol(crossprod(xz)))
  cp <- crossprod(xz)
  b <- crossprod(xz,yz)
  ch <- cholx(cp)
    #  ch <- chol(cp)
    #  beta <- drop(inv %*% (t(xz) %*% yz))
    # remove multicollinearities
  badvars <- attr(ch,'badvars')

  if(is.null(badvars)) {
    beta <- backsolve(ch,backsolve(ch,b,transpose=TRUE))
#    beta <- as.vector(beta)
#    beta <- as.vector(backsolve(ch,backsolve(ch,b,transpose=TRUE)))
    if(!nostats) inv <- chol2inv(ch)
  } else {
    beta <- matrix(NaN, nrow(cp), ncol(b))
#    beta <- rep(NaN,nrow(cp))
    beta[-badvars,] <- backsolve(ch,backsolve(ch,b[-badvars,], transpose=TRUE))
    if(!nostats) {
      inv <- matrix(NA,nrow(cp),ncol(cp))
      inv[-badvars,-badvars] <- chol2inv(ch)
    }
  }
  rm(ch, b, cp)

    
  if(length(fl) > 0 && icpt > 0) 
    rownames(beta) <- colnames(x)[-icpt] else rownames(beta) <- colnames(x)
  colnames(beta) <- colnames(y)
  if(ncol(beta) == 1) names(beta) <- rownames(beta)
  
  z <- list(coefficients=beta,badconv=psys$badconv,Pp=ncol(xz))


  z$N <- nrow(xz)
  z$p <- ncol(xz) - length(badvars)
  if(!nostats) {
    z$inv <- inv
    inv <- nazero(inv)
  }
# how well would we fit with all the dummies?
# the residuals of the centered model equals the residuals
# of the full model, thus we may compute the fitted values
# resulting from the full model.

# for the 2. step in the 2sls, we should replace 
# the instrumented variable with the real ones (the difference is in ivresid)
# when predicting, but only for the purpose of computing
# residuals.

  nabeta <- nazero(beta)

  zfit <- xz %*% nabeta
  zresid <- yz - zfit
  z$beta <- beta
  z$response <- y
  z$fitted.values <- y - zresid
  z$residuals <- zresid
  z$contrasts <- psys$contrasts
  z$model.assign <- psys$model.assign
  z$model.labels <- psys$model.labels
  if(length(fl) > 0) {
  # insert a zero at the intercept position (x may have an intercept, whereas xz has not)
#    if(icpt > 0) ibeta <- append(beta,0,after=icpt-1) else ibeta <- beta
    if(icpt > 0) {
      pre <- seq_len(icpt-1)
      post <- setdiff(seq_len(nrow(beta)), pre)
      ibeta <- rbind(beta[pre,,drop=FALSE], 0, beta[post,,drop=FALSE])
    } else {
      ibeta <- beta
    }

    pred <- x %*% ifelse(is.na(ibeta),0,ibeta)
    z$r.residuals <- y - pred
  } else {
    z$r.residuals <- zresid
  }
  rm(x)


  z$lhs <- colnames(beta)

  # the residuals should be the residuals from the original endogenous variables, not the predicted ones
  # the difference are the ivresid, which we must multiply by beta and subtract.
  # the residuals from the 2nd stage are in iv.residuals
  # hmm, what about the r.residuals?  We modify them as well. They are used in kaczmarz().
  if(!is.null(ivresid)) {
    if(!is.matrix(ivresid)) {
      nm <- names(ivresid)
      ivresid <- matrix(unlist(ivresid),z$N)
      colnames(ivresid) <- nm
    }
    z$ivresid <- ivresid %*% nabeta[colnames(ivresid),,drop=FALSE]
    z$iv.residuals <- z$residuals
    z$residuals <- z$residuals - z$ivresid    
    z$r.iv.residuals <- z$r.residuals
    z$r.residuals <- z$r.residuals - z$ivresid
  }
  
  z$terms <- psys$terms
  z$cfactor <- compfactor(fl)
  totlev <- totalpvar(fl)
  if(is.numeric(exactDOF)) {
    z$df <- exactDOF
    numdum <- z$N - z$p - z$df
    z$numrefs <- totlev - numdum
  } else {
    numdum <- totlev - numrefs
    z$numrefs <- numrefs
    z$df <- z$N - z$p - numdum
  }
  z$df.residual <- z$df
  z$rank <- z$N - z$df
  
  z$exactDOF <- exactDOF

  z$fe <- fl
# should we subtract 1 for an intercept?
# a similar adjustment is done in summary.felm when computing rdf
  z$p <- z$p + numdum - 1  
  z$xp <- z$p
  z$na.action <- psys$na.action
  class(z) <- 'felm'
  cluster <- psys$clustervar
  z$clustervar <- cluster

  if(nostats) {
    z$nostats <- TRUE
    return(z)
  }

  z$nostats <- FALSE
# then we go about creating the covariance matrices and tests
# if there is a single lhs, they are just stored as matrices etc
# in z.  If there are multiple lhs, these quantities are inserted
# in a list z$STATS indexed by z$lhs
# indexed by the name of the lhs

  vcvnames <- list(rownames(beta), rownames(beta))
  Ncoef <- nrow(beta)

  singlelhs <- length(z$lhs) == 1
  if(!singlelhs) z$STATS <- list()

  for(lhs in z$lhs) {
    res <- z$residuals[,lhs]

#  if(!is.null(ivresid)) res <- res - z$ivresid

    vcvfactor <- sum(res**2)/z$df

# when multiple lhs, vcvfactor is a vector
# we need a list of vcvs in this case

    if(singlelhs) {
      z$vcv <- z$inv * vcvfactor
      setdimnames(z$vcv, vcvnames)
    } else {
      z$STATS[[lhs]] <- list()
      z$STATS[[lhs]]$vcv <- z$inv * vcvfactor
      setdimnames(z$STATS[[lhs]]$vcv, vcvnames)
    }

#  dimnames(z$vcv) <- list(names(beta),names(beta))

  # We should make the robust covariance matrix too.
  # it's inv * sum (X_i' u_i u_i' X_i) * inv
  # where u_i are the (full) residuals (Wooldridge, 10.5.4 (10.59))
  # i.e. inv * sum(u_i^2 X_i' X_i) * inv
  # for large datasets the sum is probably best computed by a series of scaled
  # rank k updates, i.e. the dsyrk blas routine, we make an R-version of it.
  # need to check this computation, the SE's are slightly numerically different from Stata's.
  # it seems stata does not do the small-sample adjustment
    dfadj <- z$N/z$df

  # Now, here's an optimzation for very large xz. If we use the wcrossprod and ccrossprod
  # functions, we can't get rid of xz, we end up with a copy of it which blows away memory.
  # we need to scale xz with the residuals in xz, but we don't want to expand res to a full matrix,
  # and even get a copy in the result.
  # Thus we modify it in place with a .Call. The scaled variant is also used in the cluster computation.
#  z$robustvcv <- dfadj * inv %*% wcrossprod(xz,res) %*% inv

    rscale <- ifelse(res==0,1e-40,res)
    .Call(C_scalecols, xz, rscale)
    if(singlelhs) {
      z$robustvcv <- dfadj * inv %*% crossprod(xz) %*% inv
      setdimnames(z$robustvcv, vcvnames)
    } else {
      z$STATS[[lhs]]$robustvcv <- dfadj * inv %*% crossprod(xz) %*% inv
      setdimnames(z$STATS[[lhs]]$robustvcv, vcvnames)
    }


  # then the clustered covariance matrix
    if(!is.null(cluster)) {
      method <- attr(cluster,'method')
      if(is.null(method)) method <- 'cgm'
      dfadj <- (z$N-1)/z$df
      d <- length(cluster)
      if(method == 'cgm') {
#        meat <- matrix(0,nrow(z$vcv),ncol(z$vcv))
        meat <- matrix(0,Ncoef,Ncoef)
        for(i in 1:(2^d-1)) {
          # Find out which ones to interact
          iac <- as.logical(intToBits(i))[1:d]
          # odd number is positive, even is negative
          sgn <- 2*(sum(iac) %% 2) - 1
          # interact the factors
          ia <- factor(do.call(paste,c(cluster[iac],sep='\004')))
          adj <- sgn*dfadj*nlevels(ia)/(nlevels(ia)-1)
          .Call(C_dsyrk,1,meat,adj,rowsum(xz,ia))
        }
        if(singlelhs) {
          z$clustervcv <- inv %*% meat %*% inv
          setdimnames(z$clustervcv, vcvnames)
        } else {
          z$STATS[[lhs]]$clustervcv <- inv %*% meat %*% inv
          setdimnames(z$STATS[[lhs]]$clustervcv, vcvnames)
        }
        rm(meat)
        ## } else if(method == 'gaure') {
        ##   .Call(C_scalecols, xz, 1/rscale)
        ##   meat <- matrix(0,nrow(z$vcv),ncol(z$vcv))
        ##   dm.res <- demeanlist(res,cluster)
        ##   skel <- lapply(cluster, function(f) rep(0,nlevels(f)))
        ##   means <- relist(kaczmarz(cluster,res-dm.res), skel)
        ##   scale <- ifelse(dm.res==0,1e-40, dm.res)
        ##   .Call(C_scalecols, xz, scale)
        ##   .Call(C_dsyrk, 1, meat, dfadj, xz)
        ##   .Call(C_scalecols, xz, 1/scale)
        ##   for(i in seq_along(cluster)) {
        ##     rs <- rowsum(xz, cluster[[i]])
        ##     adj <- nlevels(cluster[[i]])/(nlevels(cluster[[i]])-1)
        ##     .Call(C_scalecols, rs, means[[i]])
        ##     .Call(C_dsyrk, 1, meat, dfadj*adj, rs)
        ##   }
        ##   rm(xz,rs)
        ##   z$clustervcv <- inv %*% meat %*% inv
        ##   rm(meat)
      } else {
        stop('unknown multi way cluster algorithm:',method)
      }
      
      
      if(singlelhs) {
        z$cse <- sqrt(diag(z$clustervcv))
        z$ctval <- coef(z)/z$cse
        z$cpval <- 2*pt(abs(z$ctval),z$df,lower.tail=FALSE)
      } else {
        z$STATS[[lhs]]$cse <- sqrt(diag(z$STATS[[lhs]]$clustervcv))
        z$STATS[[lhs]]$ctval <- z$coefficients[,lhs]/z$STATS[[lhs]]$cse
        z$STATS[[lhs]]$cpval <- 2*pt(abs(z$STATS[[lhs]]$ctval),z$df,lower.tail=FALSE)
      }
    }
    if(singlelhs) {
      z$se <- sqrt(diag(z$vcv))
      z$tval <- z$coefficients/z$se
      z$pval <- 2*pt(abs(z$tval),z$df,lower.tail=FALSE)

      z$rse <- sqrt(diag(z$robustvcv))
      z$rtval <- coef(z)/z$rse
      z$rpval <- 2*pt(abs(z$rtval),z$df,lower.tail=FALSE)
    } else {
      z$STATS[[lhs]]$se <- sqrt(diag(z$STATS[[lhs]]$vcv))
      z$STATS[[lhs]]$tval <- z$coefficients[,lhs]/z$STATS[[lhs]]$se
      z$STATS[[lhs]]$pval <- 2*pt(abs(z$STATS[[lhs]]$tval),z$df,lower.tail=FALSE)

      z$STATS[[lhs]]$rse <- sqrt(diag(z$STATS[[lhs]]$robustvcv))
      z$STATS[[lhs]]$rtval <- z$coefficients[,lhs]/z$STATS[[lhs]]$rse
      z$STATS[[lhs]]$rpval <- 2*pt(abs(z$STATS[[lhs]]$rtval),z$df,lower.tail=FALSE)
    }
    # reset this for next lhs
    if(!singlelhs) .Call(C_scalecols, xz, 1/rscale)
  }
  
  z
}


project <- function(mf,fl,data,contrasts,clustervar=NULL,pf=parent.frame()) {

  m <- match(c("formula", "data", "subset", "na.action"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- quote(model.frame)
  subspec <- mf[['subset']]

  # we should handle multiple lhs
  # but how?  model.frame() doesn't handle it, but we need
  # model.frame for subsetting and na.action, with the left hand side
  # included.  We create an artifical single lhs by summing the left hand
  # sides, just to get hold of the rhs.  Then we extract the left hand side

  Form <- as.Formula(mf[['formula']])
  mf[['formula']] <- Form
  mf <- eval(mf, pf)
  mt <- attr(mf,'terms')
  naact <- attr(mf,'na.action')
  if(!is.null(naact))
    naclass <- class(naact)
  fullN <- nrow(mf) + length(naact)
  # then obtain the response matrix through Formula::model.part
  response <- as.matrix(model.part(Form, mf, lhs=NULL,rhs=0))


  cmethod <- attr(clustervar,'method')
  if(!is.null(clustervar)) {
      if(is.character(clustervar)) clustervar <- as.list(clustervar)
      if(!is.list(clustervar)) clustervar <- list(clustervar)
      clustervar <- lapply(clustervar, function(cv) {
          if(!is.character(cv)) factor(cv) else factor(data[,cv])
      })
  }
  # we need to change clustervar and factor list to reflect
  # subsetting and na.action. na.action is ok, it's set as an attribute in mf
  # but subset must be done manually. It's done before na handling
  if(!is.null(subspec)) {
    subs <- eval(subspec,pf)
    if(!is.null(clustervar)) clustervar <- lapply(clustervar,function(cv) cv[subs])
    fl <- lapply(fl, function(fac) {
      f <- factor(fac[subs])
      x <- attr(f,'x')
      if(is.null(x)) return(f)
      structure(f,x=x[subs])
    })
                 
  }
  if(!is.null(naact)) {
    if(!is.null(clustervar)) clustervar <- lapply(clustervar, function(cv) cv[-naact])
    fl <- lapply(fl,function(fac) {
      f <- factor(fac[-naact])
      x <- attr(f,'x')
      if(is.null(x)) return(f)
      structure(f,x=x[-naact])
    })
  }

  attr(clustervar,'method') <- cmethod

#  ret <- list(fl=fl, na.action=naact,terms=mt,clustervar=clustervar, y=model.response(mf,'numeric'))
  ret <- list(fl=fl, na.action=naact, fullN=fullN, terms=mt,clustervar=clustervar, y=response)  
  rm(mt,clustervar,naact)

  lapply(ret$clustervar, function(f)
         if(length(f) != nrow(ret$y)) stop('cluster factors are not the same length as data ',
                      length(f),'!=',nrow(ret$y)))

  # in case of cluster factor specified with the clustervar argument:

# try a sparse model matrix to save memory when removing intercept
# though, demeanlist must be full.  Ah, no, not much to save because
# it won't be sparse after centering
# we should rather let demeanlist remove the intercept, this
# will save memory by not copying.  But we need to remove it below in x %*% beta
# (or should we extend beta with a zero at the right place, it's only
#  a vector, eh, is it, do we not allow matrix lhs? No.)

# we make some effort to avoid copying the data matrix below
# this includes assigning to lists in steps, with gc() here and there.
# It's done for R 3.0.2. The copy semantics could be changed in later versions.

#  ret$x <- model.matrix(ret$terms,mf,contrasts)
  ret$x <- model.matrix(Form, mf, contrasts)
  rm(mf)
  ret$contrasts <- attr(ret$x,'contrasts')
  ret$model.assign <- attr(ret$x, 'assign')
  ret$model.labels <- attr(terms(Form[-2]), 'term.labels')   # ditch lhs when finding terms
  icpt <- attr(ret$x,'assign') == 0
  if(!any(icpt)) icpt <- 0 else icpt <- which(icpt)
  ret$icpt <- icpt

  ncov <- ncol(ret$x) - (icpt > 0)
  if(ncov == 0) {
    ret$x <- NULL
    ret$yxz <- list(y=demeanlist(ret$y,fl))
    ret$Pp <- 0
    ret$N <- length(ret$y)
    ret$yx <- list(y=ret$y)
    return(ret)
  }

  # here we need to demean things
  # we take some care so that unexpected copies don't occur
  # hmm, the list() copies the stuff. How can we avoid a copy
  # and still enable parallelization over y and x in demeanlist? A vararg demeanlist?
  # I.e. an .External version?  Yes.
#  yx <- list(y=ret$y, x=ret$x)
#  gc()
#  ret$yxz <- demeanlist(yx,fl,icpt)
#  rm(fl,yx); gc()

#  ret$yxz <- edemeanlist(y=ret$y,x=ret$x,fl=fl,icpt=c(0,icpt))
  ret$yxz <- demeanlist(list(y=ret$y,x=ret$x),fl=fl,icpt=c(0,icpt))
  ret$badconv <- attr(ret$yxz$x,'badconv') + attr(ret$yxz$y,'badconv')
  # use our homebrewn setdimnames instead of colnames. colnames copies.
  if(length(fl) > 0) {
    if(icpt == 0)
      setdimnames(ret$yxz$x, list(NULL,colnames(ret$x)))
    else 
      setdimnames(ret$yxz$x, list(NULL,colnames(ret$x)[-icpt]))
  }

  ret
}

#' @export
..oldfelm <- function(formula, data, exactDOF=FALSE, subset, na.action, contrasts=NULL,...) {

  knownargs <- c('iv', 'clustervar', 'cmethod', 'keepX', 'nostats')
  keepX <- FALSE
  cmethod <- 'cgm'
  iv <- NULL
  clustervar <- NULL
  nostats <- FALSE

  deprec <- c('iv', 'clustervar')

#  sc <- names(sys.call())[-1]
#  named <- knownargs[pmatch(sc,knownargs)]
#  for(arg in c('iv', 'clustervar')) {
#    if(!is.null(eval(as.name(arg))) && !(arg %in% named)) {
#        warning("Please specify the '",arg,"' argument by name, or use a multi part formula. Its position in the argument list will change in a later version")
#      }
#  }

  mf <- match.call(expand.dots = TRUE)

  # Currently there shouldn't be any ... arguments
  # check that the list is empty

#  if(length(mf[['...']]) > 0) stop('unknown argument ',mf['...'])
  
  # When moved to the ... list, we use this:
  # we do it right away, iv and clustervar can't possibly end up in ... yet, not with normal users


  args <- list(...)
  ka <- knownargs[pmatch(names(args),knownargs, duplicates.ok=FALSE)]
  names(args)[!is.na(ka)] <- ka[!is.na(ka)]
  dpr <- deprec[match(ka, deprec)]
  if(any(!is.na(dpr))) {
    bad <- dpr[which(!is.na(dpr))]
    warning('Argument(s) ',paste(bad,collapse=','), ' are deprecated and will be removed, use multipart formula instead')
  }
  env <- environment()
  lapply(intersect(knownargs,ka), function(arg) assign(arg,args[[arg]], pos=env))
  if(!(cmethod %in% c('cgm','gaure'))) stop('Unknown cmethod: ',cmethod)

  # also implement a check for unknown arguments
  unk <- setdiff(names(args), knownargs)
  if(length(unk) > 0) stop('unknown arguments ',paste(unk, collapse=' '))

  if(missing(data)) mf$data <- data <- environment(formula)
  pf <- parent.frame()
  pform <- parseformula(formula,data)
  
  if(!is.null(iv) && !is.null(pform[['iv']])) stop("Specify EITHER iv argument(deprecated) OR multipart terms, not both")
  if(!is.null(pform[['cluster']]) && !is.null(clustervar)) stop("Specify EITHER clustervar(deprecated) OR multipart terms, not both")
  if(!is.null(pform[['cluster']])) clustervar <- structure(pform[['cluster']], method=cmethod)
  if(is.null(iv) && is.null(pform[['iv']])) {
    # no iv, just do the thing
    fl <- pform[['fl']]
    formula <- pform[['formula']]
    mf[['formula']] <- formula
    psys <- project(mf,fl,data,contrasts,clustervar,pf)
    z <- doprojols(psys,exactDOF=exactDOF, nostats=nostats[1])
    if(keepX) z$X <- if(psys$icpt > 0) psys$x[,-psys$icpt] else psys$x
    rm(psys)

    z$parent.frame <- pf
    z$call <- match.call()
    return(z)
  }

  # IV.  Clean up formulas, set up for 1st stages
  if(!is.null(iv)) {
    # warning("argument iv is deprecated, use multipart formula instead")
    if(!is.list(iv)) iv <- list(iv)
    form <- pform[['formula']]
    # Old syntax, the IV-variables are also in the main equation, remove them
    for(ivv in iv) {
      ivnam <- ivv[[2]]
      # create the new formula by removing the IV lhs.
      form <- update(form, substitute(. ~ . - Z,list(Z=ivnam)))
    }
    pform[['formula']] <- form
    mf[['iv']] <- NULL
    ivpart <- NULL
  } else {
    iv <- pform[['iv']]
    ivpart <- as.Formula(pform[['ivpart']])
  }


  if(is.environment(data)) {
    ivenv <- new.env(parent=data)
  } else {
    ivenv <- new.env(parent=pf)
  }


  if(!is.null(ivpart)) {
    # ivpart is something like ~(P|Q ~ x+x2)
    # strip ~ and ()
    ivpart <- as.Formula(ivpart[[2]][[2]])
    lhs <- formula(ivpart, lhs=NULL, rhs=0)
    rhs <- as.Formula(formula(ivpart, lhs=0, rhs=1))
    if(length(ivpart)[[2]] > 1) {
      stop("Instruments can't be projected out: ", ivpart)
      rhsg <- as.Formula(formula(ivpart, lhs=0, rhs=2))
    } else {
      rhsg <- list(NULL,NULL)
    }
    Form <- as.Formula(formula)
    if(length(Form)[2] == 4) {
      cluform <- formula(Form,lhs=0,rhs=4)[[2]]
      step1form <- as.Formula(substitute(L ~ B + ivO | G|0|C,
                                         list(L=lhs[[2]], B=pform[['formula']][[3]],
                                              ivO=rhs[[2]],
                                              G=pform[['gpart']][[2]],
                                              C=cluform)))
      
    } else {
      step1form <- as.Formula(substitute(L ~ B + ivO | G,
                                         list(L=lhs[[2]], B=pform[['formula']][[3]],
                                              ivO=rhs[[2]],
                                              G=pform[['gpart']][[2]])))
    }
    environment(step1form) <- environment(formula)


    ivform <- parseformula(step1form,data)
    fl <- ivform[['fl']]
    mf[['formula']] <- ivform[['formula']]
    environment(mf[['formula']]) <- environment(formula)
    psys <- project(mf,fl,data,contrasts,clustervar,pf)

    if(length(nostats) == 2)
        nost <- nostats[2]
    else
        nost <- nostats[1]
    z1 <- doprojols(psys, exactDOF=exactDOF, nostats=nost)
    # put the fitted values in ivenv
    for(n in colnames(z1$fitted.values)) {
      nn <- paste(n,'(fit)',sep='')
      assign(nn,z1$fitted.values[,n],envir=ivenv)
    }
    z1$endogvars <- paste('`',colnames(z1$fitted.values),'(fit)`',sep='')

    # we should not use all.vars(rhs), to find the instruments, but
    # pick up things from z1 somehow, in case there are expanded
    # factors as instrumental variables
    # in z1 there are model.assign and model.labels.
    # we locate the instrument names (rhs) in model.labels, and extract the coefficient names from model.assign
    cname <- rownames(z1$coefficients)
    ivnam <- all.vars(rhs)
    asgn <- z1$model.assign
    if(length(asgn) != length(cname)) asgn <- asgn[asgn!=0]
    # now assume there's a factor f among the instruments, with levels f2-f4 (1 is a reference)
    # we look up 'f' in lab, it has position 5, then everything with assign == 5 belong to this factor
    ivars <- cname[asgn %in% which(z1$model.labels %in% ivnam)]
    z1$instruments <- ivars
    # Store any dummy instruments in the ivenv, for possible later use in condfstat
    # There's a psys$x which is the model matrix
    # we may as well store all the instruments there
    for(ivar in ivars) {
      if(!(ivar %in% ivnam))
          assign(ivar,psys$x[,ivar],envir=ivenv)
    }
    if(!nost) {
      z1$iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1,ivars, lhs=lh))
      names(z1$iv1fstat) <- z1$lhs
      z1$rob.iv1fstat <- lapply(z1$lhs, function(lh) waldtest(z1,ivars,type='robust', lhs=lh))
      names(z1$rob.iv1fstat) <- z1$lhs
    }

    z1$call <- match.call()
    environment(step1form) <- environment(formula)
    z1$call[['formula']] <- step1form

    naact <- psys$na.action

    FIT <- as.formula(paste('~',paste(z1$endogvars,sep='', collapse='+'),sep=''))
    step2form <- as.Formula(substitute(y ~ B + FIT | G,
                                    list(y=pform[['formula']][[2]],
                                         B=pform[['formula']][[3]],
                                         FIT=FIT[[2]],
                                         G=pform[['gpart']][[2]])))

    # do the first step
    environment(step2form) <- environment(formula)

    form2 <- parseformula(step2form, data)
    fl <- form2[['fl']]
    formula <- form2[['formula']]
    environment(formula) <- ivenv
    mf$formula <- formula
    if(is.environment(mf$data)) mf$data <- ivenv
    # remove the naact from 1st step
    # any missings in the exogenous variables will be missing in both the 1st and 2nd stage
    # If there are missing instruments or endogenous variables they will be missing in 1st stage, but not in the 2nd
    # so we must make them missing in the 2nd stage as well. We implement that as a subset.
    if(!is.null(naact)) {
      # naact is a vector of observations to remove
      # numbered after a subset has been taken.
      # if there's no subset in mf, we create one
      subset <- mf[['subset']]
      if(is.null(subset)) {
        mf[['subset']] <- -naact
      } else {
        # subset is an indexing vector for the rows in the data frame
        # naact specifies rows to remove after subsetting.
        # we simulate this process. Let's make an integer vector for the
        # rows of the data. The total length can be found
        mf[['subset']] <- seq_len(psys$fullN)[subset][-naact]
      }
    }
    psys <- project(mf=mf,fl=fl,data=data,contrasts=contrasts,clustervar=clustervar,pf=pf)
    ivres <- z1$residuals
    colnames(ivres) <- as.character(sapply(all.vars(FIT),as.name))
    z <- doprojols(psys, ivresid=ivres, exactDOF=exactDOF, nostats=nostats)
    z$stage1 <- z1
    z$st2call <- mf
# backwards compatibility
    z$step1 <- lapply(z1$lhs, function(lh) {
#      warning('Use stage1 instead of step1')
      foo <- z1
      foo$lhs <- lh
      foo$beta <- z1$beta[,lh,drop=FALSE]
      foo$coefficients <- foo$beta
      foo$response <- z1$response[,lh,drop=FALSE]
      foo$fitted.values <- z1$fitted.values[,lh,drop=FALSE]
      foo$residuals <- z1$residuals[,lh,drop=FALSE]
      foo$r.residuals <- z1$r.residuals[,lh,drop=FALSE]
      if(!is.null(z1$iv1fstat)) {
        foo$iv1fstat <- z1$iv1fstat[lh]
        foo$rob.iv1fstat <- z1$rob.iv1fstat[lh]
      }
      if(!is.null(z1$STATS)) 
          foo[names(z1$STATS[[lh]])] <- z1$STATS[[lh]]
      foo[['STATS']] <- NULL
      foo})
    z$endovars <- z1$endogvars
    z$parent.frame <- pf
    rm(psys)
    z$call <- match.call()
    return(z)
  }

  # parse the IV-formulas, they may contain factor-parts
  iv <- lapply(iv,parseformula,data)

  # Now, insert the rhs of the IV in the formula
  # find the ordinary and the factor part

  # It may contain a factor part

  # this is the template for the step1 formula, just insert a left hand side
  step1form <- formula(as.Formula(substitute(~B +ivO | G + ivG,
                                    list(B=pform[['formula']][[3]],G=pform[['gpart']][[2]]))))

  # this is the template for the second stage, it is updated with the IV variables
  step2form <- formula(as.Formula(substitute(y~B | G,
                                    list(y=pform[['formula']][[2]],B=pform[['formula']][[3]],
                                         G=pform[['gpart']][[2]]))))

  nullbase <- formula(as.Formula(substitute(~B | G,
                                    list(B=pform[['formula']][[3]],G=pform[['gpart']][[2]]))))

  # we must do the 1. step for each instrumented variable
  # collect the instrumented variables and remove them from origform
  # then do the sequence of 1. steps
  # we put the instrumented variable on the lhs, and add in the formula for it on the rhs
  # A problem with this approach is that na.action is run independently between the first stages and
  # the second stage. This may result in different number of observations. This isn't any good.
  # I haven't figured out a good solution for this, except for creating the full model.matrix for
  # all of the steps.  This will blow our memory on large datasets.

  ivarg <- list()
  vars <- NULL
  step1 <- list()
  endolist <- c()
  for(ivv in iv) {
    # Now, make the full instrumental formula, i.e. with the rhs expanded with the
    # instruments, and the lhs equal to the instrumented variable

     ivlhs <- ivv[['formula']][[2]]
     rhsivo <- formula(as.Formula(ivv[['formula']]),lhs=0)[[2]]
    if(nopart(ivv[['gpart']]))
        rhsivg <- 0
    else
        rhsivg <- formula(ivv[['gpart']],lhs=0,rhs=2)[[2]]

    fformula <- substitute(Z ~ R, list(Z=ivlhs, R=step1form[[2]]))

    fformula <- do.call(substitute, list(fformula,list(ivO=rhsivo,ivG=rhsivg)))

     ivform <- parseformula(fformula,data)
     fl <- ivform[['fl']]
     mf[['formula']] <- ivform[['formula']]

     # note that if there are no G() terms among the instrument variables,
     # all the other covariates should only be centered once, not in every first stage and the
     # second stage separately. We should rewrite and optimize for this.
     psys <- project(mf,fl,data,contrasts,clustervar,pf)
     z <- doprojols(psys, exactDOF=exactDOF)
     mf[['formula']] <- fformula
     z$call <- mf
     rm(psys)


     # now, we need an ftest between the first step with and without the instruments(null-model)
     # We need the residuals with and without the
     # instruments. We have them with the instruments, but must do another estimation
     # for the null-model
     mfnull <- mf
     nullform <- substitute(Z ~ R,list(Z=ivlhs,R=nullbase[[2]]))     
     pformnull <- parseformula(nullform, data)
     mfnull[['formula']] <- pformnull[['formula']]
     znull <- doprojols(project(mfnull, pformnull[['fl']], data, contrasts, clustervar, pf),
                        exactDOF=exactDOF)
     z$iv1fstat <- ftest(z,znull)
     z$rob.iv1fstat <- ftest(z,znull,vcov=z$robustvcv)
     if(!is.null(clustervar))
       z$clu.iv1fstat <- ftest(z,znull,vcov=z$clustervcv)
     step1 <- c(step1,list(z))
     # then we lift the fitted variable and create a new name
     ivz <- z
     evar <- deparse(ivlhs)
     new.var <- paste(evar,'(fit)',sep='')
     # store them in an environment
     assign(new.var,ivz$fitted.values,envir=ivenv)
#     data[[new.var]] <- ivz$fitted
     # save these, with the backtick for later use
     vars <- c(vars,paste('`',new.var,'`',sep=''))
     # keep the residuals, they are needed to reconstruct the residuals for the
     # original variables in the 2. stage
     ivarg[[paste('`',new.var,'`',sep='')]] <- ivz$residuals
     # and add it to the equation
     step2form <- update(as.Formula(step2form),as.formula(substitute(. ~ . + FIT | .,
                                         list(FIT=as.name(new.var)))))
  }
  names(step1) <- names(iv)
  # now we have a formula in step2form with all the iv-variables
  # it's just to project it

  pform <- parseformula(step2form,data)
  fl <- pform[['fl']]
  formula <- pform[['formula']]
  environment(formula) <- ivenv
  mf$formula <- formula
  if(is.environment(mf$data)) mf$data <- ivenv
  psys <- project(mf=mf,fl=fl,data=data,contrasts=contrasts,clustervar=clustervar,pf=pf)

  z <- doprojols(psys,ivresid=ivarg,exactDOF=exactDOF)
  z$step1 <- step1
  z$endovars <- vars
  rm(psys)
  z$call <- match.call()
  return(z)
}