Nothing
# Author: Simen Gaure
# Copyright: 2011, Simen Gaure
# Licence: Artistic 2.0
# this call changes or adds the dimnames of obj without duplicating it
# it should only be used on objects with a single reference
# Our use of it is safe, and we don't export it.
setdimnames <- function(obj, nm) {
.Call(C_setdimnames, obj, nm)
}
scalecols <- function(obj, vec) {
.Call(C_scalecols, obj, vec)
}
crowsum <- function(x, f, mean = FALSE) .Call(C_rowsum, x, f, mean)
orthonormalize <- function(V) {
structure(V %*% solve(chol(crossprod(V))), ortho = TRUE)
}
# This function sets an attribute 'inplace' on the argument
# It is checked in the C-function demeanlist, and then removed
# If set, the centering will be inplace. It can mess things
# up seriously if used in the wrong manner, so it's not public
# except as in an argument to demeanlist (through eval-trickery).
unnamed <- function(x) {
.Call(C_inplace, x)
}
wmean <- function(x, w) sum(w * x) / sum(w)
wcov <- function(x, y, w) {
sw <- w / sum(w)
mx <- sum(sw * x)
my <- sum(sw * y)
cx <- x - mx
cy <- y - my
sum(sw * cx * cy) / (1 - sum(sw^2))
}
wvar <- function(x, w) wcov(x, x, w)
pinvx <- function(X) {
# return(pinv(nazero(X)))
ch <- cholx(nazero(X))
badvars <- attr(ch, "badvars")
inv1 <- chol2inv(ch)
if (is.null(badvars)) {
return(inv1)
}
inv <- matrix(0, nrow(X), ncol(X))
inv[-badvars, -badvars] <- inv1
structure(inv, badvars = attr(ch, "badvars"))
}
# Do a Cholesky to detect multi-collinearities
cholx <- function(mat, eps = 1e-6) {
if (is.null(dim(mat))) dim(mat) <- c(1, 1)
N <- dim(mat)[1]
if (N == 1) {
return(structure(sqrt(mat), badvars = if (mat <= 0) 1 else NULL))
}
# first, try a pivoted one
tol <- N * getOption("lfe.eps")
chp <- chol(mat, pivot = TRUE, tol = tol)
rank <- attr(chp, "rank")
if (rank == N) {
return(chol(mat))
}
pivot <- attr(chp, "pivot")
oo <- order(pivot)
badvars <- pivot[((rank + 1):N)]
ok <- (1:N)[-badvars]
ch <- chol(mat[ok, ok])
return(structure(ch, badvars = badvars))
}
cholsolve <- function(A, b) {
ch <- chol(A)
backsolve(ch, backsolve(ch, b, transpose = TRUE))
}
pinv <- function(X, tol = sqrt(.Machine$double.eps)) {
if (length(dim(X)) > 2L || !(is.numeric(X) || is.complex(X))) {
stop("'X' must be a numeric or complex matrix")
}
if (!is.matrix(X)) {
X <- as.matrix(X)
}
Xsvd <- svd(X)
badvars <- integer(0)
Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
Positive <- ifelse(is.na(Positive), FALSE, Positive)
badvars <- which(!Positive)
if (all(Positive)) {
res <- Xsvd$v %*% (1 / Xsvd$d * t(Xsvd$u))
} else if (!any(Positive)) {
res <- array(0, dim(X)[2L:1L])
} else {
res <- Xsvd$v[, Positive, drop = FALSE] %*% ((1 / Xsvd$d[Positive]) *
t(Xsvd$u[, Positive, drop = FALSE]))
}
structure(res, badvars = badvars)
}
nazero <- function(x) ifelse(is.na(x), 0, x)
ftest <- function(z, zr = NULL, vcov = z$vcv) {
rdf <- z$df # z$N - z$p - 1
if (is.null(zr)) {
# we should do F-test vs. model with intercept.
# but the intercept in z is implicit.
F <- (t(coef(z)) %*% pinvx(vcov) %*% coef(z)) / z$p
return(c(F = F, p = pf(F, z$p, rdf, lower.tail = FALSE), df1 = z$p, df2 = rdf))
}
# df1 <- length(coef(z)) - length(coef(zr))
df1 <- z$p - zr$p
c1 <- coef(z)
c2 <- rep(0, length(c1))
names(c2) <- names(c1)
c2[names(coef(zr))] <- coef(zr)
F <- (t(c1 - c2) %*% pinvx(vcov) %*% (c1 - c2)) / df1
return(c(F = F, p = pf(F, df1, rdf, lower.tail = FALSE), df1 = df1, df2 = rdf))
}
mkgraph <- function(f1, f2) {
if (!requireNamespace("igraph", quietly = TRUE)) stop("Package igraph not found")
# graph.edgelist(cbind(paste('f1',f1),paste('f2',f2)), directed=FALSE)
# graph.edgelist(cbind(500000000+as.integer(f1),f2), directed=FALSE)
igraph::graph.adjacency(tcrossprod(makeDmatrix(list(f1, f2))) > 0,
"undirected",
diag = FALSE
)
}
diamgraph <- function(flist, approx = TRUE) {
if (!requireNamespace("igraph", quietly = TRUE)) stop("Package igraph not found")
gr <- mkgraph(flist[[1]], flist[[2]])
# find largest cluster
cl <- igraph::clusters(gr)$membership
lcl <- which(cl == which.max(table(cl)))
if (approx) {
max(igraph::shortest.paths(gr, v = sample(lcl, 10), to = sample(lcl, 10)))
} else {
igraph::diameter(igraph::induced.subgraph(gr, lcl))
}
}
#' Find diameters of mobility graphs
#'
#' 'diammatrix' computes the diameters of certain graphs related to convergence
#' speed of `felm`.
#'
#' Each pair of factors (f1,f2) from `flist` defines a bipartite graph in
#' which the vertices are the levels of the factors, and two vertices are
#' adjacent if they are observed simultaneously. The connected components of
#' this graph are important for identification of the coefficients for the
#' factor levels, i.e. for `getfe`. But experience and some trials have
#' led the author to speculate that the diameter of the graph (or its largest
#' component) is also important for the convergence rate. Specifically, the
#' author suspects that under some assumptions, time to convergence goes like
#' the square of the diameter. At least in the case of two factors. This
#' function computes the diameter for each pair of factors. If the graph is
#' disconnected, the largest connected component is used. If `accel=TRUE`
#' (the default), the diameter is approximated from below by drawing two sets
#' of 10 random vertices and finding the maximum length of the shortest paths
#' between them.
#'
#' @param flist a list of factors defining the dummies.
#' @param approx logical. Approximate diameters are computed.
#' @return A matrix of dimension K x K where K is `length(flist)`.
#' @note This function is not important to the operation of the package, it is
#' included for easy experimentation with the convergence rate. It requires
#' that the suggested package \pkg{igraph} is attached.
#' @keywords internal
diammatrix <- function(flist, approx = TRUE) {
flen <- length(flist)
if (flen < 2) {
return(0)
}
val <- matrix(0, flen, flen)
colnames(val) <- names(flist)
rownames(val) <- names(flist)
for (if1 in 1:(flen - 1)) {
for (if2 in (if1 + 1):flen) {
val[if1, if2] <- diamgraph(flist[c(if1, if2)], approx)
}
}
# fill in lower triangle:
val[row(val) > col(val)] <- t(val)[row(val) > col(val)]
val
}
# if(!exists('fixef')) fixef <- function(object,...) UseMethod('fixef')
# compute rank deficiency of D-matrix
rankDefic <- function(fl, method = "cholesky", mctol = 1e-3) {
eps <- sqrt(.Machine$double.eps)
if (length(fl) == 1) {
return(1)
}
if (length(fl) == 2) {
return(nlevels(compfactor(fl)))
}
if (method == "cholesky") {
D <- makeDmatrix(fl)
Ch <- as(Cholesky(crossprod(D), super = TRUE, perm = TRUE, Imult = eps), "sparseMatrix")
sum(diag(Ch) < eps^(1 / 4))
} else if (method == "mc") {
totlev <- sum(sapply(fl, nlevels))
N <- length(fl[[1]])
len <- length(fl) + 1
# now, we will use the rank deficiency d to compute
# degrees of freedom corrections. I.e. (tr+totlev)+len should
# be within a certain relative tolerance, which translates
# to an absolute tolerance of tr
tolfun <- function(tr) -mctol * abs((tr + totlev) + len)
init <- 0
N - (mctrace(fl, tol = tolfun, init = init) + totlev) + len
} else {
D <- makeDmatrix(fl)
as.integer(ncol(D) - rankMatrix(crossprod(D), method = "qr.R"))
}
}
# in makeDmatrix/makePmatrix, the weights argument should be the
# square root of the weights. This is what is stored in internal structures.
#
#' Make sparse matrix of dummies from factor list
#'
#' Given a list of factors, return the matrix of dummies as a sparse matrix.
#'
#' The function returns the model matrix for a list of factors. This matrix is
#' not used internally by the package, but it's used in some of the
#' documentation for illustrative purposes.
#'
#' @param fl list of factors.
#' @param weights numeric vector. Multiplied into the rows.
#' @return Returns a sparse matrix.
#' @examples
#'
#' fl <- lapply(1:3, function(i) factor(sample(3, 10, replace = TRUE)))
#' fl
#' makeDmatrix(fl, weights = seq(0.1, 1, 0.1))
#'
#' @export makeDmatrix
makeDmatrix <- function(fl, weights = NULL) {
# make the D matrix
# for pure factors f, it's just the t(as(f,'sparseMatrix'))
# if there's a covariate vector x, it's t(as(f,'sparseMatrix'))*x
# for a covariate matrix x, it's the cbinds of the columns with the factor-matrix
ans <- do.call(mycbind, lapply(fl, function(f) {
x <- attr(f, "x", exact = TRUE)
fm <- t(as(f, "sparseMatrix"))
if (is.null(x)) {
return(fm)
}
if (!is.matrix(x)) {
return(fm * x)
}
do.call(mycbind, apply(x, 2, "*", fm))
}))
nm <- names(fl)
if (is.null(nm)) nm <- paste("f", seq_along(fl), sep = "")
levnm <- unlist(sapply(seq_along(fl), function(i) xlevels(nm[i], fl[[i]])))
if (!is.null(weights)) {
ans <- Diagonal(length(weights), weights) %*% ans
}
colnames(ans) <- levnm
ans
}
makePmatrix <- function(fl, weights = NULL) {
D <- makeDmatrix(fl, weights)
DtD <- crossprod(D)
DtDi <- pinvx(as.matrix(DtD))
badvars <- attr(DtDi, "badvars")
if (is.null(badvars)) {
return(D %*% DtDi %*% t(D))
}
D <- D[, -badvars, drop = FALSE]
return(D %*% pinvx(as.matrix(crossprod(D))) %*% t(D))
}
# total number of variables projected out
totalpvar <- function(fl) {
if (length(fl) == 0) {
return(0)
}
sum(sapply(fl, function(f) {
x <- attr(f, "x", exact = TRUE)
if (is.null(x) || !is.matrix(x)) {
return(nlevels(f))
}
return(ncol(x) * nlevels(f))
}))
}
nrefs <- function(fl, cf, exactDOF = FALSE) {
if (length(fl) == 0) {
return(0)
}
if (missing(cf)) cf <- compfactor(fl)
numpure <- sum(sapply(fl, function(f) is.null(attr(f, "x", exact = TRUE))))
if (numpure == 1) {
return(0)
}
if (numpure == 2) {
return(nlevels(cf))
}
if (identical(exactDOF, "rM")) {
return(rankDefic(fl, method = "qr"))
} else if (identical(exactDOF, "mc")) {
return(rankDefic(fl, method = "mc"))
} else if (exactDOF) {
return(rankDefic(fl, method = "cholesky"))
}
return(nlevels(cf) + numpure - 2)
}
wildcard <- function(formula, s = ls(environment(formula)), re = FALSE,
nomatch.failure = TRUE) {
env <- environment(formula)
F <- as.Formula(formula)
lenlhs <- length(F)[1]
lenrhs <- length(F)[2]
if (lenlhs == 1 && lenrhs == 1) {
return(swildcard(formula, s, re, nomatch.failure = nomatch.failure))
}
# do the parts separately
lhs <- lapply(seq_len(lenlhs), function(lh) {
swildcard(formula(F, lhs = lh, rhs = 0), s, re, nomatch.failure = nomatch.failure)[[2]]
})
rhs <- lapply(seq_len(lenrhs), function(rh) {
swildcard(formula(F, lhs = 0, rhs = rh), s, re, nomatch.failure = nomatch.failure)[[2]]
})
if (length(lhs) > 0) {
LHS <- lhs[[1]]
for (s in lhs[-1]) {
LHS <- bquote(.(LHS) | .(s))
}
} else {
LHS <- NULL
}
RHS <- rhs[[1]]
for (s in rhs[-1]) {
RHS <- bquote(.(RHS) | .(s))
}
if (is.null(LHS)) {
return(as.Formula(substitute(~R, list(R = RHS)), env = env))
} else {
as.Formula(substitute(L ~ R, list(L = LHS, R = RHS)), env = env)
}
}
swildcard <- function(formula, s = ls(environment(formula)), re = FALSE,
nomatch.failure = TRUE) {
av <- all.vars(formula)
if (!re) {
rchars <- c("*", "?")
wild <- unique(unlist(sapply(rchars, grep, av, fixed = TRUE, value = TRUE)))
if (length(wild) > 0) {
rewild <- utils::glob2rx(wild, trim.tail = FALSE)
} else {
rewild <- NULL
}
# quote some regexp-specials
rewild <- lapply(seq_along(rewild), function(i) structure(rewild[i], orig = wild[i]))
} else {
rchars <- c(".", "*", "|", "\\", "?", "[", "]", "(", ")", "{", "}")
wild <- unique(unlist(sapply(rchars, grep, av, fixed = TRUE, value = TRUE)))
rewild <- lapply(wild, function(w) structure(w, orig = w))
}
wsub <- lapply(rewild, function(w) {
mtch <- grep(paste("^", w, "$", sep = ""), s, value = TRUE, perl = TRUE)
if (length(mtch) == 0) {
if (nomatch.failure) {
stop("Couldn't match wildcard variable `",
attr(w, "orig"), "`",
call. = FALSE
)
}
mtch <- attr(w, "orig")
}
as.list(parse(text = paste("`", mtch, "`", collapse = "+", sep = "")))[[1]]
})
names(wsub) <- wild
formula(
as.Formula(do.call(substitute, list(formula, wsub)),
env = environment(formula)
),
update = T, collapse = TRUE, drop = FALSE
)
}
# prettyprint a list of integers
ilpretty <- function(il) {
paste(tapply(
il, cumsum(c(1, diff(il)) != 1),
function(x) {
if (length(x) == 1) {
as.character(x)
} else {
paste(x[[1]], x[[length(x)]], sep = ":")
}
}
), collapse = " ")
}
makefitnames <- function(s) gsub("`(Intercept)(fit)`", "(Intercept)", paste("`", s, "(fit)`", sep = ""), fixed = TRUE)
delete.icpt <- function(x) {
asgn <- attr(x, "assign")
icpt <- asgn == 0
if (!length(icpt)) {
return(x)
}
asgn <- asgn[!icpt]
ctr <- attr(x, "contrasts")
x <- x[, !icpt, drop = FALSE]
attr(x, "assign") <- asgn
attr(x, "contrasts") <- ctr
x
}
# do an ls on env, and the parent and all the way up to top
rls <- function(env, top = .GlobalEnv) {
ret <- character()
e <- env
repeat {
if (identical(e, emptyenv())) break
ret <- c(ret, ls(e))
if (identical(e, top)) break
e <- parent.env(e)
}
unique(ret)
}
limlk <- function(mm) {
# find the kappa for computing k-class liml
# it's the smallest eigenvalue of the matrix
# [y endog]' M_i [y endog] ([y endog]' M [y endog])^{-1}
# where M is projection out of instruments
# and M_i is projection out of both instruments and exogenous
# variables
ye <- cbind(mm$y, mm$ivy)
H1 <- crossprod(ye, newols(list(y = ye, x = mm$x), nostats = TRUE)$residuals)
H <- crossprod(ye, newols(list(y = ye, x = cbind(mm$ivx, mm$x)), nostats = TRUE)$residuals)
mat <- solve(H, H1)
min(eigen(mat, only.values = TRUE)$values)
}
resample <- function(cluster, return.factor = FALSE, na.action = NULL, fill = FALSE) {
if (is.list(cluster)) {
if (is.factor(cluster[[1]])) {
if (length(cluster) > 1) warning("List of factors not supported in resample, using first only (", names(cluster)[1], ")")
cluster <- cluster[[1]]
}
}
# resample entire levels of a factor
if (length(na.action) > 0) cluster <- factor(cluster[-na.action])
iclu <- as.integer(cluster)
cl <- sort(sample(nlevels(cluster), replace = TRUE))
if (fill) {
tb <- table(cluster)
while (sum(tb[cl]) < length(cluster)) {
cl <- c(cl, sample(nlevels(cluster), 1))
}
cl <- sort(cl)
}
# find a faster way to do this:
# s <- sort(unlist(sapply(cl, function(ll) which(clu==ll))))
s <- NULL
while (length(cl) > 0) {
s <- c(s, which(iclu %in% cl))
cl <- cl[c(1L, diff(cl)) == 0]
}
if (return.factor) {
return(cluster[s])
}
sort(s)
}
getcomp <- function(est, alpha = NULL) {
if (nlevels(est$cfactor) == 1) {
return(list(est = est, alpha = alpha))
}
ok <- which(est$cfactor == 1)
res <- est
res$cfactor <- factor(res$cfactor[ok])
if (!is.null(res$X)) {
res$X <- res$X[ok, ]
}
res$residuals <- res$residuals[ok, ]
res$response <- res$response[ok, ]
res$fitted.values <- res$fitted.values[ok, ]
res$r.residuals <- res$r.residuals[ok, ]
res$fe <- lapply(est$fe, function(f) factor(f[ok]))
if (alpha != NULL) {
# of the two first factors, remove the components beyond the first
# if there are more than two factors, the remaining are assumed to be ok
# so find those with 'fe' among the two first, and comp > 1
bad <- which((alpha[, "fe"] %in% names(est$fe)[1:2]) & (alpha[, "comp"] > 1))
if (length(bad) > 0) alpha <- alpha[-bad, ]
}
return(est = res, alpha = alpha)
}
#' Chain subset conditions
#'
#' @param ... Logical conditions to be chained.
#' @param out.vars character. Variables not in data.frame, only needed if you use variables which
#' are not in the frame. If `out.vars` is not specified, it is assumed to match all variables
#' starting with a dot ('.').
#' @return Expression that can be `eval`'ed to yield a logical subset mask.
#' @details
#' A set of logical conditions are chained, not and'ed. That is, each argument to
#' `chainsubset` is used as a filter to create a smaller dataset. Each subsequent
#' argument filters further.
#' For independent conditions this will be the same as and'ing them. I.e.
#' `chainsubset(x < 0 , y < 0)` will yield the same subset as `(x < 0) & (y < 0)`.
#' However, for aggregate filters like `chainsubset(x < mean(y), x > mean(y))`
#' we first find all the observations with `x < mean(y)`, then among these we
#' find the ones with `x > mean(y)`. The last `mean(y)` is now conditional on
#' `x < mean(y)`.
#'
#' @examples
#' set.seed(48)
#' N <- 10000
#' dat <- data.frame(y = rnorm(N), x = rnorm(N))
#' # It's not the same as and'ing the conditions:
#' felm(y ~ x, data = dat, subset = chainsubset(x < mean(y), y < 2 * mean(x)))
#' felm(y ~ x, data = dat, subset = chainsubset(y < 2 * mean(x), x < mean(y)))
#' felm(y ~ x, data = dat, subset = (x < mean(y)) & (y < 2 * mean(x)))
#' lm(y ~ x, data = dat, subset = chainsubset(x < mean(y), x > mean(y)))
#' @note
#' Some trickery is done to make this work directly in the subset argument of functions like
#' `felm()` and `lm()`. It might possibly fail with an error message in some situations.
#' If this happens, it should be done in two steps: `ss <- eval(chainsubset(...),data);
#' lm(...,data=data, subset=ss)`. In particular, the arguments are taken literally,
#' constructions like \code{function(...) {chainsubset(...)}} or `a <- quote(x < y); chainsubset(a)` do
#' not work, but `do.call(chainsubset,list(a))` does.
#' @export
chainsubset <- function(..., out.vars) {
if (sys.parent() != 0) {
cl <- sys.call(sys.parent())
mc <- match.call(expand.dots = TRUE)
if (cl[[1]] == quote(eval)) {
# we are called from eval, probably inside lm. let's replace it with a evalq and a direct call, then
# evaluation of the result
ecaller <- parent.frame(3)
cl[[1]] <- quote(evalq)
cl[[2]] <- match.call()
cl[[2]] <- eval(cl, ecaller)
return(eval(cl, ecaller))
}
}
args <- as.list(match.call(expand.dots = TRUE))[-1]
args[["out.vars"]] <- NULL
if (length(args) < 1) {
return(NULL)
}
if (length(args) == 1) {
return(structure(args[[1]], filter = args))
}
group <- list(as.name("{"))
miss <- missing(out.vars)
R <- as.name(sprintf("R%x", RR <- sample(.Machine$integer.max, 1)))
FF <- as.name(sprintf("F%x", RR))
structure(
bquote(local({
.(R) <- logical(length(.(FF) <- .(args[[1]])))
.(R)[.(Reduce(function(f1, f2) {
nm <- all.vars(f2)
nm <- lapply(if (miss) {
grep("^\\.", nm, value = TRUE, invert = TRUE)
} else {
nm[!(nm %in% out.vars)]
}, as.name)
recod <- as.call(c(group, lapply(nm, function(n) bquote(.(n) <- .(n)[.(FF), drop = TRUE]))))
bquote({
.(FF) <- .(f1)
local({
.(recod)
.(FF)[.(f2)]
})
})
}, args[-1], init = bquote(base::which(.(FF)))))] <- TRUE
.(R)
})),
filter = args
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.