# textmodel_wordshoal -----------
#' Wordshoal text model
#'
#' Estimate Lauderdale and Herzog's (2016) model for one-dimensional document
#' author (e.g. speakers) positions based on multiple groups of texts (e.g.
#' debates). Each group of texts is scaled using Slapin and Proksch's (2008)
#' "wordfish" Poisson scaling model of one-dimensional document positions, and
#' then the positions from a particular author are scaled across groups using a
#' second-level linear factor model, using conditional maximum likelihood.
#' @param x the \pkg{quanteda} \link{dfm} from which the model will be fit
#' @param groups the name of a variable in the document variables for data
#' giving the document group for each document
#' @param authors the name of a variable in the document variables for data
#' giving the author of each document
#' @param dir set global identification by specifying the indexes for a pair of
#' authors such that \eqn{\hat{\theta}_{dir[1]} < \hat{\theta}_{dir[2]}}
#' @param tol a convergence threshold for the
#' log-posterior of the model
#' @return An object of class textmodel_wordshoal. This is a list
#' containing: \item{tol}{log-posterior tolerance used in fitting}
#' \item{theta}{estimated author positions}
#' \item{beta}{debate marginal effects}
#' \item{alpha}{estimated debate fixed effects}
#' \item{psi}{estimated document debate-level positions}
#' \item{groups}{document groups}
#' \item{authors}{document authors}
#' \item{se.theta}{standard errors for theta-hats}
#' \item{call}{the matched call used to fit the model}
#' @details Returns estimates of relative author positions across the full
#' corpus of texts.
#' @references Benjamin E. Lauderdale and Alexander Herzog. 2016.
#' "\href{https://www.cambridge.org/core/journals/political-analysis/article/measuring-political-positions-from-legislative-speech/35D8B53C4B7367185325C25BBE5F42B4}{Measuring
#' Political Positions from Legislative Speech}." \emph{Political Analysis}
#' 24 (3, July): 374-394.
#' @import quanteda
#' @author Benjamin Lauderdale and Kenneth Benoit
#' @keywords textmodel experimental
#' @importFrom quanteda.textmodels textmodel_wordfish as.statistics_textmodel as.summary.textmodel
#' @examples
#' library("quanteda")
#' iedfm <- dfm(tokens(data_corpus_irish30, remove_punct = TRUE))
#'
#' wordshoalfit <-
#' textmodel_wordshoal(iedfm, dir = c(7,1),
#' groups = docvars(data_corpus_irish30, "debateID"),
#' authors = docvars(data_corpus_irish30, "member.name"))
#' summary(wordshoalfit)
#' author_positions <- summary(wordshoalfit)$estimated.author.positions
#' author_positions$row_names <- rownames(author_positions)
#' fitdf <- merge(author_positions,
#' docvars(data_corpus_irish30),
#' by.x = "row_names", by.y = "member.name")
#' fitdf <- subset(fitdf, !duplicated(memberID))
#' aggregate(theta ~ party.name, data = fitdf, mean)
#' @importFrom stats dgamma dnorm
#' @export
textmodel_wordshoal <- function(x, groups, authors, dir = c(1, 2), tol = 1e-3) {
  # S3 generic: dispatches on the class of `x` (see textmodel_wordshoal.dfm).
  UseMethod("textmodel_wordshoal")
}
#' @export
# Workhorse method for dfm input. Two stages:
#   1) scale each document group (debate) separately with wordfish, yielding
#      document-level positions psi;
#   2) fit a one-dimensional linear factor model across groups by coordinate
#      ascent on the log-posterior, recovering author positions theta.
textmodel_wordshoal.dfm <- function(x, groups, authors, dir = c(1,2), tol = 1e-3) {
# NOTE(review): `dir` is accepted here but never used in this method, so global
# direction identification does not appear to be applied -- confirm against the
# reference implementation.
startTime <- proc.time()  # for the elapsed-time report at the end
x <- quanteda::as.dfm(x)
# Factor integer codes of groups/authors index the debate-level (alpha, beta)
# and author-level (theta, tau) parameter vectors below.
groups <- as.factor(groups)
authors <- as.factor(authors)
# check that no groups or author partitions are a single row
# (a group with a single document cannot be scaled by wordfish)
if (length(not_enough_rows <- which(lengths(split(quanteda::docnames(x), groups)) < 2)))
stop("only a single case for the following groups: \n",
paste(levels(groups)[not_enough_rows], collapse = "\n"))
# The analogous single-document check for authors is currently disabled:
# if (length(not_enough_rows <- which(lengths(split(quanteda::docnames(x), authors)) < 2)))
# stop("only a single case for the following authors: \n",
# paste(levels(authors)[not_enough_rows], collapse = "\n"))
S <- quanteda::ndoc(x)  # number of documents
psi <- rep(NA, S)       # first-stage document positions, filled group by group
N <- nlevels(authors)   # number of authors
M <- nlevels(groups)    # number of document groups (debates)
## FIRST-LEVEL SCALING ##
cat("\nScaling ", M, " document groups", sep="")
for (j in 1:M) {
# Extract dfm rows for current document group
groupdfm <- x[groups == levels(groups)[j], ]
# Drop features absent from every document of this group
# (min_docfreq = 1 keeps features appearing in at least one document)
groupdfm <- quanteda::dfm_trim(groupdfm, min_docfreq = 1)
# Run wordfish on document group
# wfresult <- wordfishcpp(as.matrix(groupdfm), c(1, 2), c(0, 0, 1/9, 1), c(1e-2, 1e-4), 1L, 0L)
wfresult <- textmodel_wordfish(groupdfm, tol = c(tol, 1e-8))
# Save the results; handle both S4 and list-style wordfish return objects
psi[groups == levels(groups)[j]] <-
if(isS4(wfresult)){ wfresult@theta } else { wfresult$theta }
# Progress indicator: a dot per group, the group number every 20 groups
if (j %% 20 == 0)
cat(j, " ", sep="")
else
cat(".")
}
## SECOND-LEVEL SCALING ##
cat("\nFactor Analysis on Debate-Level Scales")
psi <- replace(psi,is.na(psi),0) # debates that failed to scale
jVec <- as.integer(groups)   # group (debate) index for each document
iVec <- as.integer(authors)  # author index for each document
## Factor Analysis on Debate Score Matrix ##
# Prior standard deviations (alpha, beta, theta) and gamma prior parameter (tau)
prioralpha <- 0.5
priorbeta <- 0.5
priortheta <- 1
priortau <- 1
# Dumb (but deterministic!) initial values
alpha <- rep(0,M)                # debate intercepts
beta <- rep(0,M)                 # debate discrimination (marginal effects)
theta <- seq(-2,2,length.out=N)  # author positions
tau <- rep(1,N)                  # author-specific error precisions
# Calculate initial log-posterior...
lastlp <- -Inf
lp <- sum(dnorm(alpha,0,prioralpha,log=TRUE))
lp <- lp + sum(dnorm(beta,0,priorbeta,log=TRUE))
lp <- lp + sum(dnorm(theta,0,priortheta,log=TRUE))
lp <- lp + sum(dgamma(tau,1,1,log=TRUE))
# Likelihood: psi_s ~ Normal(alpha_j + beta_j * theta_i, sd = tau_i^(-1/2))
for (s in 1:S) {
lps <- alpha[jVec[s]] + beta[jVec[s]] * theta[iVec[s]]
lp <- lp + dnorm(psi[s], lps, (tau[iVec[s]])^(-1/2), log=TRUE)
}
# Until log-posterior stops changing...
# Coordinate ascent: alternate closed-form updates of the debate parameters
# and the author parameters until the improvement in lp falls below tol.
while((lp - lastlp) > abs(tol)){
cat(".")
# Update debate parameters
# Precision-weighted ridge regression of psi on (1, theta); the Gaussian
# priors on (alpha, beta) enter via the prior precision matrix.
priordebate <- solve(matrix(c(prioralpha^2,0,0,priorbeta^2),2,2))
for (j in 1:M){
locs <- which(jVec == j)
Ytmp <- psi[locs]
Xtmp <- cbind(1,theta[iVec[locs]])
Wtmp <- diag(tau[iVec[locs]])
coeftmp <- solve(t(Xtmp) %*% Wtmp %*% Xtmp + priordebate) %*% t(Xtmp) %*% Wtmp %*% Ytmp
alpha[j] <- coeftmp[1]
beta[j] <- coeftmp[2]
}
# Update speaker parameters
for (i in 1:N){
locs <- which(iVec == i)
# Ridge regression of debate-centered psi on beta gives this author's theta
Ytmp <- matrix(psi[locs] - alpha[jVec[locs]],ncol=1)
Xtmp <- matrix(beta[jVec[locs]],ncol=1)
coeftmp <- solve(t(Xtmp) %*% Xtmp + priortheta^(-2)) %*% t(Xtmp) %*% Ytmp
theta[i] <- coeftmp[1,1]
# Update the author-specific precision tau from the residual scale
mutmp <- solve(t(Xtmp) %*% Xtmp + priortheta^(-2)) %*% t(Xtmp) %*% Xtmp %*% coeftmp
tau[i] <- (priortau + 0.5 * length(Ytmp)) /
(priortau + 0.5 * (sum(Ytmp^2) - mutmp*(priortheta^(-2)) * mutmp))
}
# Recalculate log-posterior
# (dgamma(tau, priortau, priortau) equals dgamma(tau, 1, 1) above since
# priortau = 1)
lastlp <- lp
lp <- sum(dnorm(alpha,0,prioralpha, log = TRUE))
lp <- lp + sum(dnorm(beta, 0, priorbeta, log = TRUE))
lp <- lp + sum(dnorm(theta, 0, priortheta, log = TRUE))
lp <- lp + sum(dgamma(tau, priortau, priortau, log = TRUE))
for (s in 1:S){
lps <- alpha[jVec[s]] + beta[jVec[s]] * theta[iVec[s]]
lp <- lp + dnorm(psi[s], lps, (tau[iVec[s]])^(-1/2), log = TRUE)
}
} # end while
## Calculate standard errors for thetas
# Conditional posterior SD of theta_i given the remaining parameters
thetaSE <- rep(NA,N)
for (i in 1:N){
locs <- which(iVec == i)
Xtmp <- matrix(beta[jVec[locs]],ncol=1)
thetaSE[i] <- sqrt(solve(t(Xtmp) %*% Xtmp + priortheta^(-2)) / tau[i])
}
## Return results
cat("\nElapsed time:", (proc.time() - startTime)[3], "seconds.\n")
result <- list(
tol = tol,
authors = authors,
groups = groups,
theta = theta,
beta = beta,
alpha = alpha,
psi = psi,
se.theta = thetaSE,
call = match.call()
)
class(result) <- c("textmodel_wordshoal", "textmodel", "list")
result
}
# base R methods -----------
#' Print method for textmodel_wordshoal
#'
#' Provides a print method for this class of object.
#' @param x for print method, the object to be printed
#' @param ... additional arguments passed to \code{\link{print}}
#' @method print textmodel_wordshoal
#' @keywords internal
#' @export
print.textmodel_wordshoal <- function(x, ...) {
  # Display the original fitting call, then a one-line summary of the
  # number of distinct authors and document groups in the fitted model.
  cat("Call:\n\t")
  print(x$call)
  n_authors <- length(unique(x$authors))
  n_groups <- length(unique(x$groups))
  cat(sprintf("\n%d authors; %d groups.\n", n_authors, n_groups))
}
# setMethod("show", signature(object = "textmodel_wordshoal_fitted"),
# function(object) print(object))
#
# setMethod("show", signature(object = "textmodel_wordshoal_predicted"),
# function(object) print(object))
#' Summarize a fitted textmodel_wordshoal object.
#'
#' \code{summary} method for a fitted \code{\link{textmodel_wordshoal}} object.
#' @param object results of \code{\link{textmodel_wordshoal}} to be summarized
#' @param ... additional arguments passed to \code{print}
#' @export
#' @method summary textmodel_wordshoal
summary.textmodel_wordshoal <- function(object, ...) {
  # Assemble the author-level estimates -- position point estimate and its
  # standard error -- one row per author level.
  author_tbl <- data.frame(
    theta = object$theta,
    se = object$se.theta,
    row.names = levels(object$authors),
    check.rows = FALSE,
    stringsAsFactors = FALSE
  )
  # Package the call and the estimates in the standard textmodel summary
  # wrappers from quanteda.textmodels.
  out <- list(
    call = object$call,
    estimated.author.positions = as.statistics_textmodel(author_tbl)
  )
  as.summary.textmodel(out)
}
# (Removed stray website-embed text that was not valid R code.)