# R/DocTopics.R

#' Find the topic that each document is estimated to be most likely from.
#' 
#'  
#' @param obj an \code{ExpAgendaOut} class object created by \code{\link{ExpAgendaVonmon}}.
#' @param TopicLabels character vector. Labels for each topic. Should be in the same order as the topics generated by \code{\link{TopicSummary}}. If \code{TopicLabels = "auto"} then the top topic stems are used. 
#' @param StemLabel numeric. The number of stems to use for \code{auto} generated topic labels.
#'  
#' @return A data frame with three columns: \code{ID}, \code{Names}, \code{Topic}. 
#'  
#' @importFrom reshape2 melt  
#'    
#' @export

DocTopics <- function(obj, TopicLabels = "auto", StemLabel = 2){
  # Purpose: for every document ID, keep the topic(s) holding the largest
  # value in obj$rs and return them as a data frame with the columns
  # ID, Names and Topic.
  #
  # obj          ExpAgendaOut object; its rs and authorID elements are used.
  # TopicLabels  "auto" (labels built from the top stems returned by
  #              TopicSummary), a character vector of labels, or NULL to
  #              keep the numeric topic numbers.
  # StemLabel    number of stems per label, forwarded to TopicSummary as
  #              NStems when TopicLabels is "auto".
  #
  # Ties are kept: a document whose maximum is shared by several topics
  # contributes one row per tied topic.

  # inherits() also copes with objects that carry more than one class.
  if (!inherits(obj, "ExpAgendaOut")){
    stop("obj must be a ExpAgendaOut class object created by ExpAgendaVonmon.")
  }

  # Keep these two symbols: cbind() may derive column names from them.
  Rs <- obj$rs
  author <- obj$authorID
  RA <- data.frame(cbind(author, Rs))

  # The first two columns identify the document, the rest are topics.
  MeasureCols <- seq_len(ncol(RA))[-(1:2)]
  if (length(MeasureCols) == 0){
    stop("obj does not contain any topic columns.")
  }
  TopicCols <- names(RA)[MeasureCols]

  RAMolten <- melt(RA, id.vars = 1:2, measure.vars = MeasureCols)
  RAMolten$ID <- as.numeric(RAMolten$ID)
  # as.numeric() on a factor returns level codes, so go through character
  # first when the values were turned into a factor on the way here.
  Values <- RAMolten$value
  if (is.factor(Values)){
    Values <- as.character(Values)
  }
  RAMolten$value <- as.numeric(Values)

  # Collect the best row(s) per document in a list and bind once, instead
  # of growing a data frame with rbind() inside a loop. Iterating over the
  # IDs that actually occur avoids warnings for gaps in the ID sequence.
  DocIDs <- sort(unique(RAMolten$ID))
  TopRows <- lapply(DocIDs, function(i){
    Doc <- RAMolten[which(RAMolten$ID == i), , drop = FALSE]
    Doc[which(Doc$value == max(Doc$value)), , drop = FALSE]
  })
  OutDF <- if (length(TopRows) > 0){
    do.call(rbind, TopRows)
  } else {
    RAMolten[0, , drop = FALSE]
  }

  OutDF <- OutDF[, 1:3]
  row.names(OutDF) <- NULL
  names(OutDF) <- c("ID", "Names", "Topic")

  # Topic columns are identified by name: drop the one-letter prefix and
  # subtract the two ID columns to get the topic number.
  ToTopicNumber <- function(x) as.numeric(substring(as.character(x), 2)) - 2
  OutDF$Topic <- ToTopicNumber(OutDF$Topic)

  # NULL means "no labels": return the numeric topic numbers.
  if (is.null(TopicLabels)){
    return(OutDF)
  }

  # TopicLabels may be a vector, so it must not be compared directly inside
  # if(); only a single "auto" string selects the automatic labels.
  UseAuto <- length(TopicLabels) == 1 && isTRUE(TopicLabels == "auto")
  if (UseAuto){
    Stems <- TopicSummary(obj, NStems = StemLabel)
    Labels <- vapply(seq_len(max(Stems$TopicNumber)), function(u){
      paste(Stems$Stems[which(Stems$TopicNumber == u)], collapse = " ")
    }, character(1))
  } else {
    Labels <- TopicLabels
  }

  # Take the levels from every topic column, not only from the topics that
  # won at least one document; otherwise the labels are misaligned or
  # rejected whenever a topic is never the most likely one.
  AllTopics <- sort(unique(ToTopicNumber(TopicCols)))
  OutDF$Topic <- factor(OutDF$Topic, levels = AllTopics, labels = Labels)
  OutDF
}
# christophergandrud/ExpAgenda documentation built on May 13, 2019, 7:01 p.m.