pver <- packageVersion("DramaAnalysis")
knitr::opts_chunk$set(echo = FALSE) if (is.null(.Options$qd.datadir)) { DramaAnalysis::setup() } getYPositions <- function(x,b,limit=100) { rl <- apply(x, 2, function(xx) { xx <- as.double(xx) cs <- c(0,head(cumsum(xx),-1)) r <- cs + xx/2 r[xx<=limit] <- NA r }) rl } toFigureName <- function(x) { tools::toTitleCase(tolower(x)) } library(data.table) library(DramaAnalysis) library(magrittr) library(highcharter) library(stringdist) #print(class(params$id)) #print(params$id[[2]]) drama <- loadDrama(params$id) text <- segment(drama$text, drama$segments) meta <- drama$meta characters <- drama$characters if (is.null(params$titles)) { titles <- toFigureName(meta$documentTitle) } else { titles <- params$titles } names(titles) <- params$id
if (is.null(params$charAlignment)) { }
title: "QuaDramA Edition Comparison"
This report shows a number of quantitative properties extracted from several plays. The report assumes the plays to be different variants of the same text (what ever that means).
knitr::kable(meta, col.names = c("Corpus", "Text-Id", "Title", "Language", "Author name", "Author Id", "Translator name", "Translator Id", "Year written", "Year printed", "Year premiered", "Year translated"), format = "markdown")
tbl <- text[,.(Acts=length(unique(begin.Act)), Scenes=length(unique(begin.Scene))), .(corpus,drama)] rownames(tbl) <- paste(tbl$corpus,tbl$drama,sep=":") tblm <- as.matrix(tbl[,3:ncol(tbl)]) barplot(tblm, beside=TRUE, legend.text = titles[rownames(tbl)], col=params$col,args.legend = c(x="bottom")) #knitr::kable(tbl)
tbl <- text[,.(Characters=length(unique(Speaker.figure_surface))), .(corpus,drama)] rownames(tbl) <- titles[paste(tbl$corpus,tbl$drama,sep=":")] tblm <- as.matrix(tbl[,3:ncol(tbl)])
tbl <- text[,.(Characters=length(unique(Speaker.figure_surface))), .(corpus,drama)] rownames(tbl) <- titles[paste(tbl$corpus,tbl$drama,sep=":")] tblm <- as.matrix(tbl[,3:ncol(tbl)]) rownames(tblm) <- rownames(tbl) barplot(tblm, beside=TRUE, legend.text = rownames(tbl), col=params$col, args.legend = c(x="bottom")) #knitr::kable(tbl)
knitr::kable(tblm, format="markdown", row.names = TRUE)
knitr::kable(as.matrix(dist(tblm)))
This plot shows the spoken words of the most talkative r params$topn
characters per play.
fstat <- characterStatistics(drama) %>% filterCharacters(drama, n=params$topn) %>% characterNames(drama) xt <- xtabs(tokens ~ paste(corpus,drama,sep=":")+character,data=fstat) attr(xt, "class") <- NULL attr(xt, "call") <- NULL #barplot(xt, beside=TRUE,las=3) xt <- as.data.frame(t(xt)) colnames(xt) <- titles[colnames(xt)] hc <- highchart() hc <- hc_yAxis(hc, max=max(xt)) hc <- hc_xAxis(hc, categories=rownames(xt)) for (i in 1:ncol(xt)) { hc <- hc_add_series(hc, xt[,i], type="column", name=colnames(xt)[i]) } hc
The table shows token numbers for all characters.
fstat <- characterStatistics(drama) %>% characterNames(drama) xt <- xtabs(tokens ~ paste(corpus,drama,sep=":")+character,data=fstat) attr(xt, "class") <- NULL attr(xt, "call") <- NULL xt <- as.data.frame(t(xt)) colnames(xt) <- titles[colnames(xt)] knitr::kable(xt, format="markdown")
This plot shows the distribution of utterange lengths for the top r params$topn
characters in each play. If two characters are aligned, their box is shown side by side. More on box plots in Wikipedia. Dots outside of the box indicate potential outliers, color shows the play they belong to.
ustat <- utteranceStatistics(drama) %>% filterCharacters(drama, n = params$topn) %>% characterNames(drama) if (is.null(params$col)) { col = "black" } else { col = params$col[1:length(params$id)] } hcboxplot(x=ustat$utteranceLength, var=as.character(ustat$character), var2=as.character(ustat$drama), color=col )
This table records information about every utterance in each play. It is therefore very long.
knitr::kable(ustat, format="markdown")
utteranceDistances <- function(t) { tagg <- t[,.(text=paste0(Token.surface, collapse=" ")),.(corpus,drama,begin, Speaker.figure_surface)] pairs <- t(combn(nrow(tagg),2)) for(i in 1:nrow(pairs)) { row_a <- pairs[i,1] row_b <- pairs[i,2] } } text[, mcp:=max(utteranceEnd), .(corpus,drama)][, .(utteranceLength=.N,rbegin=utteranceBegin/mcp), .(corpus,drama,utteranceBegin,Speaker.figure_surface)][]
ustat <- utteranceStatistics(drama, normalizeByDramaLength = FALSE) %>% filterCharacters(drama, n=params$topn) %>% characterNames(drama)
par(mar=c(1,9,1,0),xpd=FALSE) for (id in params$id) { splid <- strsplit(id,":",fixed=TRUE)[[1]] plot(ustat, xlab="", main=toFigureName(meta[meta$corpus==splid[1] & meta$drama==splid[2],3])) }
configs <- lapply(split(drama), function(x) { c <- configuration(x, onlyPresence = TRUE, segment = "Scene") %>% characterNames(x) copr <- as.matrix(c) %*% t(as.matrix(c)) rownames(copr) <- c$character colnames(copr) <- c$character copr <- reshape::melt.array(copr) copr$drama <- x$meta$drama copr }) allcopresence <- Reduce(rbind, configs) allcopresence$X1 <- as.character(allcopresence$X1) allcopresence$X2 <- as.character(allcopresence$X2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.