pver <- packageVersion("DramaAnalysis")
knitr::opts_chunk$set(echo = FALSE)

if (is.null(.Options$qd.datadir)) { 
  DramaAnalysis::setup()
}

getYPositions <- function(x,b,limit=100) {
  rl <- apply(x, 2, function(xx) {
    xx <- as.double(xx)
    cs <- c(0,head(cumsum(xx),-1))
    r <- cs + xx/2
    r[xx<=limit] <- NA
    r  
  })
  rl
}

toFigureName <- function(x) {
  tools::toTitleCase(tolower(x))
}
library(data.table)
library(DramaAnalysis)
library(magrittr)
library(highcharter)
library(stringdist)

#print(class(params$id))
#print(params$id[[2]])
drama <- loadDrama(params$id)
text <- segment(drama$text, drama$segments)
meta <- drama$meta
characters <- drama$characters
if (is.null(params$titles)) {
  titles <- toFigureName(meta$documentTitle)
} else {
  titles <- params$titles
}
names(titles) <- params$id
if (is.null(params$charAlignment)) {

}

title: "QuaDramA Edition Comparison"


This report shows a number of quantitative properties extracted from several plays. The report assumes the plays to be different variants of the same text (what ever that means).

Meta data

knitr::kable(meta,
             col.names = c("Corpus", "Text-Id", "Title", "Language", "Author name", "Author Id", "Translator name", "Translator Id", "Year written", "Year printed", "Year premiered", "Year translated"),
             format = "markdown")

Structure {.tabset}

Overview

tbl <- text[,.(Acts=length(unique(begin.Act)),
               Scenes=length(unique(begin.Scene))),
            .(corpus,drama)]
rownames(tbl) <- paste(tbl$corpus,tbl$drama,sep=":")
tblm <- as.matrix(tbl[,3:ncol(tbl)])

barplot(tblm,
        beside=TRUE,
        legend.text = titles[rownames(tbl)],
        col=params$col,args.legend = c(x="bottom"))

#knitr::kable(tbl)

Differences

Characters {.tabset}

Number of Characters {.tabset}

tbl <- text[,.(Characters=length(unique(Speaker.figure_surface))),
            .(corpus,drama)]
rownames(tbl) <- titles[paste(tbl$corpus,tbl$drama,sep=":")]
tblm <- as.matrix(tbl[,3:ncol(tbl)])

Overview

tbl <- text[,.(Characters=length(unique(Speaker.figure_surface))),
            .(corpus,drama)]
rownames(tbl) <- titles[paste(tbl$corpus,tbl$drama,sep=":")]
tblm <- as.matrix(tbl[,3:ncol(tbl)])
rownames(tblm) <- rownames(tbl)
barplot(tblm,
        beside=TRUE,
        legend.text = rownames(tbl),
        col=params$col, args.legend = c(x="bottom"))

#knitr::kable(tbl)

Table

Absolute numbers

knitr::kable(tblm, format="markdown", row.names = TRUE)

Differences

knitr::kable(as.matrix(dist(tblm)))

Spoken words {.tabset}

Overview

This plot shows the spoken words of the most talkative r params$topn characters per play.

fstat <- characterStatistics(drama) %>%
  filterCharacters(drama, n=params$topn) %>%
  characterNames(drama)

xt <- xtabs(tokens ~ paste(corpus,drama,sep=":")+character,data=fstat)

attr(xt, "class") <- NULL 
attr(xt, "call") <- NULL 
#barplot(xt, beside=TRUE,las=3)

xt <- as.data.frame(t(xt))
colnames(xt) <- titles[colnames(xt)]
hc <- highchart()

hc <- hc_yAxis(hc, max=max(xt))
hc <- hc_xAxis(hc, categories=rownames(xt))

for (i in 1:ncol(xt)) {
  hc <- hc_add_series(hc, xt[,i], type="column", name=colnames(xt)[i])
}

hc

Table

The table shows token numbers for all characters.

fstat <- characterStatistics(drama) %>%
  characterNames(drama)

xt <- xtabs(tokens ~ paste(corpus,drama,sep=":")+character,data=fstat)
attr(xt, "class") <- NULL 
attr(xt, "call") <- NULL 
xt <- as.data.frame(t(xt))
colnames(xt) <- titles[colnames(xt)]


knitr::kable(xt, format="markdown")

Utterances {.tabset}

Lengths

This plot shows the distribution of utterange lengths for the top r params$topn characters in each play. If two characters are aligned, their box is shown side by side. More on box plots in Wikipedia. Dots outside of the box indicate potential outliers, color shows the play they belong to.

ustat <- utteranceStatistics(drama) %>%
  filterCharacters(drama, n = params$topn) %>%
  characterNames(drama)

if (is.null(params$col)) {
  col = "black"
} else {
  col = params$col[1:length(params$id)]
}

hcboxplot(x=ustat$utteranceLength, 
          var=as.character(ustat$character), 
          var2=as.character(ustat$drama),
          color=col
          )

Table

This table records information about every utterance in each play. It is therefore very long.

knitr::kable(ustat, format="markdown")
utteranceDistances <- function(t) {
  tagg <- t[,.(text=paste0(Token.surface, collapse=" ")),.(corpus,drama,begin, Speaker.figure_surface)]
  pairs <- t(combn(nrow(tagg),2))
  for(i in 1:nrow(pairs)) {
    row_a <- pairs[i,1]
    row_b <- pairs[i,2]

  }

}

text[,
     mcp:=max(utteranceEnd),
     .(corpus,drama)][,
                      .(utteranceLength=.N,rbegin=utteranceBegin/mcp),
                      .(corpus,drama,utteranceBegin,Speaker.figure_surface)][]

Presence and Utterances {.tabset}

ustat <- utteranceStatistics(drama, normalizeByDramaLength = FALSE) %>%
  filterCharacters(drama, n=params$topn) %>%
  characterNames(drama)

All Utterances

par(mar=c(1,9,1,0),xpd=FALSE)

for (id in params$id) {
  splid <- strsplit(id,":",fixed=TRUE)[[1]]
  plot(ustat,
       xlab="", 
       main=toFigureName(meta[meta$corpus==splid[1] & meta$drama==splid[2],3]))
}

Configuration and Copresence {.tabset}

configs <- lapply(split(drama), function(x) {
  c <- configuration(x, onlyPresence = TRUE, 
                     segment = "Scene") %>%
    characterNames(x)
  copr <- as.matrix(c) %*% t(as.matrix(c))
  rownames(copr) <- c$character
  colnames(copr) <- c$character

  copr <- reshape::melt.array(copr)
  copr$drama <- x$meta$drama
  copr
})

allcopresence <- Reduce(rbind, configs)
allcopresence$X1 <- as.character(allcopresence$X1)
allcopresence$X2 <- as.character(allcopresence$X2)


quadrama/DramaAnalysis documentation built on Sept. 28, 2020, 8:42 p.m.