pver <- packageVersion("DramaAnalysis")
knitr::opts_chunk$set(echo = FALSE) if (is.null(.Options$qd.datadir)) { setup() } getYPositions <- function(x,b,limit=100) { rl <- apply(x, 2, function(xx) { xx <- as.double(xx) cs <- c(0,head(cumsum(xx),-1)) r <- cs + xx/2 r[xx<=limit] <- NA r }) rl } toFigureName <- function(x) { tools::toTitleCase(tolower(x)) } library(magrittr) library(highcharter) drama <- loadDrama(params$id) title <- dramaNames(drama, formatString = "%T")
title: "QuaDramA Report: r title
"
r paste(paste0("[",drama$meta$Name,"](http://d-nb.info/gnd/",drama$meta$Pnd,")"),sep=", ")
r length(unique(drama$segments$begin.Act))
r drama$segments[,length(unique(begin.Scene)),.(begin.Act)]$V1
All characters, in the order they have their first appearance
cs <- characterStatistics(drama) %>% characterNames(drama) as.character(cs[order(cs$firstBegin),]$character)
Relative amount of words spoken by each figure.
b <- characterStatistics(drama, normalize = FALSE) %>% characterNames(drama) %>% barplot()
table <- characterStatistics(drama, normalize=FALSE) %>% characterNames(drama) knitr::kable(table,format="pandoc")
r params$topN
Characters {.tabset}This segment shows the top r params$topN
characters. This number can be adjusted by supplying the parameter topN
with a different value to the report()
function.
ustatl <- utteranceStatistics(drama) %>% filterCharacters(drama, by="rank", n=params$topN) %>% characterNames(drama)
par(mar=c(1,9,1,0),xpd=FALSE) plot(ustatl, drama)
par(mar=c(2,9,0,0)) boxplot(utteranceLength ~ character, data=ustatl, horizontal=TRUE, las=1, frame=FALSE)
ustat <- utteranceStatistics(drama, normalizeByDramaLength = FALSE) %>% characterNames(drama)
par(mar=c(1,9,1,0),xpd=FALSE) plot(ustat,drama)
par(mar=c(2,9,0,0)) boxplot(utteranceLength ~ character, data=ustat, horizontal=TRUE,las=1,frame=FALSE)
actLimit <- 0.1 sceneLimit <- 0.3
This visualisation displays the relative amount of tokens uttered by a character. The plot includes names of characters only when they speak more than r actLimit*100
% of the tokens in an act and more than r sceneLimit*100
% of the tokens in a scene.
c <- configuration(drama) %>% characterNames(drama) mat <- as.matrix(c) mat <- scale(mat, center=FALSE, scale=colSums(mat)) rownames(mat) <- c$character b <- barplot(mat, col=params$col, names.arg = as.roman(1:ncol(mat))) yPositions <- as.vector(getYPositions(mat,b,limit=actLimit)) xPositions <- rep(b,each=nrow(mat)) lab <- rep(as.character(c$character),ncol(mat)) xPositions <- xPositions[which(!is.na(yPositions))] lab <- lab[which(!is.na(yPositions))] yPositions <- yPositions[which(!is.na(yPositions))] if (length(yPositions > 0)) { text(x=xPositions,y=yPositions,labels=tools::toTitleCase(tolower(lab)),cex=0.6) }
c <- configuration(drama, segment = "Scene") %>% characterNames(drama) mat <- as.matrix(c) mat <- scale(mat, center=FALSE, scale=colSums(mat)) rownames(mat) <- c$character sceneLabels <- paste(as.roman(drama$segments[,.N,.(Number.Act, Number.Scene)]$Number.Act), drama$segments[,.N,.(Number.Act, Number.Scene)]$Number.Scene, sep=".") b <- barplot(mat, col=params$col, names.arg=sceneLabels) yPositions <- as.vector(getYPositions(mat,b,limit=actLimit)) xPositions <- rep(b,each=nrow(mat)) lab <- rep(as.character(c$character),ncol(mat)) xPositions <- xPositions[which(!is.na(yPositions))] lab <- lab[which(!is.na(yPositions))] yPositions <- yPositions[which(!is.na(yPositions))] if (length(yPositions > 0)) { text(x=xPositions,y=yPositions,labels=tools::toTitleCase(tolower(lab)),cex=0.6) }
In a copresence network, two vertices are connected if the characters they represent appear together on stage (= are co-present on stage). The network shown here is based on scenes, i.e., the thicker an edge, the more scenes the two connected characters are copresent.
c <- configuration(drama, onlyPresence = TRUE, segment="Scene") %>% characterNames(drama) co <- as.matrix(c) %*% t(as.matrix(c)) # add figure names rownames(co) <- c$character colnames(co) <- c$character require(igraph) g <- graph_from_adjacency_matrix(co, weighted=TRUE, # weighted graph mode="undirected", # no direction diag=FALSE # no looping edges ) # Now we plot plot.igraph(g, layout=layout_with_gem, # how to lay out the graph vertex.label.cex=0.6, # label size vertex.label.color="black", # font color vertex.color=qd.colors[4], # vertex color vertex.frame.color=NA, # no vertex border edge.width=E(g)$weight # scale edges according to their weight )
Independent vertex sets are sets of characters that do not have edges between any two characters. I.e., there is not a single scene in the play in which the characters shown in the vertex sets below are co-present. This is based on the function largest_ivs()
from the igraph package.
largest_ivs(g)
The "gossip network" represents character that talk about other characters. The colors don't follow a particular pattern and are just added for readability. Edge thickness represent how often (number of scenes) this happens, and is scaled logarithmically. Currently, this includes scenes in which both characters are present. This will be changed later.
mtext <- segment(drama$mentions, drama$segments) m <- as.matrix( as.data.frame.matrix( xtabs( ~ utteranceSpeakerId + entityId, mtext[,.N,.(utteranceSpeakerId,entityId,begin.Scene)]))) m <- m[-1,colnames(m) %in% drama$characters$figure_id] g <- graph_from_adjacency_matrix(m, weighted=TRUE, # weighted graph mode="directed", # no direction diag=FALSE # no looping edges ) coords <- layout_(g, on_grid()) plotGossipNetwork <- TRUE if (length(igraph::E(g)) == 0) { plotGossipNetwork <- FALSE }
plot.igraph(g, layout=coords, # how to lay out the graph vertex.label.cex=0.6, # label size vertex.label.color="black", # font color vertex.color=qd.colors[4], # vertex color vertex.frame.color=NA, # no vertex border edge.curved = TRUE, # curved edges edge.width=log(E(g)$weight)+1, # scale edges according to their weight edge.color=rep(qd.colors,100) )
This analysis shows word fields only for characters that speak more than r params$minTokens
tokens. This value can be adjusted by supplying the argument minTokens
to the report()
function. Z-Sore analysis is only shown if more than one character remains after this filtering, other plots only if at least one character remains.
showZScores <- TRUE fields <- base_dictionary fieldNames <- names(fields) dstat <- dictionaryStatistics(drama, fields = fields, normalizeByCharacter = TRUE, normalizeByField = TRUE, column="Token.lemma") %>% filterCharacters(drama, by="tokens", n=params$minTokens) %>% characterNames(drama) mat <- as.matrix(dstat) rownames(mat) <- dstat$character par(mar=c(10,2,0,10),xpd=TRUE) b <- barplot(t(mat), col=params$col[1:length(fields)], las=3, border=FALSE ) legend(x=max(b)+1,y=max(mat), legend=colnames(mat), fill=params$col[1:length(fields)], bty="n", border=FALSE)
if (nrow(dstat) < 2) { message("*Less than two characters remain, skipping Z-Score analysis.*") showZScores <- FALSE }
zmat <- scale(mat) par(mar=c(2,12,0,6),xpd=TRUE) b <- barplot(t(zmat), col=params$col[1:length(fieldNames)], las=1,horiz=TRUE, border=FALSE, beside=TRUE ) legend(x=max(zmat),y=max(b)+1,legend=colnames(zmat),fill=params$col,bty="n",border=FALSE)
mat <- t(scale(t(mat),center=FALSE,scale=rowSums(mat))) par(mar=c(10,2,1,10),xpd=TRUE) b <- barplot(t(mat), col=params$col[1:length(fields)], las=3, border=FALSE ) legend(x=max(b)+1,y=max(mat), legend=colnames(mat), fill=params$col,bty="n", border=FALSE)
fields
showPersonnelExchange = length(unique(drama$segments$begin.Act)) > 1
print("*Not applicable for this file.*")
The following chart shows for each scene boundary the number of characters exchanged over the boundary. Different metrics have been proposed, the plot shows all of them in comparison.
df <- list() df$ham <- hamming(drama, variant="NormalizedHamming") df$trilcke <- hamming(drama, variant="Trilcke") df$sd <- scenicDifference(drama) if (length(df$ham)+1 != length(unique(mtext$begin.Scene))) { message("Something is seriously wrong here. Please report this issue.") showPersonnelExchange <- FALSE }
df <- list() df$ham <- hamming(drama, variant="NormalizedHamming") df$trilcke <- hamming(drama, variant="Trilcke") df$sd <- scenicDifference(drama) if (is.null(params$col)) { colors <- gray.colors(3) } else { colors <- params$col[c(1,4,7)] } names <- c("Hamming (Normalized)", "Trilcke et al. (2017)", "Scenic Difference") sceneLabels <- paste(as.roman(mtext[,.N,.(Number.Act, Number.Scene)]$Number.Act), mtext[,.N,.(Number.Act, Number.Scene)]$Number.Scene, sep=".") par(mar=c(3,3,3,3)) layout(matrix(c(1,2,2,2,2),5,1,byrow=TRUE)) plot(0,type="n",axes=FALSE,xlab="",ylab="") legend(x="center",legend=c("Hamming (Normalized)", "Trilcke et al. (2017)", "Scenic Difference"),bty="n",horiz=TRUE,fill = colors) actBoundaries <- mtext[,length(unique(begin.Scene)),.(begin.Act)]$V1 s <- 0 for (i in 1:length(actBoundaries)) { actBoundaries[i] <- actBoundaries[i] + s s <- actBoundaries[i] } b <- barplot(Reduce(rbind, df), beside=TRUE,col = colors, border=FALSE) axis(1, at=seq(0.5,max(b[3,])+1,4),labels=sceneLabels, cex=0.1)
highchart() %>% hc_yAxis(max=1) %>% hc_xAxis(categories=sceneLabels, tickmarkPlacement="between", labels.x=-15) %>% hc_add_series(df$ham, pointPadding=0, type="column", name=names[[1]], color=colors[[1]]) %>% hc_add_series(df$trilcke, pointPadding=0, type="column", name=names[[2]], color=colors[[2]]) %>% hc_add_series(df$sd, pointPadding=0, type="column", name=names[[3]], color=colors[[3]])
This report has geen generated using the R package DramaAnalysis
in version r pver
. The package is being developed here, in the context of the project QuaDramA. Please report errors here, or contact me.
For the generation of this report, the following list of parameters was used:
params
The following table describes these options
| Name | Meaning |
| ----- | ------- |
| id
| The id of the play we want to report on |
| topN
| If an analysis only takes the most talkative characters into account, it will use the number of characters given here. Default: 10
|
| minTokens
| If an analysis requires characters to speak at least a certain number of tokens, this value can be adjusted here. Default: 500
|
| col
| A list of color codes in a form R can understand. Default: r qd.colors
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.