pver <- packageVersion("DramaAnalysis")
knitr::opts_chunk$set(echo = FALSE)

if (is.null(.Options$qd.datadir)) { 
  setup()
}

getYPositions <- function(x,b,limit=100) {
  rl <- apply(x, 2, function(xx) {
    xx <- as.double(xx)
    cs <- c(0,head(cumsum(xx),-1))
    r <- cs + xx/2
    r[xx<=limit] <- NA
    r  
  })
  rl
}

toFigureName <- function(x) {
  tools::toTitleCase(tolower(x))
}

library(magrittr)
library(highcharter)

drama <- loadDrama(params$id)

title <- dramaNames(drama, formatString = "%T")

title: "QuaDramA Report: r title"


Meta data

Character Lists {.tabset}

By Order of appearance

All characters, in the order they have their first appearance

cs <- characterStatistics(drama) %>%
  characterNames(drama)

as.character(cs[order(cs$firstBegin),]$character)

Overall Speech Distribution {.tabset}

Bar chart

Relative amount of words spoken by each figure.

b <- characterStatistics(drama, normalize = FALSE) %>%
  characterNames(drama) %>%
  barplot()

Table

table <- characterStatistics(drama, normalize=FALSE) %>%
  characterNames(drama)

knitr::kable(table,format="pandoc")

Presence and Utterances {.tabset}

Top r params$topN Characters {.tabset}

This segment shows the top r params$topN characters. This number can be adjusted by supplying the parameter topN with a different value to the report() function.

ustatl <- utteranceStatistics(drama) %>%
  filterCharacters(drama, by="rank", n=params$topN) %>%
  characterNames(drama)

All Utterances

par(mar=c(1,9,1,0),xpd=FALSE)
plot(ustatl, drama)

Utterance Length Variation

par(mar=c(2,9,0,0))
boxplot(utteranceLength ~ character, data=ustatl, horizontal=TRUE, las=1, frame=FALSE)

All Characters {.tabset}

ustat <- utteranceStatistics(drama, normalizeByDramaLength = FALSE) %>%
  characterNames(drama)

All Utterances

par(mar=c(1,9,1,0),xpd=FALSE)
plot(ustat,drama)

Utterance Length Variation

par(mar=c(2,9,0,0))
boxplot(utteranceLength ~ character, data=ustat, horizontal=TRUE,las=1,frame=FALSE)

Configuration and Copresence {.tabset}

actLimit <- 0.1
sceneLimit <- 0.3

This visualisation displays the relative amount of tokens uttered by a character. The plot includes names of characters only when they speak more than r actLimit*100% of the tokens in an act and more than r sceneLimit*100% of the tokens in a scene.

By Act

c <- configuration(drama) %>% characterNames(drama)

mat <- as.matrix(c)

mat <- scale(mat, center=FALSE, scale=colSums(mat))
rownames(mat) <- c$character

b <- barplot(mat,
             col=params$col,
             names.arg = as.roman(1:ncol(mat)))



yPositions <- as.vector(getYPositions(mat,b,limit=actLimit))
xPositions <- rep(b,each=nrow(mat))
lab <- rep(as.character(c$character),ncol(mat))

xPositions <- xPositions[which(!is.na(yPositions))]
lab <- lab[which(!is.na(yPositions))]
yPositions <- yPositions[which(!is.na(yPositions))]

if (length(yPositions > 0)) {
  text(x=xPositions,y=yPositions,labels=tools::toTitleCase(tolower(lab)),cex=0.6)
} 

By Scene

c <- configuration(drama, segment = "Scene") %>% 
  characterNames(drama)

mat <- as.matrix(c)

mat <- scale(mat, center=FALSE, scale=colSums(mat))
rownames(mat) <- c$character

sceneLabels <- paste(as.roman(drama$segments[,.N,.(Number.Act, Number.Scene)]$Number.Act), drama$segments[,.N,.(Number.Act, Number.Scene)]$Number.Scene, sep=".")

b <- barplot(mat,
             col=params$col,
             names.arg=sceneLabels)


yPositions <- as.vector(getYPositions(mat,b,limit=actLimit))
xPositions <- rep(b,each=nrow(mat))
lab <- rep(as.character(c$character),ncol(mat))

xPositions <- xPositions[which(!is.na(yPositions))]
lab <- lab[which(!is.na(yPositions))]
yPositions <- yPositions[which(!is.na(yPositions))]

if (length(yPositions > 0)) {
  text(x=xPositions,y=yPositions,labels=tools::toTitleCase(tolower(lab)),cex=0.6)
} 

Network Analysis {.tabset}

Copresence Network

In a copresence network, two vertices are connected if the characters they represent appear together on stage (= are co-present on stage). The network shown here is based on scenes, i.e., the thicker an edge, the more scenes the two connected characters are copresent.

c <- configuration(drama, 
                   onlyPresence = TRUE, 
                   segment="Scene") %>%
  characterNames(drama)
co <- as.matrix(c) %*% t(as.matrix(c))

# add figure names
rownames(co) <- c$character
colnames(co) <- c$character

require(igraph)

g <- graph_from_adjacency_matrix(co, 
                                 weighted=TRUE,     # weighted graph
                                 mode="undirected", # no direction
                                 diag=FALSE         # no looping edges
                                )

# Now we plot
plot.igraph(g, 
            layout=layout_with_gem,       # how to lay out the graph
            vertex.label.cex=0.6,         # label size
            vertex.label.color="black",   # font color
            vertex.color=qd.colors[4],    # vertex color
            vertex.frame.color=NA,        # no vertex border
            edge.width=E(g)$weight        # scale edges according to their weight
            )  

Independent Vertex Sets

Independent vertex sets are sets of characters that do not have edges between any two characters. I.e., there is not a single scene in the play in which the characters shown in the vertex sets below are co-present. This is based on the function largest_ivs() from the igraph package.

largest_ivs(g)

Gossip Network

The "gossip network" represents character that talk about other characters. The colors don't follow a particular pattern and are just added for readability. Edge thickness represent how often (number of scenes) this happens, and is scaled logarithmically. Currently, this includes scenes in which both characters are present. This will be changed later.

mtext <- segment(drama$mentions, drama$segments)

m <- as.matrix(
  as.data.frame.matrix(
    xtabs( ~ utteranceSpeakerId + entityId,
           mtext[,.N,.(utteranceSpeakerId,entityId,begin.Scene)])))

m <- m[-1,colnames(m) %in% drama$characters$figure_id]

g <- graph_from_adjacency_matrix(m, 
                                 weighted=TRUE,     # weighted graph
                                 mode="directed", # no direction
                                 diag=FALSE         # no looping edges
                                )
coords <- layout_(g, on_grid())

plotGossipNetwork <- TRUE
if (length(igraph::E(g)) == 0) {
  plotGossipNetwork <- FALSE
}
plot.igraph(g,
            layout=coords,                # how to lay out the graph
            vertex.label.cex=0.6,         # label size
            vertex.label.color="black",   # font color
            vertex.color=qd.colors[4],    # vertex color
            vertex.frame.color=NA,        # no vertex border
            edge.curved = TRUE,           # curved edges
            edge.width=log(E(g)$weight)+1,  # scale edges according to their weight
            edge.color=rep(qd.colors,100)
            )  

Word Fields {.tabset}

This analysis shows word fields only for characters that speak more than r params$minTokens tokens. This value can be adjusted by supplying the argument minTokens to the report() function. Z-Sore analysis is only shown if more than one character remains after this filtering, other plots only if at least one character remains.

Bar Chart

showZScores <- TRUE

fields <- base_dictionary
fieldNames <- names(fields)

dstat <- dictionaryStatistics(drama, 
                              fields = fields,
                              normalizeByCharacter = TRUE,
                              normalizeByField = TRUE,
                              column="Token.lemma") %>%
  filterCharacters(drama, by="tokens", n=params$minTokens) %>%
  characterNames(drama)

mat <- as.matrix(dstat)
rownames(mat) <- dstat$character

par(mar=c(10,2,0,10),xpd=TRUE)
b <- barplot(t(mat), 
        col=params$col[1:length(fields)],
        las=3,
        border=FALSE
        )
legend(x=max(b)+1,y=max(mat),
       legend=colnames(mat),
       fill=params$col[1:length(fields)],
       bty="n",
       border=FALSE)

Bar chart (z-scores)

if (nrow(dstat) < 2) {
  message("*Less than two characters remain, skipping Z-Score analysis.*")
  showZScores <- FALSE
}

Z-scores in wikipedia

zmat <- scale(mat)


par(mar=c(2,12,0,6),xpd=TRUE)
b <- barplot(t(zmat), 
        col=params$col[1:length(fieldNames)],
        las=1,horiz=TRUE,
        border=FALSE,
        beside=TRUE
)
legend(x=max(zmat),y=max(b)+1,legend=colnames(zmat),fill=params$col,bty="n",border=FALSE)

Bar chart (scaled)

mat <- t(scale(t(mat),center=FALSE,scale=rowSums(mat)))

par(mar=c(10,2,1,10),xpd=TRUE)
b <- barplot(t(mat),
        col=params$col[1:length(fields)],
        las=3,
        border=FALSE
        )
legend(x=max(b)+1,y=max(mat),
       legend=colnames(mat),
       fill=params$col,bty="n",
       border=FALSE)

Word fields

fields

Personnel Exchange {.tabset}

showPersonnelExchange = length(unique(drama$segments$begin.Act)) > 1
print("*Not applicable for this file.*")

Static

The following chart shows for each scene boundary the number of characters exchanged over the boundary. Different metrics have been proposed, the plot shows all of them in comparison.

df <- list()
df$ham <- hamming(drama, variant="NormalizedHamming")
df$trilcke <- hamming(drama, variant="Trilcke")
df$sd <- scenicDifference(drama)

if (length(df$ham)+1 != length(unique(mtext$begin.Scene))) {
  message("Something is seriously wrong here. Please report this issue.")
  showPersonnelExchange <- FALSE
}
df <- list()
df$ham <- hamming(drama, variant="NormalizedHamming")
df$trilcke <- hamming(drama, variant="Trilcke")
df$sd <- scenicDifference(drama)



if (is.null(params$col)) {
  colors <- gray.colors(3)
} else {
  colors <- params$col[c(1,4,7)]
}

names <- c("Hamming (Normalized)", 
                             "Trilcke et al. (2017)", 
                             "Scenic Difference")
sceneLabels <- paste(as.roman(mtext[,.N,.(Number.Act, Number.Scene)]$Number.Act), mtext[,.N,.(Number.Act, Number.Scene)]$Number.Scene, sep=".")
  par(mar=c(3,3,3,3))
  layout(matrix(c(1,2,2,2,2),5,1,byrow=TRUE))
  plot(0,type="n",axes=FALSE,xlab="",ylab="")
  legend(x="center",legend=c("Hamming (Normalized)", 
                             "Trilcke et al. (2017)", 
                             "Scenic Difference"),bty="n",horiz=TRUE,fill = colors)


  actBoundaries <- mtext[,length(unique(begin.Scene)),.(begin.Act)]$V1
  s <- 0
  for (i in 1:length(actBoundaries)) {
    actBoundaries[i] <- actBoundaries[i] + s
    s <- actBoundaries[i]
  }

  b <- barplot(Reduce(rbind, df), beside=TRUE,col = colors, border=FALSE)
  axis(1, at=seq(0.5,max(b[3,])+1,4),labels=sceneLabels, cex=0.1)

Dynamic

highchart() %>%
  hc_yAxis(max=1) %>%
  hc_xAxis(categories=sceneLabels,
           tickmarkPlacement="between",
           labels.x=-15) %>%
  hc_add_series(df$ham, pointPadding=0,
                type="column",
                name=names[[1]],
                color=colors[[1]]) %>%
  hc_add_series(df$trilcke,
                pointPadding=0,
                type="column",
                name=names[[2]],
                color=colors[[2]]) %>%
  hc_add_series(df$sd, pointPadding=0,
                type="column",
                name=names[[3]],
                color=colors[[3]])

About

This report has geen generated using the R package DramaAnalysis in version r pver. The package is being developed here, in the context of the project QuaDramA. Please report errors here, or contact me.

Reproduction

For the generation of this report, the following list of parameters was used:

params

The following table describes these options

| Name | Meaning | | ----- | ------- | | id | The id of the play we want to report on | | topN | If an analysis only takes the most talkative characters into account, it will use the number of characters given here. Default: 10 | | minTokens | If an analysis requires characters to speak at least a certain number of tokens, this value can be adjusted here. Default: 500 | | col | A list of color codes in a form R can understand. Default: r qd.colors |



quadrama/DramaAnalysis documentation built on Sept. 28, 2020, 8:42 p.m.