In trinker/qdap: Bridging the Gap Between Qualitative Data and Quantitative Analysis

&twocol

# Attach the packages the deck relies on, then set the global knitr chunk
# options.
library(reports)
library(slidify)
library(knitcitations)
library(knitr)
library(qdap)
opts_chunk$set(cache = FALSE, tidy = FALSE)


# Read in your own bib file: the first entry in the working directory whose
# file extension is exactly "bib".
bib <- read.bibtex(
  dir()[tools::file_ext(dir()) == "bib"][1]
)
# Cite in text using `r citet(bib[1])`
internal <- TRUE

About Me

*** =right

r VS(2) - Former First Grade Teacher - Literacy PhD Student - Quantitatively Bent - Accidental Programmer

*** =left

r IM2("norah.png", width=350)

*** =pnotes

Research Interests

Engagement, Motivation and Feedback
Multimodal Analysis
Discourse Analysis
Data Visualization
Improving Research Methods

--- &twocol

Why R?

*** =left

r VS(2) - Cutting Edge
- Powerful - Visualization

r VS(4) r HS(4) Everything can be quantified...

*** =right

r VS(2) r IM2("r.jpg", width=350)

--- {class: class, tpl: tabs}

Why qdap?

*** {class: active, id: qdap}

r IM2("whyqdap.png")

*** {id: Birth}

Frustration

r IM2("fustration.jpg", width = 400)

*** {id: Affordances}

Affordances

Flexible
Bridge to r HR("http://cran.r-project.org/", "5087") + r HR("http://www.bioconductor.org/", "749") + r HR("https://github.com", "?") = ⁺5836
Dynamic (Field Driven)
Data Focused

Recommendations

r IM2("logo_rstudio.jpg", width = 200, center = FALSE, link = "http://www.rstudio.com/")

r IM2("github-social.png", width = 200, center = FALSE, link ="https://github.com/")

r IM2("knitr.png", width = 200, center = FALSE, link ="http://yihui.name/knitr/") + r IM2("latex.png", width = 160, center = FALSE, link = "http://en.wikipedia.org/wiki/LaTeX") r HS(5) r HR("C:/Users/trinker/Desktop/proximity/REPORT/proximity_measure.Rnw", ".Rmd") r HS(5) r HR("C:/Users/trinker/Desktop/proximity/REPORT/proximity_measure.pdf", ".pdf")

r IM2("ggplot.png", width = 200, center = FALSE, link = "http://docs.ggplot2.org/current/")

press p

*** =pnotes

r IM2("ggplot2demo.png")

Let's Dig In

r IM2("dig.jpg")

*** =pnotes

Agenda

Installing qdap
Projects
Read In Data
Word Counts and Descriptive Statistics
Word Measures and Scoring
Qualitative Coding System
Visualizing Discourse Data
Discussion

Installing qdap

# One-time setup: install devtools first if it is not already available.
# install.packages("devtools")
library(devtools)

# install_github() takes "user/repo" specs. The separate positional `username`
# argument accepted by early devtools releases was deprecated and later
# dropped, so the owner is spelled out in each repo string instead.
install_github(
  c("ramnathv/slidify", "ramnathv/slidifyLibraries"),
  ref = "dev"
)
install_github("cboettig/knitcitations")
install_github(
  c("trinker/reports", "trinker/qdapDictionaries", "trinker/qdap")
)
install_github("jrnold/ggthemes")
install.packages("scales")

# Attach the packages used in the rest of the deck. library() stops with an
# error when a package is missing, whereas require() only returns FALSE and
# lets later slides fail with a less obvious message.
invisible(lapply(
  c("qdap", "ggplot2", "ggthemes", "scales", "grid"),
  library,
  character.only = TRUE
))

r VS(2)

https://github.com/trinker/qdap

--- .YT yt:chQlpEj8g2Q &youtube

Projects

*** =pnotes

r IF("vid1.html")

General qdap Function Format

Function(Text_Variable, list(Grouping_Variables))

r VS(2)

with(Data_Set, Function(Text_Variable, list(Grouping_Variables)))

Read In Data

Word (docx)
Text (txt)
Excel (csv/xlsx)

Read In Data

r IM2("transcript.png", width= 850)

Read In Data

# Locate the sample Word transcript inside the installed qdap package, read
# it with read.transcript(), and display the result via truncdf(dat1, 50).
doc1 <- system.file(
  "extdata/transcripts/trans1.docx",
  package = "qdap"
)
dat1 <- read.transcript(doc1)
truncdf(dat1, 50)

r VS(2) Plenty of parsing tools to clean up!!!

Our Data Set

DATA

# Attach xtable without its startup messages, then print DATA as an HTML table.
suppressMessages(library(xtable))
print(xtable(DATA), type = "html")

Word Counts and Descriptive Statistics

Word Frequency Matrix
Word Stats
Term Counts
Question Types
Parts of Speech
Syllabication

Word Frequency Matrix

# Word frequency matrix of `state` grouped by `person`; keep rows 1 through 14.
with(DATA, wfm(state, person))[1:14, ]

Word Frequency Matrix

# Plot the word frequency matrix with values = TRUE and plot = FALSE, then
# flip the coordinates of the returned plot object.
plot(
  with(DATA, wfm(state, person)),
  values = TRUE,
  plot = FALSE
) +
  coord_flip()

Word Frequency Matrix (Correlations)

# Use the row index as the grouping variable so every row of DATA is its own
# group, then heat-map the correlation matrix of the transposed result.
dat2 <- wfm(DATA$state, seq_len(nrow(DATA)))
qheat(
  cor(t(dat2)),
  low = "yellow", high = "red", grid = "grey90",
  diag.na = TRUE, by.column = NULL
)

Word Stats (1 of 3)

# Compute word statistics for `dialogue` by `person`; the outer parentheses
# print the result as well as assigning it.
(desc_wrds <- with(mraja1spl, word_stats(dialogue, person, tot = tot)))

# Second call passes the previous result (desc_wrds) as the first argument
# instead of the raw text, with digits = 1.
# NOTE(review): presumably this reuses the earlier computation; confirm
# against the word_stats documentation.
desc_wrds2 <- with(mraja1spl, word_stats(desc_wrds, person, tot = tot, digits = 1))

# Show column 1 together with columns 2 through 9 of the `gts` element.
desc_wrds2$gts[, c(1, 2:9)]

Word Stats (2 of 3)

desc_wrds2$gts[, c(1, 10:19)]

Word Stats (3 of 3)

desc_wrds2$gts[, c(1, 20:26)]

Word Stats Plot

# Plot the word statistics object with labels and "red" as the high color.
plot(desc_wrds, label = TRUE, high = "red")

Term Counts

# Match list handed to termco(). Leading and trailing spaces inside the
# strings are part of the search terms, so " the " and "the" are different
# entries. The third element ("good") is deliberately left unnamed.
ml2 <- list(
    theme_1 = c(" the ", " a ", " an "),
    theme_2 = c(" I'" ),
    "good",
    the_words = c("the", " the ", " the", "the ")
)

# Count the terms in `dialogue` by `person` using the match list above.
out <- with(raj.act.1,  termco(dialogue, person, ml2))

*** =pnotes

r VS(2) *Press p*

ml2 <- list(
    theme_1 = c(" the ", " a ", " an "),
    theme_2 = c(" I'" ),
    "good",
    the_words = c("the", " the ", " the", "the ")
)

Term Counts

out

*** =pnotes

ml2 <- list(
    theme_1 = c(" the ", " a ", " an "),
    theme_2 = c(" I'" ),
    "good",
    the_words = c("the", " the ", " the", "the ")
)

Term Counts Plot

plot(out, high = "red", low = "yellow", label = TRUE)

Question Types

(x <- with(mraja1spl, question_type(dialogue, person)))

Question Types Plot

plot(x)

Parts of Speech

# Parts of speech for `state`, grouped by `adult` and `sex`; the outer
# parentheses print the result as well as assigning it.
(posbydat <- with(DATA, pos_by(state, list(adult, sex))))

# NOTE(review): pos.RData presumably holds a precomputed `posbydat`, in which
# case load() replaces the object created above - confirm the file contents.
load("pos.RData")
posbydat

r VS(3) r HR("http://faculty.washington.edu/dillon/GramResources/penntable.html", "Penn Treebank Project (1991)")

*** =pnotes

posbydat[["POStagged"]]

Parts of Speech Plot

plot(posbydat, label = TRUE)

Word Measures and Scoring

Readability
Formality
Polarity

Readability

Automated Readability Index
Coleman Liau
SMOG
Flesch Kincaid
Fry
Linsear Write

Readability

with(rajSPLIT, coleman_liau(dialogue, list(fam.aff)))

Formality

Heylighen & Dewaele (1999a, 1999b, 2002)

$$ F = 50(\frac{n_{f}-n_{c}}{N} + 1) $$

Where:

$$ f = \left\{noun, \;adjective, \;preposition, \;article\right\} $$ $$ c = \left\{pronoun, \;verb, \;adverb, \;interjection\right\} $$ $$ N = \sum{(f \;+ \;c \;+ \;conjunctions)} $$

Formality

# Formality of `dialogue` by `act`, computed from the raw text.
(form <- with(raj, formality(dialogue, act)))

# Same call with `rajPOS` as the first argument; this reassigns `form`.
# NOTE(review): rajPOS looks like a precomputed parts-of-speech object used in
# place of the raw text - confirm against the formality documentation.
(form <- with(raj, formality(rajPOS, act)))

Formality Plot

# Plot the formality object with the "Set2" and "RdBu" bar palettes.
plot(form, bar.colors = c("Set2", "RdBu"))

Polarity

(poldat <- with(mraja1spl, polarity(dialogue, list(sex, fam.aff, died))))

plot(poldat)

Qualitative Coding System

Apply codes to:

The dialogue
The time spans

# Define the code labels, then call cm_df.transcript() on the dialogue
# (DATA$state) and the speakers (DATA$person) with file = "DATA.txt".
codes <- qcv(AA, BB, CC)
X <- cm_df.transcript(
  DATA$state,
  DATA$person,
  file = "DATA.txt"
)

sam:

     1        2  3    4   5   6   
     Computer is fun. Not too fun.
greg:

     7  8    9    10   11   
     No it's not, it's dumb.
teacher:

     12   13     14 15 
     What should we do?
sam:

     16  17    18 19     
     You liar, it stinks!

r IM2("transcript2.png", width=800) r VS(1) Coding time spans works similarly...

press p

*** =pnotes

# Time-span coding list. `transcript_time_span` holds the overall span of the
# transcript; A, B and C each hold the spans assigned to that code, supplied
# to qcv() as one comma-separated string through `terms`. Two of the strings
# continue onto a second line, so the embedded line breaks are part of them.
x <- list(
    transcript_time_span = qcv(00:00 - 1:12:00),
    A = qcv(terms = "2.40:3.00, 5.01, 6.02:7.00, 9.00, 
        12.00:14.00, 00.51.00:00.59.00"),
    B = qcv(terms = "2.40, 3.01:3.02, 5.01, 6.02:7.00,
        9.00, 30.00:39.00, 1.12.00:1.19.01"),
    C = qcv(terms = "2.40:3.00, 5.01, 6.02:7.00, 9.00, 17.01")
)

After Reading Qualitative Codes...

(y <- cm_2long(x))

Gantt Plot of Codes

plot(y)

Summary of Codes

summary(y)
plot(summary(y))

Visualizing Discourse Data

Lexical Dispersion Plot
Word Cloud
Turn of Talk Plot
Venn Diagram
Word Network Plot

Lexical Dispersion Plot

# Dispersion plot of "love" and "night" in `dialogue`, grouped by `fam.aff`
# and `sex`, with `act` passed as rm.vars.
with(
  rajSPLIT,
  dispersion_plot(
    dialogue, c("love", "night"),
    grouping.var = list(fam.aff, sex),
    rm.vars = act
  )
)

Lexical Dispersion Plot

# Build the search terms: take the WORD column of the "all" table in the
# `rfswl` element, wrap the entries with spaste(), drop entries 3 and 12, and
# prepend " governor~~romney ".
wrds <- word_list(pres_debates2012$dialogue, stopwords = Top200Words)
wrds2 <- spaste(wrds[["rfswl"]][["all"]][, "WORD"])
wrds2 <- c(" governor~~romney ", wrds2[-c(3, 12)])

# Dispersion plot of those terms, with `time` passed as rm.vars.
with(
  pres_debates2012,
  dispersion_plot(
    dialogue, wrds2,
    rm.vars = time, color = "black", bg.color = "white"
  )
)

Word Cloud (Colored Terms)

# Named groups of target words; each group gets its own color in the cloud.
terms <- list(
  I        = c("i", "i'm"),
  mal      = qcv(stinks, dumb, distrust),
  articles = qcv(the, a, an),
  pronoun  = qcv(we, you)
)

# Word cloud of `state` with one color per group plus a final color for the
# words that fall under the "other" legend entry.
with(
  DATA,
  trans_cloud(
    state,
    target.words = terms,
    cloud.colors = qcv(red, green, blue, black, gray65),
    expand.target = FALSE,
    proportional = TRUE,
    legend = c(names(terms), "other")
  )
)

# Same definitions and call repeated (second copy of the chunk in the deck).
terms <- list(
  I        = c("i", "i'm"),
  mal      = qcv(stinks, dumb, distrust),
  articles = qcv(the, a, an),
  pronoun  = qcv(we, you)
)

with(
  DATA,
  trans_cloud(
    state,
    target.words = terms,
    cloud.colors = qcv(red, green, blue, black, gray65),
    expand.target = FALSE,
    proportional = TRUE,
    legend = c(names(terms), "other")
  )
)

Word Cloud Plot (Colored Terms)

# Same colored-term cloud, with explicit maximum and minimum word sizes.
with(
  DATA,
  trans_cloud(
    state,
    target.words = terms,
    cloud.colors = qcv(red, green, blue, black, gray65),
    expand.target = FALSE,
    proportional = TRUE,
    legend = c(names(terms), "other"),
    max.word.size = 8,
    min.word.size = .5
  )
)

Gradient Cloud

# Copy DATA and run space_fill() over `state` for the three listed phrases.
# NOTE(review): presumably this keeps each phrase together as one token -
# confirm against the space_fill documentation.
DATA2 <- DATA
DATA2$state <- space_fill(DATA$state, c("is fun", "too fun", "you liar"))

# This first cloud is drawn from the original DATA, not from DATA2.
gradient_cloud(DATA$state, DATA$sex, title="Houghton Colors", 
    max.word.size = 8, min.word.size = .01, X ="purple" , Y = "yellow")

Gradient Cloud

# Gradient cloud of the DATA2 text by `sex`, with a red-to-blue gradient.
gradient_cloud(
  DATA2$state, DATA2$sex,
  title = "Gender Word Use",
  max.word.size = 8, min.word.size = .01,
  X = "red", Y = "blue"
)

Turn of Talk Plot

r VS(1)

# Turn-of-talk plot called with plot = FALSE so that further ggplot2 layers
# can be added to the returned object: horizontal lines at the mean word count
# and at +2 and +3 standard deviations, each with a text label positioned
# 2 units above its line.
tot_plot(
  mraja1, "dialogue",
  grouping.var = c("sex", "fam.aff"),
  tot = FALSE, plot = FALSE
) +
  scale_fill_brewer(palette = "Set1") +
  geom_hline(aes(yintercept = mean(word.count))) +
  geom_hline(aes(yintercept = mean(word.count) + 2 * sd(word.count))) +
  geom_hline(aes(yintercept = mean(word.count) + 3 * sd(word.count))) +
  geom_text(
    parse = TRUE, hjust = 0, vjust = 0, size = 3,
    aes(x = 2, y = mean(word.count) + 2, label = "bar(x)")
  ) +
  geom_text(
    hjust = 0, vjust = 0, size = 3,
    aes(
      x = 1,
      y = mean(word.count) + 2 * sd(word.count) + 2,
      label = "+2 sd"
    )
  ) +
  geom_text(
    hjust = 0, vjust = 0, size = 3,
    aes(
      x = 1,
      y = mean(word.count) + 3 * sd(word.count) + 2,
      label = "+3 sd"
    )
  ) +
  ggtitle("Romeo & Juliet:\nAct 1 Turns of Talk")

Venn Diagram

# Venn diagram of `state` by `person`, with the legend at the top right.
with(DATA, trans_venn(state, person, legend.location = "topright"))

Word Network Plot

# Word network plot of DATA$state by DATA$person, with stopwords = NULL.
word_network_plot(text.var = DATA$state, DATA$person, stopwords = NULL)

Discussion

How might qdap + R fit into your workflow?
What do you want to know more about?
Are there any points that need to be clarified?
…

trinker/qdap documentation built on Sept. 30, 2020, 6:28 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

trinker/qdap Bridging the Gap Between Qualitative Data and Quantitative Analysis

In trinker/qdap: Bridging the Gap Between Qualitative Data and Quantitative Analysis

About Me

Research Interests

Why R?

Why qdap?

Frustration

Affordances

Recommendations

Let's Dig In

Agenda

Installing qdap

Projects

General qdap Function Format

Read In Data

Read In Data

Read In Data

Our Data Set

Word Counts and Descriptive Statistics

Word Frequency Matrix

Word Frequency Matrix

Word Frequency Matrix (Correlations)

Word Stats (1 of 3)

Word Stats (2 of 3)

Word Stats (3 of 3)

Word Stats Plot

Term Counts

Term Counts

Term Counts Plot

Question Types

Question Types Plot

Parts of Speech

Parts of Speech Plot

Word Measures and Scoring

Readability

Readability

Formality

Formality

Formality Plot

Polarity

Qualitative Coding System

After Reading Qualitative Codes...

Gantt Plot of Codes

Summary of Codes

Visualizing Discourse Data

Lexical Dispersion Plot

Lexical Dispersion Plot

Word Cloud (Colored Terms)

Word Cloud Plot (Colored Terms)

Gradient Cloud

Gradient Cloud

Turn of Talk Plot

Venn Diagram

Word Network Plot

Discussion

R Package Documentation

Browse R Packages

We want your feedback!

trinker/qdap
Bridging the Gap Between Qualitative Data and Quantitative Analysis