# inst/doc/litRiddle.R

## ----setup, include = FALSE---------------------------------------------------
# Set the knitr chunk option `echo` to TRUE.
knitr::opts_chunk$set(echo = TRUE)


## ----warning = FALSE----------------------------------------------------------
library(litRiddle)

## -----------------------------------------------------------------------------
# Load the five datasets in a single data() call.
data(books, respondents, reviews, motivations, frequencies)

## ----eval = FALSE-------------------------------------------------------------
#  books

## -----------------------------------------------------------------------------
# Show the first ten entries of the `title` column of `books`.
books$title[seq_len(10)]

## -----------------------------------------------------------------------------
get.columns()

## -----------------------------------------------------------------------------
# One explain() call per dataset. They are kept as separate top-level
# statements so that any visible return value is still auto-printed.
explain("books")
explain("reviews")
explain("respondents")
explain("motivations")
explain("frequencies")

## -----------------------------------------------------------------------------
# First call: load.freq.table = FALSE.
dat <- combine.all(load.freq.table = FALSE)

## -----------------------------------------------------------------------------
# Second call: load.freq.table = TRUE. The result overwrites `dat`.
dat <- combine.all(load.freq.table = TRUE)

## -----------------------------------------------------------------------------
# Look up two names: "book.id" and "age.resp".
find.dataset("book.id")
find.dataset("age.resp")

## ----eval = FALSE-------------------------------------------------------------
#  get.columns()

## -----------------------------------------------------------------------------
# Simplest form: only the variable to tabulate is given.
make.table(table.of = "age.resp")

## -----------------------------------------------------------------------------
# Same variable, now with axis labels, a title and bar colours set explicitly.
make.table(
  table.of = "age.resp",
  xlab = "age respondent",
  ylab = "number of people",
  title = "Distribution of respondent age",
  barcolor = "red",
  barfill = "white"
)

## -----------------------------------------------------------------------------
# make.table2() takes a second variable through `split`.
make.table2(table.of = "age.resp", split = "gender.resp")

## -----------------------------------------------------------------------------
make.table2(table.of = "literariness.read", split = "gender.author")

## -----------------------------------------------------------------------------
make.table2(table.of = "age.resp", split = "zipcode")

## -----------------------------------------------------------------------------
# The age / gender call again, with labels and colours set explicitly.
make.table2(
  table.of = "age.resp",
  split = "gender.resp",
  xlab = "age respondent",
  ylab = "number of people",
  barcolor = "purple",
  barfill = "yellow"
)

## -----------------------------------------------------------------------------
# The literariness / author-gender call again, with labels and colours set.
make.table2(
  table.of = "literariness.read",
  split = "gender.author",
  xlab = "Overall literariness scores",
  ylab = "number of people",
  barcolor = "black",
  barfill = "darkred"
)

## -----------------------------------------------------------------------------
# Store the result of order.responses("readingbehavior") and inspect its
# structure.
dat.reviews <- order.responses("readingbehavior")
str(dat.reviews)

## -----------------------------------------------------------------------------
# Same for "bookratings".
dat.ratings <- order.responses("bookratings")
str(dat.ratings)

## -----------------------------------------------------------------------------
# Row names 10 through 20 of `frequencies`.
rownames(frequencies)[10:20]

## -----------------------------------------------------------------------------
# First ten entries of the row named "Appel_Weerzin".
frequencies["Appel_Weerzin", ][1:10]

## -----------------------------------------------------------------------------
# Rows of `books` whose `short.title` equals that same identifier.
books[books$short.title == "Appel_Weerzin", ]

## -----------------------------------------------------------------------------
# Reload `motivations` and show its first 15 rows.
data(motivations)
head(motivations, n = 15)

## -----------------------------------------------------------------------------
# dplyr is attached (with its startup messages suppressed) so that tibble()
# is available for showing very large tables somewhat more nicely.
suppressMessages(library(dplyr))

mots <- motivations.text()
tibble(mots)

## -----------------------------------------------------------------------------
# Merge the `books` row(s) with book.id 46 with `mots` on "book.id".
# all.x = TRUE keeps every row of `x` even when `y` has no match.
mots_hhhh <- merge(
  x = books[books$book.id == 46, ],
  y = mots,
  by = "book.id",
  all.x = TRUE
)
tibble(mots_hhhh)

## -----------------------------------------------------------------------------
# Print up to 40 rows of the `text` column only.
print(tibble(mots_hhhh[, "text"]), n = 40)

## -----------------------------------------------------------------------------
# Same merge as before, restricted to three columns on each side.
mots_hhhh <- merge(
  x = books[books$book.id == 46, c("book.id", "author", "title")],
  y = mots[, c("book.id", "respondent.id", "text")],
  by = "book.id",
  all.x = TRUE
)
tibble(mots_hhhh)

## -----------------------------------------------------------------------------
# Add the columns of `reviews`, matched on book and respondent.
tibble(
  merge(
    x = mots_hhhh,
    y = reviews,
    by = c("book.id", "respondent.id"),
    all.x = TRUE
  )
)

## -----------------------------------------------------------------------------
# Row(s) of `reviews` with respondent.id 1022 and book.id 46.
reviews[reviews$respondent.id == 1022 & reviews$book.id == 46, ]

## -----------------------------------------------------------------------------
# Remember: `motivations` is a *token* table, i.e. one token + lemma + POS tag
# per row.
toks <- motivations
head(table(toks$lemma, toks$upos), n = 30)

## -----------------------------------------------------------------------------
# Keep only the tokens whose lemma contains at least one word character.
# The regular expression "\w+" means "one or more word characters"; it is not
# anchored, so it may match anywhere in the lemma. The backslash is doubled
# so that R does not interpret it as the start of an escape sequence in the
# string literal.
mots <- filter(motivations, grepl("\\w+", lemma))

# Cross-tabulate lemma against POS tag and turn the table of raw frequencies
# into a data frame (see the R documentation of table()).
mots <- data.frame(table(mots$lemma, mots$upos))

# Give the three columns interpretable names.
colnames(mots) <- c("lemma", "upos", "freq")

# Keep only the useful rows, i.e. those lemma + POS combinations that appear
# more than 0 times.
mots <- mots[mots$freq > 0, ]

# Sort from most used to least used.
mots <- mots[order(mots$freq, decreasing = TRUE), ]

# Finally, show the result as a nicer-looking table.
tibble(mots)

## -----------------------------------------------------------------------------
# All rows for the lemma "boek".
mots[mots$lemma == "boek", ]

## -----------------------------------------------------------------------------
# Select the token rows in which the lemma "boek" carries the POS tag "X".
# Their `motivation.id` is the key used for the merge further down.
boekx <- motivations[motivations$lemma == "boek" & motivations$upos == "X", ]
boekx

## -----------------------------------------------------------------------------
mots_text <- motivations.text()

## -----------------------------------------------------------------------------
# Attach the result of motivations.text() to those token rows, matched on
# "motivation.id".
boekx_mots_text <- merge(
  x = boekx,
  y = mots_text,
  by = "motivation.id",
  all.x = TRUE
)

## -----------------------------------------------------------------------------
# merge() adds the ".x" suffix to non-key columns that occur in both inputs;
# the ".x" versions come from `boekx`.
tibble(boekx_mots_text[, c("book.id.x", "respondent.id.x", "text")])

## -----------------------------------------------------------------------------
# The `text` value of the third row.
boekx_mots_text[3, "text"]

## ----eval = FALSE-------------------------------------------------------------
#  help(books)
#  help(respondents)
#  help(reviews)
#  help(frequencies)
#  help(combine.all)
#  help(explain)
#  help(find.dataset)
#  help(get.columns)
#  help(make.table)
#  help(make.table2)
#  help(order.responses)
#  help(litRiddle) # for the general description of the package

# Try the litRiddle package in your browser
#
# Any scripts or data that you put into this service are public.
#
# litRiddle documentation built on July 26, 2023, 5:36 p.m.