# Global knitr chunk options: collapse each chunk's source and output into a
# single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(kableExtra)

A glimpse at each TBDBr query function

library(TBDBr)

getTranscripts

Queries a table where each row represents a transcript.

# Get the English North American (Eng-NA) transcripts in CHILDES.
transcripts <- getTranscripts(
  corpusName = "childes",
  corpora = c("childes", "Eng-NA")
)

# Render the first five rows as a striped table inside a full-width scroll box.
kable(transcripts[1:5, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")

getParticipants

Queries a table where each row represents a participant (speaker) listed in a transcript.

# Get the English North American (Eng-NA) participants in CHILDES.
participants <- getParticipants(
  corpusName = "childes",
  corpora = c("childes", "Eng-NA")
)

# Render the first five rows as a striped table inside a full-width scroll box.
kable(participants[1:5, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")

getTokens

Queries a table with all the words from the selected transcripts, one word (token) per row.

# Get the tokens (words) of a single transcript, addressed by its full
# corpus path.
tokens <- getTokens(
  corpusName = "childes",
  corpora = c("childes", "Eng-NA", "MacWhinney", "010411a")
)

# Render the first five rows as a striped table inside a full-width scroll box.
kable(tokens[1:5, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")

getTokenTypes

Queries a table with all the words from the selected transcripts condensed into "types" based on word form and part of speech.

# Get the token types for the whole MacWhinney set.
token.types <- getTokenTypes(
  corpusName = "childes",
  corpora = c("childes", "Eng-NA", "MacWhinney")
)

# Render the first five rows as a striped table inside a full-width scroll box.
kable(token.types[1:5, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")

getUtterances

Queries a table with all the utterances from the selected transcripts, one utterance per row.

# Get the utterances of a single transcript, addressed by its full corpus path.
utterances <- getUtterances(
  corpusName = "childes",
  corpora = c("childes", "Eng-NA", "MacWhinney", "010411a")
)

# Render rows 10 through 14 as a striped table inside a full-width scroll box.
kable(utterances[10:14, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")

getNgrams

Queries to get n-grams of specified size (n) and type.

# Get word 3-grams from a single transcript; nGram carries the size and the
# n-gram type as strings.
ngrams <- getNgrams(
  nGram = c("3", "word"),
  corpusName = "childes",
  corpora = c("childes", "Eng-NA", "MacWhinney", "010411a")
)

# Render the first five rows as a striped table inside a full-width scroll box.
kable(ngrams[1:5, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")

getCQL

Querying by "CQL" (Corpus Query Language) lets us search for patterns in the selected transcripts. We construct a CQL query by specifying a search pattern of words, lemmas, and parts of speech. See the documentation (?getCQL) for details.

# Search the MacWhinney set for the pattern "my ball", matching both words as
# lemmas. Each element of cqlArr describes one position in the pattern.
cql.myball <- getCQL(
  cqlArr = list(
    list(type = "lemma", item = "my", freq = "once"),
    list(type = "lemma", item = "ball", freq = "once")
  ),
  corpusName = "childes",
  corpora = c("childes", "Eng-NA", "MacWhinney")
)

# Render the first five rows as a striped table inside a full-width scroll box.
kable(cql.myball[1:5, ]) %>%
  kable_styling(bootstrap_options = "striped") %>%
  scroll_box(width = "100%")


TalkBank/TBDBr documentation built on Feb. 4, 2024, 2:25 p.m.