word_position: Word Position

Description Usage Arguments Value Note Examples

View source: R/word_position.R

Description

Find counts of the positioning of words within a sentence.

Usage

1
2
3
4
5
6
7
8
word_position(
  text.var,
  match.terms,
  digits = 2,
  percent = TRUE,
  zero.replace = 0,
  ...
)

Arguments

text.var

The text variable.

match.terms

A character vector of quoted terms to find the positions of.

digits

Integer; number of decimal places to round when printing.

percent

logical. If TRUE output given as percent. If FALSE the output is proportion.

zero.replace

Value to replace 0 values with.

...

Currently ignored.

Value

Returns a list, of class "word_position", of data frames and information regarding word positions:

raw

raw word position counts in long format (may be more useful for plotting)

count

integer word position counts

prop

proportional word position counts; proportional to each total word uses

rnp

a character combination data frame of count and proportional

zero_replace

value to replace zeros with; mostly internal use

percent

The value of percent used for plotting purposes.

digits

integer value of number of digits to display; mostly internal use

Note

Default printing is a heatmap plot.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
## Not run: 
position <- with(DATA, word_position(sent_detect(state), Top25Words))
position
lview(position)
plot(position)
scores(position)
preprocessed(position)
counts(position)
proportions(position)
plot(proportions(position))

stopwords <- unique(c(contractions[[1]], Top200Words))
topwords <- freq_terms(pres_debates2012[["dialogue"]], top = 40, 
    at.least = 4, stopwords = stopwords)[[1]]
word_position(pres_debates2012[["dialogue"]], topwords)
plot(word_position(pres_debates2012[["dialogue"]], topwords), FALSE)
plot(word_position(pres_debates2012[["dialogue"]], topwords), TRUE, scale=FALSE)

wordlist <- c("tax", "health", "rich", "america", "truth", "money", "cost", 
    "governnor", "president", "we", "job", "i", "you", "because", 
    "our", "years")

word_position(pres_debates2012[["dialogue"]], wordlist)

## BY VARIABLES
library(gridExtra)
pres_deb_by_time <- with(pres_debates2012, split(dialogue, time))
out1 <-lapply(pres_deb_by_time, word_position, wordlist)
do.call("grid.arrange", c(lapply(out1, plot), ncol=1))

pres_deb_by_person <- with(pres_debates2012, split(dialogue, person))
out2 <-lapply(pres_deb_by_person, word_position, wordlist)
plots <- lapply(names(out2), function(x) plot(out2[[x]], scale=FALSE) + 
    ggtitle(x))
do.call("grid.arrange", c(plots, ncol=2))

## As a histogram
## theme taken from: http://jonlefcheck.net/2013/03/11/black-theme-for-ggplot2-2/
theme_black <- function(base_size=12,base_family="") {
  theme_grey(base_size=base_size,base_family=base_family) %+replace%
    theme(
      # Specify axis options
      axis.line=element_blank(), 
      axis.text.x=element_text(size=base_size*0.8,color="grey55",
                               lineheight=0.9,vjust=1), 
      axis.text.y=element_text(size=base_size*0.8,color="grey55",
                               lineheight=0.9,hjust=1), 
      axis.ticks=element_line(color="grey55",size = 0.2), 
      axis.title.x=element_text(size=base_size,color="grey55",vjust=1), 
      axis.title.y=element_text(size=base_size,color="grey55",angle=90,
                                vjust=0.5), 
      axis.ticks.length=unit(0.3,"lines"), 
      axis.ticks.margin=unit(0.5,"lines"),
      # Specify legend options
      legend.background=element_rect(color=NA,fill="black"), 
      legend.key=element_rect(color="grey55", fill="black"), 
      legend.key.size=unit(1.2,"lines"), 
      legend.key.height=NULL, 
      legend.key.width=NULL,     
      legend.text=element_text(size=base_size*0.8,color="grey55"), 
      legend.title=element_text(size=base_size*0.8,face="bold",hjust=0,
                                color="grey55"), 
      legend.position="right", 
      legend.text.align=NULL, 
      legend.title.align=NULL, 
      legend.direction="vertical", 
      legend.box=NULL,
      # Specify panel options
      panel.background=element_rect(fill="black",color = NA), 
      panel.border=element_rect(fill=NA,color="grey55"), 
      panel.grid.major=element_blank(), 
      panel.grid.minor=element_blank(), 
      panel.spacing=unit(0.25,"lines"),  
      # Specify facetting options
      strip.background=element_rect(fill="grey30",color="grey10"), 
      strip.text.x=element_text(size=base_size*0.8,color="grey55"), 
      strip.text.y=element_text(size=base_size*0.8,color="grey55",
                                angle=-90), 
      # Specify plot options
      plot.background=element_rect(color="black",fill="black"), 
      plot.title=element_text(size=base_size*1.2,color="grey55"), 
      plot.margin=unit(c(1,1,0.5,0.5),"lines")
    )
}

out3 <- list_df2df(lapply(out2[1:2], preprocessed), "Person")
out3 %>% ggplot(aes(x=position)) + 
    geom_histogram(binwidth = 1, fill="white") +
    facet_grid(Person~word) +
    theme_black() + ylab("Count") + xlab("Position")

## MOVE TO THE MICRO THROUGH QUALITATIVE ANALYSIS
locs <- unlist(setNames(lapply(wordlist, function(x){
     sapply(c("ROMNEY", "OBAMA"), function(y){
         which(pres_debates2012[["person"]] ==y & grepl(x, pres_debates2012[["dialogue"]]))
     })
}), wordlist), recursive=FALSE)

fdl <- qdap:::folder(pres_context)
Map(function(x, y){
    if (identical(integer(0), x)) return(NULL)
    z <- with(pres_debates2012, trans_context(dialogue, person, inds=x, n.before=1))
    z[["text"]] <- gsub(beg2char(y, "."), 
        paste0("[[", beg2char(y, "."), "]]"), z[["text"]])
    print(z, file=file.path(fdl, sprintf("%s.doc", y)))
}, locs, names(locs))

## End(Not run)

trinker/qdap documentation built on Sept. 30, 2020, 6:28 p.m.