rm_citation | R Documentation |
Remove/replace/extract APA6 style citations from a string.
Counts of normalized citations ("et al." converted to the original authors, with author + year standardization).
rm_citation(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_citation",
replacement = "",
extract = FALSE,
dictionary = getOption("regex.library"),
...
)
ex_citation(
text.var,
trim = !extract,
clean = TRUE,
pattern = "@rm_citation",
replacement = "",
extract = TRUE,
dictionary = getOption("regex.library"),
...
)
as_count(x, ...)
text.var |
The text variable. |
trim |
logical. If TRUE, removes leading and trailing white spaces. |
clean |
logical. If TRUE, extra white spaces and escaped characters will be removed. |
pattern |
A character string containing a regular expression (or
character string for fixed = TRUE) to be matched in the given character vector. |
replacement |
Replacement for matched pattern. |
extract |
logical. If TRUE, the citations are extracted into a list of vectors. |
dictionary |
A dictionary of canned regular expressions to search within
if pattern begins with "@rm_". |
... |
Ignored. |
x |
The output from ex_citation. |
The default regular expression used by rm_citation finds both in-text and
parenthetical citations. This behavior can be altered by using a secondary
regular expression from the regex_usa data (or other dictionary) via
pattern = "@rm_citation2" (in-text citations only) or
pattern = "@rm_citation3" (parenthetical citations only).
See Examples for example usage.
Returns a character string with citations removed.
as_count returns a data.frame of Authors, Years, and n (counts).
This function is experimental.
gsub
,
stri_extract_all_regex
Other rm_ functions:
rm_abbreviation()
,
rm_between()
,
rm_bracket()
,
rm_caps_phrase()
,
rm_caps()
,
rm_citation_tex()
,
rm_city_state_zip()
,
rm_city_state()
,
rm_date()
,
rm_default()
,
rm_dollar()
,
rm_email()
,
rm_emoticon()
,
rm_endmark()
,
rm_hash()
,
rm_nchar_words()
,
rm_non_ascii()
,
rm_non_words()
,
rm_number()
,
rm_percent()
,
rm_phone()
,
rm_postal_code()
,
rm_repeated_characters()
,
rm_repeated_phrases()
,
rm_repeated_words()
,
rm_tag()
,
rm_time()
,
rm_title_name()
,
rm_url()
,
rm_white()
,
rm_zip()
## All Citations
## Sample sentences containing parenthetical and in-text citations.
x <- c(
  "Hello World (V. Raptor, 1986) bye",
  "Narcissism is not dead (Rinker, 2014)",
  "The R Core Team (2014) has many members.",
  paste(
    "Bunn (2005) said, \"As for elegance, R is refined, tasteful, and",
    "beautiful. When I grow up, I want to marry R.\""
  ),
  "It is wrong to blame ANY tool for our own shortcomings (Baer, 2005).",
  "Wickham's (in press) Tidy Data should be out soon.",
  "Rinker's (n.d.) dissertation not so much.",
  "I always consult xkcd comics for guidance (Foo, 2012; Bar, 2014).",
  "Uwe Ligges (2007) says, \"RAM is cheap and thinking hurts\""
)

## Remove the citations
rm_citation(x)

## Extract the citations
ex_citation(x)

## Count the extracted citations
as_count(ex_citation(x))

## Replace each citation with a placeholder
rm_citation(x, replacement = "[CITATION HERE]")

## Not run:
qdapTools::vect2df(
  sort(table(unlist(rm_citation(x, extract = TRUE)))),
  "citation",
  "count"
)
## End(Not run)

## In-Text
ex_citation(x, pattern = "@rm_citation2")

## Parenthetical
ex_citation(x, pattern = "@rm_citation3")
## Not run:
## Mining Citation
## Install pacman only when it is missing. requireNamespace() checks for the
## package without attaching it, which is sufficient because the next line
## calls pacman::p_load() by its qualified name.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(qdap, qdapTools, dplyr, ggplot2)

## Fetch the example document; the next step reads it by relative path.
url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx")

## Read the document, drop non-ASCII characters, and split it at "References".
## parts[[1]] is the piece mined below (presumably the text preceding the
## reference list -- confirm against split_vector()'s documentation).
parts <- read_docx("whole_language_timeline-updated.docx") %>%
  rm_non_ascii() %>%
  split_vector(split = "References", include = TRUE, regex = TRUE)

parts[[1]]

## All citations from the text collapsed into a single string
parts[[1]] %>%
  unbag() %>%
  ex_citation() %>%
  c()

## Counts
parts[[1]] %>%
  unbag() %>%
  ex_citation() %>%
  as_count()

## By line
ex_citation(parts[[1]])

## Frequency
## tibble() replaces the deprecated data_frame(); dplyr re-exports it, so no
## additional package needs to be loaded.
cites <- parts[[1]] %>%
  unbag() %>%
  ex_citation() %>%
  c() %>%
  tibble(citation = .) %>%
  count(citation) %>%
  arrange(n) %>%
  mutate(citation = factor(citation, levels = citation))

## Distribution of citations (find locations and then plot)
## For each distinct citation, locate every literal (fixed = TRUE) occurrence
## in the collapsed text and pad each match by 5 characters on both sides.
cite_locs <- do.call(rbind, lapply(cites[[1]], function(x) {
  m <- gregexpr(x, unbag(parts[[1]]), fixed = TRUE)
  data.frame(
    citation = x,
    start = m[[1]] - 5,
    end = m[[1]] + 5 + attributes(m[[1]])[["match.length"]]
  )
}))

## NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
## widths (geom_segment(), element_rect()); `size` is kept here so the example
## also runs on older ggplot2 releases.
ggplot(cite_locs) +
  geom_segment(
    aes(x = start, xend = end, y = citation, yend = citation),
    size = 3,
    color = "yellow"
  ) +
  xlab("Duration") +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, nchar(unbag(parts[[1]])) + 25)
  ) +
  theme_grey() +
  theme(
    panel.grid.major = element_line(color = "grey20"),
    panel.grid.minor = element_line(color = "grey20"),
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.border = element_rect(colour = "grey50", fill = NA, size = 1),
    axis.text = element_text(color = "grey50"),
    axis.title = element_text(color = "grey50")
  )
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.