#' Convert post data frame to corpus objects
#'
#' These functions convert a post data frame to corpus objects
#' identical to those constructed with
#' \code{\link[quanteda]{corpus}}, \code{\link[tm]{VCorpus}},
#' and \code{\link[tm]{PCorpus}}. The list-column
#' \code{comment} of the original post data frame is dropped
#' to keep the data structure of the corpus simple.
#'
#' @param df Data frame. A data frame generated by
#' \code{\link{post2df}}.
#' @param PCorpus Logical. Whether to construct the corpus with
#' \code{\link[tm]{PCorpus}}. If \code{FALSE}, uses
#' \code{\link[tm]{VCorpus}}. Defaults to \code{FALSE}.
#' @param ... Additional arguments passed on to
#' \code{\link[quanteda]{corpus}}, \code{\link[tm]{VCorpus}},
#' or \code{\link[tm]{PCorpus}}.
#'
#' @seealso \code{\link{comment2qcorp}}, \code{\link{comment2tmcorp}}
#' @name post2corpus
NULL
#' Convert post data frame to quanteda corpus
#' @rdname post2corpus
#'
#' @return \code{post2qcorp} returns the result of calling
#'   \code{\link[quanteda]{corpus}} on \code{df} (without its
#'   \code{comment} column), using \code{title} as the document id
#'   field and \code{content} as the text field.
#'
#' @examples
#' library(quanteda)
#' post_df <- example_posts()
#' post_corp <- post2qcorp(post_df)
#'
#' summary(post_corp)
#'
#' @importFrom dplyr rename
#' @importFrom magrittr %>%
#' @importFrom quanteda corpus
#' @export
post2qcorp <- function(df, ...) {
  # Drop the `comment` column with a logical mask. Negative indexing via
  # `-which(...)` selects *zero* columns when no column is named "comment",
  # because `-integer(0)` is an empty index.
  df <- df[, colnames(df) != "comment", drop = FALSE]

  # Return the corpus visibly (no trailing assignment).
  corpus(df, docid_field = "title", text_field = "content", ...)
}
#' Convert post data frame to tm corpus
#' @rdname post2corpus
#'
#' @return \code{post2tmcorp} returns the corpus built by
#'   \code{\link[tm]{PCorpus}} (when \code{PCorpus} is \code{TRUE}) or
#'   \code{\link[tm]{VCorpus}} (otherwise) from a
#'   \code{\link[tm]{DataframeSource}} in which \code{link} has been
#'   renamed to \code{doc_id} and \code{content} to \code{text}.
#'
#' @examples
#' library(tm)
#' post_df <- example_posts()
#' post_corp <- post2tmcorp(post_df)
#'
#' inspect(post_corp)
#' meta(post_corp, "author")
#'
#' @importFrom tm PCorpus VCorpus DataframeSource
#' @importFrom dplyr rename select everything
#' @importFrom magrittr %>%
#' @export
post2tmcorp <- function(df, PCorpus = FALSE, ...) {
  # Drop the `comment` column with a logical mask. `-which(...)` would
  # select zero columns when no column is named "comment".
  df <- df[, colnames(df) != "comment", drop = FALSE]

  # Put `doc_id` and `text` first, followed by every remaining column.
  df <- df %>%
    rename("doc_id" = "link", "text" = "content") %>%
    select("doc_id", "text", everything())

  src <- DataframeSource(df)

  # Plain if/else instead of ifelse() with assignments as its branches.
  # The logical argument `PCorpus` shares its name with the tm constructor;
  # R skips non-function bindings when resolving a call, so `PCorpus(...)`
  # below still reaches the imported function.
  if (isTRUE(PCorpus)) {
    PCorpus(src, ...)
  } else {
    VCorpus(src, ...)
  }
}
#' Convert 'comment' list-column to 'corpus' list-column
#'
#' These functions convert the data frames stored in the list-column
#' 'comment' of the data frame returned by \code{\link{post2df}}
#' to corpus objects created with \code{\link[quanteda]{corpus}},
#' \code{\link[tm]{VCorpus}}, or \code{\link[tm]{PCorpus}}.
#'
#' @param df Data frame. A data frame generated by
#' \code{\link{post2df}}.
#' @param PCorpus Logical. Whether to construct the corpus with
#' \code{\link[tm]{PCorpus}}. If \code{FALSE}, uses
#' \code{\link[tm]{VCorpus}}. Defaults to \code{FALSE}.
#' @param ... Additional arguments passed on to
#' \code{\link[quanteda]{corpus}}, \code{\link[tm]{VCorpus}},
#' or \code{\link[tm]{PCorpus}}.
#'
#' @seealso \code{\link{post2qcorp}}, \code{\link{post2tmcorp}}
#' @name comment2corpus
NULL
#' Convert comment list-column to quanteda corpus list-column
#'
#' @rdname comment2corpus
#'
#' @return \code{comment2qcorp} returns a tibble with one row per row of
#'   \code{df}: \code{post_id} (taken from \code{df$link}) and
#'   \code{comment}, a list-column holding the result of
#'   \code{\link[quanteda]{corpus}} for each comment data frame, or
#'   \code{NULL} where the post has no comment data frame.
#'
#' @importFrom quanteda corpus
#' @importFrom tibble data_frame
#' @importFrom tibble tibble
#' @export
comment2qcorp <- function(df, ...) {
  # lapply() keeps one slot per post, including NULL results. Assigning
  # NULL with `[[<-` into a preallocated list would *delete* the slot and
  # leave the list shorter than `df$link`.
  comment_corp <- lapply(df$comment, function(comment_df) {
    if (is.null(comment_df)) {
      return(NULL)
    }
    corpus(comment_df,
           text_field = "comment",
           docid_field = "time",
           ...)
  })

  # tibble() replaces the deprecated data_frame().
  tibble(post_id = df$link, comment = comment_corp)
}
# toks <- tokens(corp)
# key_in_context <- kwic(toks, "魯")
#' Convert comment list-column to tm corpus list-column
#'
#' @rdname comment2corpus
#'
#' @return \code{comment2tmcorp} returns a tibble with one row per row of
#'   \code{df}: \code{post_id} (taken from \code{df$link}) and
#'   \code{comment}, a list-column holding the corpus built by
#'   \code{\link[tm]{PCorpus}} or \code{\link[tm]{VCorpus}} for each
#'   comment data frame, or \code{NULL} where the post has no comment
#'   data frame.
#'
#' @importFrom tm PCorpus VCorpus DataframeSource
#' @importFrom dplyr mutate rename select everything
#' @importFrom magrittr %>%
#' @importFrom tibble tibble
#' @export
comment2tmcorp <- function(df, PCorpus = FALSE, ...) {
  # Choose the corpus constructor once. The logical argument `PCorpus`
  # shares its name with the tm constructor; R skips non-function bindings
  # when resolving a call, so `PCorpus(...)` still reaches the imported
  # function.
  build_corpus <- if (isTRUE(PCorpus)) {
    function(src, ...) PCorpus(src, ...)
  } else {
    function(src, ...) VCorpus(src, ...)
  }

  # Turn one comment data frame into a DataframeSource: number the rows
  # as `doc_id`, rename `comment` to `text`, and move both columns first.
  as_source <- function(comment_df) {
    comment_df <- comment_df %>%
      # seq_len() stays empty for a zero-row data frame; 1:0 would not.
      mutate("doc_id" = seq_len(nrow(comment_df))) %>%
      rename("text" = "comment") %>%
      select("doc_id", "text", everything())
    DataframeSource(comment_df)
  }

  # lapply() keeps one slot per post, including NULL results. Assigning
  # NULL with `[[<-` into a preallocated list would *delete* the slot and
  # leave the list shorter than `df$link`.
  comment_corp <- lapply(df$comment, function(comment_df) {
    if (is.null(comment_df)) {
      return(NULL)
    }
    build_corpus(as_source(comment_df), ...)
  })

  # tibble() replaces the deprecated data_frame().
  tibble(post_id = df$link, comment = comment_corp)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.