R/automated_readability_index.R
In qdap: Bridging the Gap Between Qualitative Data and Quantitative Analysis

Documented in automated_readability_index coleman_liau counts.automated_readability_index counts.coleman_liau counts.flesch_kincaid counts.fry counts.linsear_write counts.SMOG flesch_kincaid fry linsear_write plot.automated_readability_index plot.coleman_liau plot.flesch_kincaid plot.linsear_write plot.linsear_write_count plot.linsear_write_scores plot.readability_count plot.readability_score plot.SMOG print.automated_readability_index print.coleman_liau print.flesch_kincaid print.fry print.linsear_write print.linsear_write_count print.linsear_write_scores print.readability_count print.readability_score print.SMOG scores.automated_readability_index scores.coleman_liau scores.flesch_kincaid scores.fry scores.linsear_write scores.SMOG SMOG

#' Readability Measures
#' 
#' \code{automated_readability_index} - Apply Automated Readability Index to 
#' transcript(s) by zero or more grouping variable(s).
#' 
#' @param text.var The text variable.
#' @param grouping.var The grouping variables.  Default \code{NULL} generates 
#' one output for all text.  Also takes a single grouping variable or a list of 1 
#' or more grouping variables.
#' @param rm.incomplete logical.  If \code{TRUE} removes incomplete sentences 
#' from the analysis.
#' @param \ldots Other arguments passed to \code{\link[qdap]{end_inc}}.
#' @return Returns a list of 2 dataframes: (1) Counts and (2) Readability.  
#' Counts are the raw scores used to calculate readability score and can be
#' accessed via \code{\link[qdap]{counts}}.  Readability is the dataframe
#' with the selected readability statistic by grouping variable(s) and can be 
#' access via \code{\link[qdap]{scores}}.  The \code{\link[qdap]{fry}} function 
#' returns a graphic representation of the readability as the 
#' \code{\link[qdap]{scores}} returns the information for graphing but not a
#' readability score.
#' @rdname Readability
#' @section Warning: Many of the indices (e.g., Automated Readability Index) 
#' are derived from word difficulty (letters per word) and sentence difficulty 
#' (words per sentence).  If you have not run the sentSplit function on your 
#' data the results may not be accurate.
#' @section Fry: The \code{fry} function is based on Fry's formula that randomly 
#' samples 3 100 word length passages.  If a group(s) in does not contain 300+ 
#' words they will not be included in the output.
#' @references Coleman, M., & Liau, T. L. (1975). A computer readability formula 
#' designed for machine scoring. Journal of Applied Psychology, Vol. 60, 
#' pp. 283-284.
#' 
#' Fry, E. B. (1968). A readability formula that saves time. Journal of Reading,
#' 11(7), 513-516, 575-578.
#' 
#' Fry, E. B. (1969). The readability graph validated at primary levels. The Reading
#' Teacher, 22(6), 534-538.
#' 
#' Flesch R. (1948). A new readability yardstick. Journal of Applied Psychology. 
#' Vol. 32(3), pp. 221-233. doi: 10.1037/h0057532.
#' 
#' Gunning, T. G. (2003). Building Literacy in the Content Areas. Boston: Allyn 
#' & Bacon.
#' 
#' McLaughlin, G. H. (1969). SMOG Grading: A New Readability Formula. 
#' Journal of Reading, Vol. 12(8), pp. 639-646. 
#' 
#' Smith, E. A. & Senter, R. J. (1967) Automated readability index. 
#' Technical Report AMRLTR-66-220, University of Cincinnati, Cincinnati, Ohio.
#' @export
#' @importFrom qdapTools matrix2df 
#' @examples
#' \dontrun{
#' AR1 <- with(rajSPLIT, automated_readability_index(dialogue, list(person, act)))
#' ltruncdf(AR1,, 15)
#' scores(AR1)
#' counts(AR1)
#' plot(AR1)
#' plot(counts(AR1))
#' 
#' AR2 <- with(rajSPLIT, automated_readability_index(dialogue, list(sex, fam.aff)))
#' ltruncdf(AR2,, 15)
#' scores(AR2)
#' counts(AR2)
#' plot(AR2)
#' plot(counts(AR2))
#' 
#' AR3 <- with(rajSPLIT, automated_readability_index(dialogue, person))
#' ltruncdf(AR3,, 15)
#' scores(AR3)
#' head(counts(AR3))
#' plot(AR3)
#' plot(counts(AR3))
#' 
#' CL1 <- with(rajSPLIT, coleman_liau(dialogue, list(person, act)))
#' ltruncdf(CL1, 20)
#' head(counts(CL1))
#' plot(CL1)
#' 
#' CL2 <- with(rajSPLIT, coleman_liau(dialogue, list(sex, fam.aff)))
#' ltruncdf(CL2)
#' plot(counts(CL2))
#' 
#' (SM1 <- with(rajSPLIT, SMOG(dialogue, list(person, act))))
#' plot(counts(SM1))
#' plot(SM1)
#' 
#' (SM2 <- with(rajSPLIT, SMOG(dialogue, list(sex, fam.aff))))
#' 
#' (FL1 <- with(rajSPLIT, flesch_kincaid(dialogue, list(person, act))))
#' plot(scores(FL1))
#' plot(counts(FL1))
#' 
#' (FL2 <-  with(rajSPLIT, flesch_kincaid(dialogue, list(sex, fam.aff))))
#' plot(scores(FL2))
#' plot(counts(FL2))
#' 
#' FR1 <- with(rajSPLIT, fry(dialogue, list(sex, fam.aff)))
#' scores(FR1)
#' plot(scores(FR1))
#' counts(FR1)
#' plot(counts(FR1))
#' 
#' FR2 <- with(rajSPLIT, fry(dialogue, person))
#' scores(FR2)
#' plot(scores(FR2))
#' counts(FR2)
#' plot(counts(FR2))
#' 
#' FR3 <- with(pres_debates2012, fry(dialogue, list(time, person)))
#' colsplit2df(scores(FR3))
#' plot(scores(FR3), auto.label = FALSE)
#' counts(FR3)
#' plot(counts(FR3))
#' 
#' library(ggplot2)
#' ggplot(colsplit2df(counts(FR3)), aes(sent.per.100.wrds, 
#'     syllables.per.100.wrds)) +
#'     geom_point(aes(fill=person), shape=21, size=3) +
#'     facet_grid(person~time)
#'     
#' LW1 <- with(rajSPLIT, linsear_write(dialogue, list(person, act)))
#' plot(scores(LW1))
#' plot(counts(LW1))
#' 
#' LW2 <- with(rajSPLIT, linsear_write(dialogue, list(sex, fam.aff)))
#' plot(scores(LW2), method="lm")
#' plot(counts(LW2))
#' }
automated_readability_index <-
function(text.var, grouping.var = NULL, rm.incomplete = FALSE, ...) {
    if(is.null(grouping.var)) {
        G <- "all"
    } else {
        if (is.list(grouping.var)) {
            m <- unlist(as.character(substitute(grouping.var))[-1])
            m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
                    x[length(x)]
                }
            )
            G <- paste(m, collapse="&")
        } else {
            G <- as.character(substitute(grouping.var))
            G <- G[length(G)]
        }
    }
    if(is.null(grouping.var)){
        grouping <- rep("all", length(text.var))
    } else {
        if (is.list(grouping.var) & length(grouping.var)>1) {
            grouping <- paste2(grouping.var)
        } else {
            grouping <- unlist(grouping.var)
        } 
    } 
    text <- as.character(text.var)
    DF <- stats::na.omit(data.frame(group = grouping, text.var = text, 
        stringsAsFactors = FALSE))
    if (rm.incomplete) {
        DF <- end_inc(dataframe = DF, text.var = text.var, ...)
    }   
    if (is.dp(text.var = DF[, "text.var"])) {
        warning(paste0("\n  Some rows contain double punctuation.", 
            "  Suggested use of sentSplit function."))
    }  
    if (is.empty(DF[["text.var"]])) {   
        badrows <- which.empty(DF[["text.var"]])
        DF <- DF[-c(badrows), ]
        warning("The following rows contained empty sentences and were removed:\n",
             paste(badrows, collapse=", "))                  
    }        
    DF$word.count <- word_count(DF$text.var, missing = 0)
    i <- as.data.frame(table(DF$group))
    DF$group <- DF$group[ , drop=TRUE]
    DF$tot.n.sent <- 1:nrow(DF)
    DF <- DF[with(DF, order(group, DF$tot.n.sent)), ]
    DF$character.count <- character_count(DF$text.var)
    DF2 <- stats::aggregate(word.count ~ group, DF, sum)
    DF2$sentence.count <- as.data.frame(table(DF$group))$Freq
    DF2$character.count <- stats::aggregate(character.count ~ 
        group, DF, sum)$character.count 
    ari <- function(tse, tw, tc) 4.71*(tc/tw) + .5*(tw/tse) - 21.43
    DF2$Automated_Readability_Index <- with(DF2, 
        ari(tse = sentence.count, tc = character.count, tw = word.count))
    rownames(DF) <- NULL
    names(DF2)[1] <- G
    o <- list(Counts = DF, Readability = DF2)
    class(o) <- c("automated_readability_index", class(DF2))
    o
}


#' Prints an automated_readability_index Object
#' 
#' Prints an automated_readability_index object.
#' 
#' @param x The automated_readability_index object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param \ldots ignored
#' @method print automated_readability_index 
#' @export
print.automated_readability_index <- function(x, digits = 3, ...) {
    print(scores(x), digits = digits, ...)
}



#' Coleman Liau Readability
#' 
#' \code{coleman_liau} - Apply Coleman Liau Index to transcript(s) by zero or 
#' more grouping variable(s).
#' 
#' @rdname Readability
#' @export
coleman_liau <-
function(text.var, grouping.var = NULL, rm.incomplete = FALSE, ...) {
    if(is.null(grouping.var)) {
        G <- "all"
    } else {
        if (is.list(grouping.var)) {
            m <- unlist(as.character(substitute(grouping.var))[-1])
            m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
                    x[length(x)]
                }
            )
            G <- paste(m, collapse="&")
        } else {
            G <- as.character(substitute(grouping.var))
            G <- G[length(G)]
        }
    }
    if(is.null(grouping.var)){
        grouping <- rep("all", length(text.var))
    } else {
        if (is.list(grouping.var) & length(grouping.var)>1) {
            grouping <- paste2(grouping.var)
        } else {
            grouping <- unlist(grouping.var)
        } 
    } 
    text <- as.character(text.var)
    DF <- stats::na.omit(data.frame(group = grouping, text.var = text, 
        stringsAsFactors = FALSE))
    if (rm.incomplete) {
        DF <- end_inc(dataframe = DF, text.var = text.var, ...)
    }
    if (is.dp(text.var = DF[, "text.var"])) {
        warning(paste0("\n  Some rows contain double punctuation.", 
            "  Suggested use of sentSplit function."))
    }    
    if (is.empty(DF[["text.var"]])) {   
        badrows <- which.empty(DF[["text.var"]])
        DF <- DF[-c(badrows), ]
        warning("The following rows contained empty sentences and were removed:\n",
             paste(badrows, collapse=", "))                  
    }          
    DF$word.count <- word_count(DF$text.var, missing = 0, digit.remove = FALSE)
    i <- as.data.frame(table(DF$group))
    DF$group <- DF$group[ , drop=TRUE]
    DF$tot.n.sent <- 1:nrow(DF)
    DF <- DF[with(DF, order(group, DF$tot.n.sent)), ]
    DF$character.count <- character_count(DF$text.var, 
        apostrophe.remove = FALSE, digit.remove = FALSE)
    DF2 <- stats::aggregate(word.count ~ group, DF, sum)
    DF2$sentence.count <- as.data.frame(table(DF$group))$Freq
    DF2$character.count <- stats::aggregate(character.count ~ 
        group, DF, sum)$character.count 
    clf <- function(tse, tw, tc) (.0588*((100*tc)/tw)) - 
      (.296*((100*tse)/tw) ) - 15.8
    DF2$Coleman_Liau <- with(DF2, clf(tse = sentence.count, 
        tc = character.count, tw = word.count))
    names(DF2)[1] <- G
    rownames(DF) <- NULL    
    o <- list(Counts = DF, Readability = DF2)
    class(o) <- "coleman_liau"
    o
}


#' SMOG Readability
#' 
#' \code{SMOG} - Apply SMOG Readability to transcript(s) by zero or more grouping variable(s).
#' 
#' @rdname Readability
#' @param output A character vector character string indicating output type. 
#' One of "valid" (default and congruent with McLaughlin's intent) or "all". 
#' @export
SMOG <-
function(text.var, grouping.var = NULL, output = "valid", 
    rm.incomplete = FALSE, ...) {
    group <- NULL
    if(is.null(grouping.var)) {
        G <- "all"
    } else {
        if (is.list(grouping.var)) {
            m <- unlist(as.character(substitute(grouping.var))[-1])
            m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
                    x[length(x)]
                }
            )
            G <- paste(m, collapse="&")
        } else {
            G <- as.character(substitute(grouping.var))
            G <- G[length(G)]
        }
    }
    if(is.null(grouping.var)){
        grouping <- rep("all", length(text.var))
    } else {
        if (is.list(grouping.var) & length(grouping.var)>1) {
            grouping <- paste2(grouping.var)
        } else {
            grouping <- unlist(grouping.var)
        } 
    } 
    text <- as.character(text.var)
    DF <- stats::na.omit(data.frame(group = grouping, text.var = text, 
        stringsAsFactors = FALSE))
    if (rm.incomplete) {
        DF <- end_inc(dataframe = DF, text.var = text.var, ...)
    }
    if (is.dp(text.var = DF[, "text.var"])) {
        warning(paste0("\n  Some rows contain double punctuation.", 
            "  Suggested use of sentSplit function."))
    }   
    if (is.empty(DF[["text.var"]])) {   
        badrows <- which.empty(DF[["text.var"]])
        DF <- DF[-c(badrows), ]
        warning("The following rows contained empty sentences and were removed:\n",
             paste(badrows, collapse=", "))                  
    }          
    DF$word.count <- word_count(DF$text.var, missing = 0)
    i <- as.data.frame(table(DF$group))
    if (output == "valid") {
        DF <- subset(DF, group%in%as.character(i[i$Freq > 29, ][,'Var1']))
        if (nrow(DF) == 0) {
            stop("Not enough sentences for valid output.")
        }
    }
    DF$group <- DF$group[ , drop=TRUE]
    DF$tot.n.sent <- 1:nrow(DF)
    DF <- DF[with(DF, order(group, DF$tot.n.sent)), ]
    DF$polysyllable.count <- polysyllable_sum(DF$text.var)
    DF2 <- stats::aggregate(word.count ~ group, DF, sum)
    DF2$sentence.count <- as.data.frame(table(DF$group))$Freq
    DF2$polysyllable.count <- stats::aggregate(polysyllable.count ~ 
        group, DF, sum)$polysyllable.count   
    smog <- function(tse, tpsy) 1.043 * sqrt(tpsy * (30/tse)) + 3.1291 
    DF2$SMOG <- with(DF2, smog(tse = sentence.count, 
        tpsy = polysyllable.count))
    DF2$validity <- ifelse(DF2$sentence.count < 30, "n < 30", "valid")
    if(output == "valid") DF2$validity <- NULL
    names(DF2)[1] <- G
    o <- list(Counts = DF, Readability = DF2)
    class(o) <- "SMOG"
    o
}


#' Flesch-Kincaid Readability
#' 
#' \code{flesch_kincaid} - Flesch-Kincaid Readability to transcript(s) by zero or more 
#' grouping variable(s).
#' 
#' @rdname Readability
#' @export
flesch_kincaid <-
function(text.var, grouping.var = NULL, rm.incomplete = FALSE, ...) {
      if(is.null(grouping.var)) {
        G <- "all"
    } else {
        if (is.list(grouping.var)) {
            m <- unlist(as.character(substitute(grouping.var))[-1])
            m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
                    x[length(x)]
                }
            )
            G <- paste(m, collapse="&")
        } else {
            G <- as.character(substitute(grouping.var))
            G <- G[length(G)]
        }
    }
    if(is.null(grouping.var)){
        grouping <- rep("all", length(text.var))
    } else {
        if (is.list(grouping.var) & length(grouping.var)>1) {
            grouping <- paste2(grouping.var)
        } else {
            grouping <- unlist(grouping.var)
        } 
    } 
    text <- as.character(text.var)
    DF <- stats::na.omit(data.frame(group = grouping, text.var = text, 
        stringsAsFactors = FALSE))
    if (rm.incomplete) {
        DF <- end_inc(dataframe = DF, text.var = text.var, ...)
    }
    if (is.dp(text.var = DF[, "text.var"])) {
        warning(paste0("\n  Some rows contain double punctuation.", 
            "  Suggested use of sentSplit function."))
    }
    if (is.empty(DF[["text.var"]])) {   
        badrows <- which.empty(DF[["text.var"]])
        DF <- DF[-c(badrows), ]
        warning("The following rows contained empty sentences and were removed:\n",
             paste(badrows, collapse=", "))                  
    }          
    DF$word.count <- word_count(DF$text.var, missing = 0)
    DF$syllable.count <- syllable_sum(DF$text.var)
    DF$tot.n.sent <- 1:nrow(DF)
    DF <- DF[with(DF, order(group, DF$tot.n.sent)), ]
    DF2 <- stats::aggregate(word.count ~ group, DF, sum)
    DF2$sentence.count <- as.data.frame(table(DF$group))$Freq
    DF2$syllable.count <- stats::aggregate(syllable.count ~ group, DF, 
        sum)$syllable.count  
    fkgl <- function(tw, tse, tsy) 0.39 * (tw/tse) + 11.8 * (tsy/tw) - 15.59
    fre <- function(tw, tse, tsy) 206.835 - 1.015 * (tw/tse) - 84.6 * (tsy/tw)
    DF2$FK_grd.lvl <- with(DF2, fkgl(tw = word.count, 
        tse = sentence.count, tsy = syllable.count))
    DF2$FK_read.ease <- with(DF2, fre(tw = word.count, 
        tse = sentence.count, tsy = syllable.count))
    names(DF2)[1] <- G
    o <- list(Counts = DF, Readability = DF2)
    class(o) <- "flesch_kincaid"
    o
}


#' Fry Readability
#' 
#' \code{fry} - Apply Fry Readability to transcript(s) by zero or more 
#' grouping variable(s).
#' 
#' @rdname Readability
#' @param auto.label  logical.  If \code{TRUE} labels automatically added.  If 
#' \code{FALSE} the user clicks interactively.
#' @param grid logical.  If \code{TRUE} a micro grid is displayed, similar to 
#' Fry's original depiction, though this may make visualizing more difficult.
#' @param div.col The color of the grade level division lines.
#' @param plot logical.  If \code{TRUE} a graph is plotted corresponding to Fry's 
#' graphic representation.
#' @export
fry <-
function(text.var, grouping.var = NULL, rm.incomplete = FALSE, 
    auto.label = TRUE, grid = FALSE, div.col = "grey85", plot = TRUE, ...) {
    read.gr <- group <- NULL  
    if(is.null(grouping.var)) {
        G <- "all"
    } else {
        if (is.list(grouping.var)) {
            m <- unlist(as.character(substitute(grouping.var))[-1])
            m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
                    x[length(x)]
                }
            )
            G <- paste(m, collapse="&")
        } else {
            G <- as.character(substitute(grouping.var))
            G <- G[length(G)]
        }
    }
    if(is.null(grouping.var)){
        grouping <- rep("all", length(text.var))
    } else {
        if (is.list(grouping.var) & length(grouping.var)>1) {
            grouping <- paste2(grouping.var)
        } else {
            grouping <- unlist(grouping.var)
        } 
    } 
    text <- as.character(text.var)
    DF <- stats::na.omit(data.frame(group = grouping, text.var = text, 
        stringsAsFactors = FALSE))
    if (rm.incomplete) {
        DF <- end_inc(dataframe = DF, text.var = text.var, ...)
    }
    if (is.dp(text.var = DF[, "text.var"])) {
        warning(paste0("\n  Some rows contain double punctuation.", 
            "  Suggested use of sentSplit function."))
    }   
    if (is.empty(DF[["text.var"]])) {   
        badrows <- which.empty(DF[["text.var"]])
        DF <- DF[-c(badrows), ]
        warning("The following rows contained empty sentences and were removed:\n",
             paste(badrows, collapse=", "))                  
    }          
    DF$word.count <- word_count(DF$text.var, missing = 0)
    DF$tot.n.sent <- 1:nrow(DF)
    DF <- DF[with(DF, order(group, DF$tot.n.sent)), ]
    DF$read.gr <- unlist(by(DF$word.count, DF$group, partition))
    LIST <- names(which(tapply(DF$word.count, DF$group, function(x) {
        max(cumsum(x))
    }) >= 300))
    DF2 <- subset(DF, group %in% LIST & read.gr != "NA")
    DF2$group <- DF2$group[, drop = TRUE]
    DF2$sub <- with(DF2, paste(group, read.gr, sep = "_"))
    DF2b <- DF2[order(DF2$sub), ]
    DF2b$cumsum.gr <- unlist(tapply(DF2$word.count, DF2$sub, cumsum))
    DF2 <- DF2b[order(DF2b$group, DF2b$tot.n.sent), ]
    DF2$wo.last <- DF2$cumsum.gr - DF2$word.count
    DF2$hun.word <- 100 - DF2$wo.last
    DF2$frac.sent <- ifelse(DF2$hun.word/DF2$word.count > 1, 
        1, round(DF2$hun.word/DF2$word.count, digits = 3))   
    DF3b <- unique(data.frame(sub = DF2$sub, group = DF2$group))
    DF3 <- data.frame(sub = DF2$sub, group = DF2$group, 
        text.var = DF2$text.var, frac.sent = DF2$frac.sent)
    CHOICES <- tapply(as.character(DF3b$sub), DF3b$group, function(x) {
            if (length(x) < 3) return(NULL)
            sample(x, 3, replace = FALSE)
        }
    )
    ## drop any less than 300 words
    less_than_300 <- sapply(CHOICES, is.null)
    if (any(less_than_300)) {
        warning("The following groups contain < 300 words and were dropped:\n  ",
            paste(names(CHOICES[less_than_300]), collapse="', '")
        )
    }
    CHOICES <- CHOICES[!less_than_300]

    CHOICES <- as.character(unlist(CHOICES))
    DF4 <- DF3[which(DF3$sub %in% CHOICES), ]
    DF4$sub <- DF4$sub[, drop = TRUE]
    FUN <- function(x) paste(as.character(unlist(x)), collapse = " ")
    DF5 <- stats::aggregate(text.var ~ sub + group, DF4, FUN)
    sent.per.100 <- as.data.frame(tapply(DF2$frac.sent, DF2$sub, sum))
    names(sent.per.100) <- "x"
    DF5$sent.per.100 <- sent.per.100[as.character(DF5$sub), "x"] 
    hun.grab <- function(x) paste(unblanker(unlist(word_split(
        reducer(unlist(strip(x))))))[1:100], collapse = " ")
    DF5$syll.count <- syllable_sum(lapply(DF5$text.var, hun.grab))
    DF6 <- stats::aggregate(syll.count ~ group, DF5, mean)
    DF6$ave.sent.per.100 <- stats::aggregate(sent.per.100 ~ group, DF5, mean)[, 2]
    names(DF6) <- c(G, "ave.syll.per.100", "ave.sent.per.100")
    colnames(DF5)[c(2, 4, 5)] <- c(colnames(DF6)[1], "sent.per.100.wrds",
        "syllables.per.100.wrds")
    DF5 <- DF5[, -1]
    
    out <- list(Counts = DF5[, c(1, 3:4, 2)], Readability = DF6)
    attributes(out) <- list(
            class = "fry",
            names = names(out),
            plot_instructions = list(auto.label = auto.label, 
                grid = grid, div.col = div.col, plot = plot)
    )
    out
}


#' Linsear Write Readability
#' 
#' \code{linsear_write} - Apply Linsear Write Readability to transcript(s) by 
#' zero or more grouping variable(s).
#' @rdname Readability
#' @export 
linsear_write <-
function(text.var, grouping.var = NULL, rm.incomplete = FALSE, ...) {
    read.gr <- group <- NULL
    if(is.null(grouping.var)) {
        G <- "all"
    } else {
        if (is.list(grouping.var)) {
            m <- unlist(as.character(substitute(grouping.var))[-1])
            m <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
                    x[length(x)]
                }
            )
            G <- paste(m, collapse="&")
        } else {
            G <- as.character(substitute(grouping.var))
            G <- G[length(G)]
        }
    }
    if(is.null(grouping.var)){
        grouping <- rep("all", length(text.var))
    } else {
        if (is.list(grouping.var) & length(grouping.var)>1) {
            grouping <- paste2(grouping.var)
        } else {
            grouping <- unlist(grouping.var)
        } 
    } 
    text <- as.character(text.var)
    DF <- stats::na.omit(data.frame(group = grouping, text.var = text, 
        stringsAsFactors = FALSE))
    if (rm.incomplete) {
        DF <- end_inc(dataframe = DF, text.var = text.var, ...)
    }
    if (is.dp(text.var = DF[, "text.var"])) {
        warning(paste0("\n  Some rows contain double punctuation.", 
            "  Suggested use of sentSplit function."))
    }   
    if (is.empty(DF[["text.var"]])) {   
        badrows <- which.empty(DF[["text.var"]])
        DF <- DF[-c(badrows), ]
        warning("The following rows contained empty sentences and were removed:\n",
             paste(badrows, collapse=", "))                  
    }          
    DF$word.count <- word_count(DF$text.var)
    DF$tot.n.sent <- 1:nrow(DF)
    DF <- DF[with(DF, order(group, DF$tot.n.sent)), ]
    DF$read.gr <- unlist(by(DF$word.count, DF$group, partition))
    LIST <- names(which(tapply(DF$word.count, DF$group, function(x) {
            max(cumsum(x))
        }
    ) >= 100))
    DF2 <- subset(DF, group %in% LIST & read.gr != "NA")
    DF2$group <- DF2$group[, drop = TRUE]
    DF2$sub <- with(DF2, paste(group, read.gr, sep = "_"))
    DF2b <- DF2[order(DF2$sub), ]
    DF2b$cumsum.gr <- unlist(tapply(DF2$word.count, DF2$sub, cumsum))
    DF2 <- DF2b[order(DF2b$group, DF2b$tot.n.sent), ]
    DF2$wo.last <- DF2$cumsum.gr - DF2$word.count
    DF2$hun.word <- 100 - DF2$wo.last
    DF2$frac.sent <- ifelse(DF2$hun.word/DF2$word.count > 1, 1, 
        DF2$hun.word/DF2$word.count)  
    DF3b <- unique(data.frame(sub = DF2$sub, group = DF2$group))
    DF3 <- data.frame(sub = DF2$sub, group = DF2$group, text.var = 
        DF2$text.var, frac.sent = DF2$frac.sent)
    CHOICES <- tapply(as.character(DF3b$sub), DF3b$group, function(x) {
            sample(x, 1, replace = FALSE)
        }
    )
    CHOICES <- as.character(unlist(CHOICES))
    DF4 <- DF3[which(DF3$sub %in% CHOICES), ]
    DF4$sub <- DF4$sub[, drop = TRUE]
    FUN <- function(x) paste(as.character(unlist(x)), collapse = " ")
    DF5 <- stats::aggregate(text.var ~ sub + group, DF4, FUN)
    sent.per.100 <- as.data.frame(tapply(DF2$frac.sent, DF2$sub, sum))
    names(sent.per.100) <- "x"
    DF5$sent.per.100 <- sent.per.100[as.character(DF5$sub), "x"]
    hun.grab <- function(x) paste(unblanker(unlist(word_split(reducer(
        unlist(strip(x))))))[1:100], collapse = " ")
    DF5$SYL.LIST <- lapply(DF5$text.var, function(x) unlist(syllable_count(
        hun.grab(x))$syllables))
    DF5$hard_easy_sum <- unlist(lapply(DF5$SYL.LIST, function(x) {
          sum(ifelse(x >= 3, 3, 1))
    }))
    DF5$HE_tsent_ratio <- unlist(DF5$hard_easy_sum)/DF5$sent.per.100
    DF5$Linsear_Write <- ifelse(DF5$sent.per.100 > 20, 
        DF5$HE_tsent_ratio/2, 
        (DF5$HE_tsent_ratio - 2)/2)
    DF5 <- DF5[, c(2, 4, 6, 8)]
    names(DF5) <- c(G, "sent.per.100", "hard_easy_sum", "Linsear_Write")
    names(DF)[1] <- names(DF5)[1]
    o <- list(Counts = DF, Readability = DF5)
    class(o) <- "linsear_write"
    o
}


#' Readability Measures
#' 
#' \code{scores.automated_readability_index} - View scores from \code{\link[qdap]{automated_readability_index}}.
#' 
#' automated_readability_index Method for scores
#' @param x The automated_readability_index object.
#' @param \ldots ignored
#' @export
#' @method scores automated_readability_index
scores.automated_readability_index <- function(x, ...) {

    out <- x[["Readability"]]
    attributes(out) <- list(
            class = c("readability_score", class(out)),
            type = "automated_readability_index_scores",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}

#' Prints a readability_score Object
#' 
#' Prints a readability_score object.
#' 
#' @param x The readability_score object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param \ldots ignored
#' @method print readability_score
#' @export
print.readability_score <-
    function(x, digits = 3, ...) {
    class(x) <- "data.frame"
    WD <- options()[["width"]]
    options(width=3000)
    x[, -1] <- round(x[, -1], digits = digits)
    print(x)
    options(width=WD)
}


#' Readability Measures
#' 
#' \code{counts.automated_readability_index} - View counts from \code{\link[qdap]{automated_readability_index}}.
#' 
#' @param x The automated_readability_index object.
#' @param \ldots ignored
#' automated_readability_index Method for counts.
#' @export
#' @method counts automated_readability_index
counts.automated_readability_index <- function(x, ...) {
    out <- x[["Counts"]]
    attributes(out) <- list(
            class = c("readability_count", class(out)),
            type = "automated_readability_index_count",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}



#' Prints a readability_count Object
#' 
#' Prints a readability_count object.
#' 
#' @param x The readability_count object.
#' @param digits The number of digits displayed.
#' @param \ldots ignored
#' @method print readability_count
#' @export
print.readability_count <-
    function(x, digits = 3, ...) {
    class(x) <- "data.frame"
    WD <- options()[["width"]]
    options(width=3000)
    print(x)
    options(width=WD)
}




#' Plots a readability_count Object
#' 
#' Plots a readability_count object.
#' 
#' @param x The readability_count object.
#' @param alpha The alpha level to use for points.
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes geom_point theme theme_minimal ylab xlab scale_size_continuous element_blank guides 
#' @importFrom scales alpha
#' @method plot readability_count
#' @export
plot.readability_count <- function(x, alpha = .3, ...){ 

    type <- attributes(x)[["type"]]

    switch(type,
        automated_readability_index_count = {
            word_counts(DF = x, x = "word.count", y = "character.count", 
                z = NULL, g = "group", alpha = alpha)
        },
        coleman_liau_count = {
            word_counts(DF = x, x = "word.count", y = "character.count", 
                z = NULL, g = "group", alpha = alpha)
        },
        SMOG_count = {
            word_counts(DF = x, x = "word.count", y = "polysyllable.count", 
                z = NULL, g = "group", alpha = alpha)
        },
        flesch_kincaid_count = {
            word_counts(DF = x, x = "word.count", y = "syllable.count", 
                z = NULL, g = "group", alpha = alpha)
        },
        fry_count = {
            
            word_counts3(DF = x, x = "sent.per.100.wrds", 
                y = "syllables.per.100.wrds", 
                z = NULL, g = colnames(x)[1])
        },        
        stop("Not a plotable readability count")
    )

}

#' Plots a readability_score Object
#' 
#' Plots a readability_score object.
#' 
#' @param x The readability_score object.
#' @param alpha The alpha level to be used for the points.
#' @param auto.label  logical.  For plotting \code{\link[qdap]{fry}} only, if 
#' \code{TRUE} labels automatically added.  If \code{FALSE} the user clicks 
#' interactively.
#' @param grid logical.  For plotting \code{\link[qdap]{fry}} only, if 
#' \code{TRUE} a micro grid is displayed similar to Fry's original depiction, 
#' though this makes visualizing more difficult.
#' @param div.col For plotting \code{\link[qdap]{fry}} only, the color of the 
#' grade level division lines.
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes geom_smooth facet_wrap guide_colorbar geom_point theme ggplotGrob theme_bw ylab xlab scale_fill_gradient element_blank guides 
#' @importFrom gridExtra grid.arrange
#' @importFrom scales alpha
#' @method plot readability_score
#' @export
plot.readability_score <- function(x, alpha = .3, auto.label, 
    grid, div.col, ...){ 

    type <- attributes(x)[["type"]]

    switch(type,
        automated_readability_index_scores = {
            plot_automated_readability_index(x)
        },
        coleman_liau_scores = {
            plot_coleman_liau(x)
        },
        SMOG_scores = {
            plot_SMOG(x)
        },
        flesch_kincaid_scores = {
            plot_flesch_kincaid(x)
        },
        fry_scores = {
            ## Plotting attributes
            PA <- attributes(x)[["plot_instructions"]]
        
            if (missing(auto.label)) {
                auto.label <- PA[["auto.label"]]
            }
            if (missing(grid)) {
                grid <- PA[["grid"]]
            }
            if (missing(div.col)) {
                div.col <- PA[["div.col"]]
            }    
            plot_fry(x, auto.label = auto.label, grid = grid, 
                    div.col = div.col)
        },
        stop("Not a plotable readability score")
    )

}


plot_automated_readability_index <- function(x) {

    character.count <- sentence.count <- Coleman_Liau <- word.count <- grvar <- 
        Readability_count <-NULL

    names(x)[ncol(x)] <- "Readability_count"
    x  <- x[order(x[, "Readability_count"]), ]
    x[, 1] <- factor(x[, 1], levels = x[, 1])
    forlater <-  names(x)[1]
    names(x)[1] <- "grvar"
    plot1 <- ggplot2::ggplot(x, ggplot2::aes(fill = word.count, x = sentence.count, 
            y = character.count)) + 
        ggplot2::geom_point(size=2.75, shape=21, colour="grey65") +
        ggplot2::theme_bw() + 
        ggplot2::scale_fill_gradient(high="red", low="pink", name="Word\nCount") +
        ggplot2::ylab("Character Count") + 
        ggplot2::xlab("Sentence Count") + 
        ggplot2::theme(panel.grid = ggplot2::element_blank(),
            legend.position = "bottom") +
        ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = 10, barheight = .5)) 
    
    plot2 <- ggplot2::ggplot(x, ggplot2::aes(y = grvar, x = Readability_count)) +
        ggplot2::geom_point(size=2) + 
        ggplot2::ylab(gsub("&", " & ", forlater)) + 
        ggplot2::xlab("Automated Readability Index")

    gridExtra::grid.arrange(plot2, plot1, ncol=2)
}

#' Plots a automated_readability_index Object
#' 
#' Plots a automated_readability_index object.
#' 
#' @param x The readability_score object.
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes guide_colorbar geom_point theme ggplotGrob theme_bw ylab xlab scale_fill_gradient element_blank guides 
#' @importFrom gridExtra grid.arrange
#' @export
#' @method plot automated_readability_index
plot.automated_readability_index <- function(x, ...){ 

    plot.readability_score(scores(x))

}

## Generic helper plotting function for counts
word_counts <- function(DF, x, y, z = NULL, g, alpha = .3) {

    NMS <- rename(c(x, y, z))

    AES <- ggplot2::aes(x=x, y=y, color=g)
    if (!is.null(z)) {
        AES[["size"]] <- g
    }
    MAX <- max(DF[, y])

    plot <- ggplot2::ggplot(data.frame(x=DF[, x], y=DF[, y], z=DF[, z], g=DF[, g]), AES) + 
        ggplot2::geom_point(alpha=alpha) + facet_wrap(~g) + 
        ggplot2::geom_smooth() + ylim(0, MAX + MAX*.05) +
        ggplot2::guides(color=FALSE) +
        ggplot2::ylab(NMS[2]) + 
        ggplot2::xlab(NMS[1]) +
        ggplot2::theme_minimal() + 
        ggplot2::theme(panel.grid = ggplot2::element_blank(),
            panel.spacing = unit(1, "lines")) +
        ggplot2::annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+
        ggplot2::annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf)


    if (!is.null(z)) {
        plot <- plot + ggplot2::scale_size_continuous(names=gsub(" ", "\n", NMS[3]))
    }

    suppressMessages(suppressWarnings(print(plot)))

}

word_counts2 <- function(DF, x, y, z = NULL, g, alpha = .3) {

    NMS <- rename(c(x, y, z))

    AES <- ggplot2::aes(x=x, y=y, color=g)
    if (!is.null(z)) {
        AES[["size"]] <- g
    }
    MAX <- max(DF[, y])

    plot <- ggplot2::ggplot(data.frame(x=DF[, x], y=DF[, y], z=DF[, z], g=DF[, g]), AES) + 
        ggplot2::geom_point(alpha=alpha) + 
        ggplot2::facet_wrap(~g) + 
        ggplot2::geom_smooth() + ylim(0, MAX + MAX*.05) +
        ggplot2::guides(color=FALSE) +
        ggplot2::ylab(NMS[2]) + 
        ggplot2::xlab(NMS[1]) +
        ggplot2::theme_minimal() + 
        ggplot2::theme(panel.grid = ggplot2::element_blank(),
            panel.spacing = unit(1, "lines")) +
        ggplot2::annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+
        ggplot2::annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf)


    if (!is.null(z)) {
        plot <- plot + ggplot2::scale_size_continuous(names=gsub(" ", "\n", NMS[3]))
    }

    invisible(suppressMessages(suppressWarnings(plot)))

}

word_counts3 <- function(DF, x, y, z = NULL, g) {

    NMS <- rename(c(x, y, z))

    AES <- ggplot2::aes(x=x, y=y, fill=g)
    if (!is.null(z)) {
        AES[["size"]] <- g
    }
    MAX <- max(DF[, y])

    avs <- qdapTools::matrix2df(do.call(rbind, lapply(split(DF[, 2:3], 
        DF[,1]), colMeans)), "g")

    plot <- ggplot2::ggplot(data.frame(x=DF[, x], y=DF[, y], z=DF[, z], g=DF[, g]), AES) + 
        ggplot2::geom_point(shape=21, size=3) + 
        ggplot2::facet_wrap(~g) + 
        ggplot2::guides(color=FALSE, fill=FALSE) +
        ggplot2::ylab(NMS[2]) + 
        ggplot2::xlab(NMS[1]) +
        ggplot2::geom_point(data=avs, shape=3, colour="black", ggplot2::aes_string(x=x, y=y, group="g")) 

    if (!is.null(z)) {
        plot <- plot + ggplot2::scale_size_continuous(names=gsub(" ", "\n", NMS[3]))
    }

    suppressMessages(suppressWarnings(print(plot)))

}


## Generic helper funciton for word_counts plotting to rename axi labels
rename <- function(x) {

    input <- c("word.count",  "character.count", "polysyllable.count",
        "syllable.count", "polysyl2word.ratio", "sent.per.100.wrds",
        "syllables.per.100.wrds")
    output <- c("Words Per Sentence", "Characters Per Sentence", 
        "Polysyllables Per Sentence", "Syllables Per Sentence",
        "Polysyllables Per Word", "Sentences Per 100 Words", 
        "Syllables Per 100 Words")
    mgsub(input, output, x)

}

#' Readability Measures
#' 
#' \code{scores.SMOG} - View scores from \code{\link[qdap]{SMOG}}.
#' 
#' SMOG Method for scores
#' @param x The SMOG object.
#' @param \ldots ignored
#' @export
#' @method scores SMOG
scores.SMOG <- function(x, ...) {

    out <- x[["Readability"]]
    attributes(out) <- list(
            class = c("readability_score", class(out)),
            type = "SMOG_scores",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}


#' Prints an SMOG Object
#' 
#' Prints an SMOG object.
#' 
#' @param x The SMOG object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param \ldots ignored
#' @method print SMOG
#' @export
print.SMOG <- function(x, digits = 3, ...) {
    print(scores(x), digits = digits, ...)
}


#' Readability Measures
#' 
#' \code{counts.SMOG} - View counts from \code{\link[qdap]{SMOG}}.
#' 
#' SMOG Method for counts.
#' @param x The SMOG object.
#' @param \ldots ignored
#' @export
#' @method counts SMOG
counts.SMOG <- function(x, ...) {
    out <- x[["Counts"]]
    attributes(out) <- list(
            class = c("readability_count", class(out)),
            type = "SMOG_count",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}


#' Plots a SMOG Object
#' 
#' Plots a SMOG object.
#' 
#' @param x The readability_score object.
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes geom_point theme ylab xlab scale_size_continuous  element_rect
#' @importFrom gridExtra grid.arrange
#' @export
#' @method plot SMOG
plot.SMOG <- function(x, ...){ 

    plot.readability_score(scores(x))

}


plot_SMOG <- function(x, ...){ 

    sentence.count <- SMOG <- word.count <- grvar <- NULL
    
    x  <- x[order(x[, "SMOG"]), ]
    x[, 1] <- factor(x[, 1], levels = x[, 1])
    forlater <-  names(x)[1]
    names(x)[1] <- "grvar"

    ggplot2::ggplot(x, ggplot2::aes(y = grvar, x = SMOG)) +
        ggplot2::geom_point(aes(size = word.count), color="grey75") +
        ggplot2::geom_point(size=2.2) + 
        ggplot2::ylab(gsub("&", " & ", forlater)) + 
        ggplot2::xlab("SMOG") +
        ggplot2::scale_size_continuous(name="  Word\n  Count") + 
        ggplot2::theme(legend.key = ggplot2::element_rect(fill = NA))

}

#' Readability Measures
#' 
#' \code{scores.flesch_kincaid} - View scores from \code{\link[qdap]{flesch_kincaid}}.
#' 
#' flesch_kincaid Method for scores
#' @param x The flesch_kincaid object.
#' @param \ldots ignored
#' @export
#' @method scores flesch_kincaid
scores.flesch_kincaid <- function(x, ...) {

    out <- x[["Readability"]]
    attributes(out) <- list(
            class = c("readability_score", class(out)),
            type = "flesch_kincaid_scores",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}


#' Prints an flesch_kincaid Object
#' 
#' Prints an flesch_kincaid object.
#' 
#' @param x The flesch_kincaid object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param \ldots ignored
#' @method print flesch_kincaid
#' @export
print.flesch_kincaid <- function(x, digits = 3, ...) {
    print(scores(x), digits = digits, ...)
}


#' Readability Measures
#' 
#' \code{counts.flesch_kincaid} - View counts from \code{\link[qdap]{flesch_kincaid}}.
#' 
#' flesch_kincaid Method for counts.
#' @param x The flesch_kincaid object.
#' @param \ldots ignored
#' @export
#' @method counts flesch_kincaid
counts.flesch_kincaid <- function(x, ...) {
    out <- x[["Counts"]]
    attributes(out) <- list(
            class = c("readability_count", class(out)),
            type = "flesch_kincaid_count",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}


#' Plots a flesch_kincaid Object
#' 
#' Plots a flesch_kincaid object.
#' 
#' @param x The readability_score object.
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes guide_colorbar geom_point theme ggplotGrob theme_bw ylab xlab scale_fill_gradient element_blank guides 
#' @importFrom gridExtra grid.arrange
#' @export
#' @method plot flesch_kincaid
plot.flesch_kincaid <- function(x, ...){ 

    plot.readability_score(scores(x))

}


plot_flesch_kincaid <- function(x, ...){ 

    character.count <- sentence.count <- FK_grd.lvl <- FK_read.ease <- 
        word.count <- grvar <- value <- syllable.count <- NULL

    x  <- x[order(x[, "FK_grd.lvl"]), ]
    x[, 1] <- factor(x[, 1], levels = x[, 1])
    forlater <-  names(x)[1]
    names(x)[1] <- "grvar"
    plot1 <- ggplot2::ggplot(x, ggplot2::aes(x = sentence.count, y = syllable.count)) + 
        ggplot2::geom_point(size=2.75, alpha=.3) +
        ggplot2::theme_bw() + 
        ggplot2::ylab("Syllable Count") + 
        ggplot2::xlab("Sentence Count") + 
        ggplot2::theme(panel.grid = ggplot2::element_blank())
    
    x2 <- melt(x[, c(1, 5:6)], id="grvar")
    x2[, "variable"] <- mgsub(c("FK_grd.lvl", "FK_read.ease"),
        c("Flesch Kincaid Grade Level", 
        "Flesch Kincaid Reading Ease"), x2[, "variable"])

    plot2 <- ggplot2::ggplot(x2, ggplot2::aes(y = grvar, x = value)) +
        ggplot2::geom_point(size=2) + 
        ggplot2::ylab(gsub("&", " & ", forlater)) + 
        ggplot2::xlab("score") +
        ggplot2::facet_grid(.~variable, scales="free_x") 

    gridExtra::grid.arrange(plot2, plot1, ncol=2, widths= grid::unit(c(.67, .33), "native"))
}

#' Readability Measures
#' 
#' \code{scores.linsear_write} - View scores from \code{\link[qdap]{linsear_write}}.
#' 
#' linsear_write Method for scores
#' @param x The linsear_write object.
#' @param \ldots ignored
#' @export
#' @method scores linsear_write
scores.linsear_write <- function(x, ...) {

    out <- x[["Readability"]]
    attributes(out) <- list(
            class = c("linsear_write_scores", class(out)),
            type = "linsear_write_scores",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}

#' Prints a linsear_write_scores Object
#' 
#' Prints a linsear_write_scores object.
#' 
#' @param x The linsear_write_scores object.
#' @param digits The number of digits displayed.
#' @param \ldots ignored
#' @method print linsear_write_scores
#' @export
print.linsear_write_scores <-
    function(x, digits = 3, ...) {
    class(x) <- "data.frame"
    WD <- options()[["width"]]
    options(width=3000)
    print(x)
    options(width=WD)
}


#' Prints an linsear_write Object
#' 
#' Prints an linsear_write object.
#' 
#' @param x The linsear_write object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param \ldots ignored
#' @method print linsear_write
#' @export
print.linsear_write <- function(x, digits = 3, ...) {
    print(scores(x), digits = digits, ...)
}


#' Readability Measures
#' 
#' \code{counts.linsear_write} - View counts from \code{\link[qdap]{linsear_write}}.
#' 
#' linsear_write Method for counts.
#' @param x The linsear_write object.
#' @param \ldots ignored
#' @export
#' @method counts linsear_write
counts.linsear_write <- function(x, ...) {
    out <- x[["Counts"]]
    rownames(out) <- NULL
    
    attributes(out) <- list(
            class = c("linsear_write_count", class(out)),
            type = "linsear_write_count",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}

#' Prints a linsear_write_count Object
#' 
#' Prints a linsear_write_count object.
#' 
#' @param x The linsear_write_count object.
#' @param digits The number of digits displayed.
#' @param \ldots ignored
#' @method print linsear_write_count
#' @export
print.linsear_write_count <-
    function(x, digits = 3, ...) {
    class(x) <- "data.frame"
    WD <- options()[["width"]]
    options(width=3000)
    print(x)
    options(width=WD)
}


#' Plots a linsear_write_count Object
#' 
#' Plots a linsear_write_count object.
#' 
#' @param x The linsear_write_count object.
#' @param \ldots ignored
#' @importFrom ggplot2 scale_colour_manual ylab
#' @export
#' @method plot linsear_write_count
plot.linsear_write_count <- function(x, ...){ 

    x[, "Selected"] <- ifelse(is.na(x[, "read.gr"]), "Used", "Not Used")
    nms1 <- paste(sapply(unlist(strsplit(names(x)[1], "\\&")), 
        Caps), collapse = " & ")
    names(x)[1] <- "group"
    x[, "group"] <- paste(1:nrow(x), x[, "group"], sep="|||")

    dat <- gantt(x[, "text.var"], x[, "group"])
    dat[, "group"] <- sapply(strsplit(as.character(dat[, "group"]), "\\|\\|\\|"), "[", 2)
    dat[, "Selected"] <- ifelse(is.na(x[, "read.gr"]), "Used", "Not Used")

    plot1 <- gantt_wrap(dat, "group", fill.var = "Selected", plot = FALSE)

    plot1 + 
        ggplot2::scale_colour_manual(values=c("grey90", "red")) +
        ggplot2::ylab(nms1) 
}



#' Plots a linsear_write Object
#' 
#' Plots a linsear_write object.
#' 
#' @param x The readability_score object.
#' @param alpha The alpha level for the points and smooth fill in the 
#' scatterplot (length one or two; if two 1-points, 2-smooth fill).
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth theme ggplotGrob theme_bw ylab xlab
#' @importFrom gridExtra grid.arrange
#' @importFrom scales alpha
#' @export
#' @method plot linsear_write
plot.linsear_write <- function(x, alpha = .4, ...){ 

    graphics::plot(scores(x, alpha = alpha, ...))

}

#' Plots a linsear_write_scores Object
#' 
#' Plots a linsear_write_scores object.
#' 
#' @param x The readability_score object.
#' @param alpha The alpha level for the points and smooth fill in the 
#' scatterplot (length one or two; if two 1-points, 2-smooth fill).
#' @param \ldots Other arguments passed to \code{\link[ggplot2]{geom_smooth}}.
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth theme ggplotGrob theme_bw ylab xlab
#' @importFrom gridExtra grid.arrange
#' @importFrom scales alpha
#' @export
#' @method plot linsear_write_scores
plot.linsear_write_scores <- function(x, alpha = c(.4, .08), ...){ 

    hard_easy_sum <- Linsear_Write <- NULL
    
    character.count <- sent.per.100 <- Linsear_write <- 
        hard_easy_sumt <- grvar <- NULL
  
    x  <- x[order(x[, "Linsear_Write"]), ]
    x[, 1] <- factor(x[, 1], levels = x[, 1])
    forlater <-  names(x)[1]
    names(x)[1] <- "grvar"

    plot1 <- ggplot2::ggplot(x, ggplot2::aes(y = hard_easy_sum, 
            x = sent.per.100)) + 
        ggplot2::geom_smooth(fill="blue", colour="darkblue", size=1, 
            alpha = utils::tail(alpha, 1), ...) +
        ggplot2::geom_point(size=2.75, alpha = utils::head(alpha, 1)) +
        ggplot2::theme_bw() + 
        ggplot2::xlab("Sentence Per 100 Words") + 
        ggplot2::ylab("3 x Hard Words + Easy Words") + 
        ggplot2::theme(panel.grid = ggplot2::element_blank()) 
    
    plot2 <- ggplot2::ggplot(x, ggplot2::aes(y = grvar, x = Linsear_Write)) +
        ggplot2::geom_point(size=2) + 
        ggplot2::ylab(gsub("&", " & ", forlater)) + 
        ggplot2::xlab("Linsear Write")

    gridExtra::grid.arrange(plot2, plot1, ncol=2)
}

####=============Template================

#' Readability Measures
#' 
#' \code{scores.coleman_liau} - View scores from \code{\link[qdap]{coleman_liau}}.
#' 
#' coleman_liau Method for scores
#' @param x The coleman_liau object.
#' @param \ldots ignored
#' @export
#' @method scores coleman_liau
scores.coleman_liau <- function(x, ...) {

    out <- x[["Readability"]]
    attributes(out) <- list(
            class = c("readability_score", class(out)),
            type = "coleman_liau_scores",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}


#' Prints an coleman_liau Object
#' 
#' Prints an coleman_liau object.
#' 
#' @param x The coleman_liau object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param \ldots ignored
#' @method print coleman_liau
#' @export
print.coleman_liau <- function(x, digits = 3, ...) {
    print(scores(x), digits = digits, ...)
}


#' Readability Measures
#' 
#' \code{counts.coleman_liau} - View counts from \code{\link[qdap]{coleman_liau}}.
#' 
#' coleman_liau Method for counts.
#' @param x The coleman_liau object.
#' @param \ldots ignored
#' @export
#' @method counts coleman_liau
counts.coleman_liau <- function(x, ...) {
    out <- x[["Counts"]]
    attributes(out) <- list(
            class = c("readability_count", class(out)),
            type = "coleman_liau_count",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}


#' Plots a coleman_liau Object
#' 
#' Plots a coleman_liau object.
#' 
#' @param x The readability_score object.
#' @param \ldots ignored
#' @importFrom ggplot2 ggplot aes guide_colorbar geom_point theme ggplotGrob theme_bw ylab xlab scale_fill_gradient element_blank guides 
#' @importFrom gridExtra grid.arrange
#' @export
#' @method plot coleman_liau
plot.coleman_liau <- function(x, ...){ 

    plot.readability_score(scores(x))

}


plot_coleman_liau <- function(x, ...){ 

    character.count <- sentence.count <- Coleman_Liau <- word.count <- grvar <- 
        NULL
    
    x  <- x[order(x[, "Coleman_Liau"]), ]
    x[, 1] <- factor(x[, 1], levels = x[, 1])
    forlater <-  names(x)[1]
    names(x)[1] <- "grvar"
    plot1 <- ggplot2::ggplot(x, ggplot2::aes(fill = word.count, x = sentence.count, 
            y = character.count)) + 
        ggplot2::geom_point(size=2.75, shape=21, colour="grey65") +
        ggplot2::theme_bw() + 
        ggplot2::scale_fill_gradient(high="red", low="pink", name="Word\nCount") +
        ggplot2::ylab("Character Count") + 
        ggplot2::xlab("Sentence Count") + 
        ggplot2::theme(panel.grid = ggplot2::element_blank(),
            legend.position = "bottom") +
        ggplot2::guides(fill = ggplot2::guide_colorbar(barwidth = 10, barheight = .5)) 
    
    plot2 <- ggplot2::ggplot(x, ggplot2::aes(y = grvar, x = Coleman_Liau)) +
        ggplot2::geom_point(size=2) + 
        ggplot2::ylab(gsub("&", " & ", forlater)) + 
        ggplot2::xlab("Coleman Liau")

    gridExtra::grid.arrange(plot2, plot1, ncol=2)
}

###==============End of template==========

plot_fry <- function(x, auto.label = TRUE, grid = FALSE, div.col = "grey85", ...) {

    ## Setting up the graph
    suppressWarnings(graphics::plot(1, 1, xlim = c(108, 182), ylim = c(2, 
        25), axes = FALSE, type = "n", 
        xlab = "Average number of syllables per 100 words", 
        ylab = "Average number of sentences per 100 words", 
        main = "Fry Graph for Estimating Reading Ages (grade level)", 
        xaxs = "i", yaxs = "i"))
    graphics::axis(1, at = 108:182, labels = TRUE)
    graphics::axis(1, at = seq(108, 182, by = 4), tcl = -1.1, labels = FALSE)
    graphics::axis(2, at = 2:25, labels = TRUE)
    graphics::axis(4, at = 2:25, labels = TRUE)
    if (grid){
        grid(nx = 74, ny = 46, lty = "solid", col = "gold")
        grid(nx = 37, ny = 23, lty = "solid", col = "gray65")
    }
    graphics::box()
    graphics::segments(108, 9.97, 133.9, 25, lwd = 2, col = div.col)
    graphics::segments(108, 7.7, 142.1, 25, lwd = 2, col = div.col)
    graphics::segments(108, 6.4, 147.8, 25, lwd = 2, col = div.col)
    graphics::segments(108, 5.61, 155, 25, lwd = 2, col = div.col)
    graphics::segments(108, 5.1, 160, 25, lwd = 2, col = div.col)
    graphics::segments(108, 3.8, 160.8, 25, lwd = 2, col = div.col)
    graphics::segments(111.5, 2, 167.5, 25, lwd = 2, col = div.col)
    graphics::segments(123, 2, 169, 25, lwd = 2, col = div.col)
    graphics::segments(136.3, 2, 169, 25, lwd = 2, col = div.col)
    graphics::segments(143.3, 2, 173.5, 25, lwd = 2, col = div.col)
    graphics::segments(148.15, 2, 178.2, 25, lwd = 2, col = div.col)
    graphics::segments(155.6, 2, 178.5, 25, lwd = 2, col = div.col)
    graphics::segments(161.9, 2, 178.4, 25, lwd = 2, col = div.col)
    graphics::segments(168.2, 2, 178.2, 25, lwd = 2, col = div.col)
    graphics::segments(173.9, 2, 178.7, 25, lwd = 2, col = div.col)
    graphics::segments(179.1, 2, 181.25, 25, lwd = 2, col = div.col)
    x1 <- c(108, 108, 128)
    y1 <- c(4.2, 2, 2)
    graphics::polygon(x1, y1, col = "darkblue", border = "darkblue")
    x2 <- c(143, 145, 150, 152, 155, 158, 162, 166, 172, 180, 181, 182, 182)
    y2 <- c(25, 18.3, 14.3, 12.5, 11.1, 10, 9.1, 8.3, 7.7, 7.55, 7.5, 7.5, 25)
    graphics::polygon(x2, y2, col = "darkblue", border = "darkblue")  
    graphics::text(120, 22.2, 1, col = "darkgreen", cex = 1.25)
    graphics::text(124, 17.9, 2, col = "darkgreen", cex = 1.25)
    graphics::text(126, 15.9, 3, col = "darkgreen", cex = 1.25)
    graphics::text(127, 14.4, 4, col = "darkgreen", cex = 1.25)
    graphics::text(128, 13.3, 5, col = "darkgreen", cex = 1.25)
    graphics::text(110.7, 5.5, 6, col = "darkgreen", cex = 1.25)
    graphics::text(115, 4.88, 7, col = "darkgreen", cex = 1.25)
    graphics::text(121, 3.78, 8, col = "darkgreen", cex = 1.25)
    graphics::text(132, 3.5, 9, col = "darkgreen", cex = 1.25)
    graphics::text(142, 3.5, 10, col = "darkgreen", cex = 1.25)
    graphics::text(147.7, 3.5, 11, col = "darkgreen", cex = 1.25)
    graphics::text(153.7, 3.5, 12, col = "darkgreen", cex = 1.25)
    graphics::text(160, 3.5, 13, col = "darkgreen", cex = 1.25)
    graphics::text(166.4, 3.5, 14, col = "darkgreen", cex = 1.25)
    graphics::text(171, 3.5, 15, col = "darkgreen", cex = 1.25)
    graphics::text(176.7, 3.5, 16, col = "darkgreen", cex = 1.25)
    graphics::text(180.7, 3.5, 17, col = "darkgreen", cex = 1.25)

    ## Actually plotting the points
    graphics::points(x[, "ave.syll.per.100"], x[, "ave.sent.per.100"], pch = 19, cex = 1, 
        col = "red")
    graphics::points(x[, "ave.syll.per.100"], x[, "ave.sent.per.100"], pch = 3, cex = 3, 
        col = "black")  
    if (!auto.label) {
        graphics::identify(x[, "ave.syll.per.100"], x[, "ave.sent.per.100"], x[, 1])
    } else {
        graphics::text(x[, "ave.syll.per.100"], x[, "ave.sent.per.100"], labels = x[, 1], 
            adj = c(1, -0.5))
    }  
}

#' Readability Measures
#' 
#' \code{scores.fry} - View scores from \code{\link[qdap]{fry}}.
#' 
#' fry Method for scores
#' @param x The fry object.
#' @param \ldots ignored
#' @export
#' @method scores fry
scores.fry <- function(x, ...) {

    out <- x[["Readability"]]
    attributes(out) <- list(
            class = c("readability_score", class(out)),
            type = "fry_scores",
            names = colnames(out),
            row.names = rownames(out),
            plot_instructions = attributes(x)[["plot_instructions"]]
    )
    out
}


#' Prints an fry Object
#' 
#' Prints an fry object.
#' 
#' @param x The fry object.
#' @param digits The number of digits displayed if \code{values} is \code{TRUE}.
#' @param auto.label  logical.  If \code{TRUE} labels automatically added.  If 
#' \code{FALSE} the user clicks interactively.
#' @param grid logical.  If \code{TRUE} a micro grid is displayed similar to 
#' Fry's original depiction, though this makes visualizing more difficult.
#' @param div.col The color of the grade level division lines.
#' @param plot logical.  If \code{TRUE} a graph is plotted corresponding to Fry's 
#' graphic representation.
#' @param \ldots ignored
#' @method print fry
#' @export
print.fry <- function(x, digits = 3, auto.label, 
    grid, div.col, plot, ...) {

    print(scores(x), digits = digits, ...)

    ## Plotting attributes
    PA <- attributes(x)[["plot_instructions"]]

    if (missing(auto.label)) {
        auto.label <- PA[["auto.label"]]
    }
    if (missing(grid)) {
        grid <- PA[["grid"]]
    }
    if (missing(div.col)) {
        div.col <- PA[["div.col"]]
    }
    if (missing(plot)) {
        plot <- PA[["plot"]]
    }

    if (plot) {
        plot_fry(scores(x), auto.label = auto.label, grid = grid, 
            div.col = div.col)
    }

}


#' Readability Measures
#' 
#' \code{counts.fry} - View counts from \code{\link[qdap]{fry}}.
#' 
#' fry Method for counts.
#' @param x The fry object.
#' @param \ldots ignored
#' @export
#' @method counts fry
counts.fry <- function(x, ...) {
    out <- x[["Counts"]]
    attributes(out) <- list(
            class = c("readability_count", class(out)),
            type = "fry_count",
            names = colnames(out),
            row.names = rownames(out)
    )
    out
}
Any scripts or data that you put into this service are public.
qdap documentation built on May 31, 2023, 5:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
qdap
Bridging the Gap Between Qualitative Data and Quantitative Analysis

R/automated_readability_index.R
In qdap: Bridging the Gap Between Qualitative Data and Quantitative Analysis

Try the qdap package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

qdap Bridging the Gap Between Qualitative Data and Quantitative Analysis

R/automated_readability_index.R In qdap: Bridging the Gap Between Qualitative Data and Quantitative Analysis

Try the qdap package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

qdap
Bridging the Gap Between Qualitative Data and Quantitative Analysis

R/automated_readability_index.R
In qdap: Bridging the Gap Between Qualitative Data and Quantitative Analysis