R/utils_distrb_clipboard.R

#' Visualize Distrbution of the Data in Clipboard
#'
#' This function can visualize the data distribution from clipboard. If the data
#' can be coerced to numeric, then ouput a histogram; if cannot, then output a
#' horizontal bar plot.
#'
#' @author Yiying Wang, \email{wangy@@aetna.com}
#' @param varRow Row index of the header row. Default 1. If no header needed,
#' put it 0.
#'
#' @return Histograms/bar charts on various windows devices.
#' @import ggplot2
#' @importFrom stringr str_replace_all
#' @export
#'
#' @examples
#' \dontrun{
#' getDistrFromClipb()
#' }
getDistrFromClipb <- function(varRow=1){
    data <- readClipboard()
    if (all(str_detect(data, "\t"))){
        data <- strsplit(data, "\t")
        data <- do.call("rbind", data)
    }
    data <- data.frame(data, stringsAsFactors=FALSE)
    if (varRow>0) {
        names(data) <- data[varRow, ]
        data <- data[-varRow, , drop=FALSE]
    }
    data <- sapply(names(data), cmpfun(function(var) {
        data[,var] <- str_replace_all(data[, var], "^$", NA)
    }))
    data <- data.frame(data, stringsAsFactors=FALSE)
    notAllNA <- sapply(seq_len(ncol(data)), function(var) {
        return(!all(is.na(data[,var])))
    })
    data <- data[, notAllNA, drop=FALSE]

    dat <- getInitNum(data)
    sapply(names(data), cmpfun(function(var) {
        .newDev <- function(title){
            if (Sys.info()[['sysname']] == "Windows"){
                windows(8, 6, xpos=125, ypos=125, title=title)
            }else if (Sys.info()[['sysname']] == "Darwin") {
                x11(8, 6, xpos=125, ypos=125, title=title)
            }else{
                quartz(8, 6, xpos=125, ypos=125, title=title)
            }
        }
        if (!all(is.na(dat[, var]))){  # if dat[,var] contains numeric
            mean <- mean(dat[, var], na.rm=TRUE)
            sd <- sd(dat[,var], na.rm=TRUE)
            gg <- ggplot(dat, aes_string(x=var)) + xlab(var) +
                geom_histogram(bins=13, fill="deepskyblue", color="white",
                               size=1, alpha=0.5) +
                geom_vline(xintercept = mean, color="tomato3", size=1.2, alpha=0.8,
                           show.legend=TRUE) +
                geom_vline(xintercept = c(mean+sd, mean-sd), color="tomato3",
                           linetype=2, size=1.2, alpha=0.8, show.legend=TRUE)
            .newDev(paste("Numeric Distribution of", var))
            print(gg)
        }
        if (sum(!is.na(dat[,var])) != sum(!is.na(data[,var]))) {
            out <- as.data.frame(table(data[,var]))
            names(out) <- c("Value", "Freq")
            out <- out[order(-out$Freq), ]
            out$Value <- factor(out$Value, levels=out$Value)
            gg <- ggplot(out, aes(x=Value, y=Freq)) +
                geom_bar(stat="identity", color="white", fill="tomato2",
                         size=1, alpha=0.5) + coord_flip()
            .newDev(paste("Character Distribution of", var))
            print(gg)
        }
    }))
}
madlogos/aseshms documentation built on May 21, 2019, 11:03 a.m.