R/statistics-summaries-menu.R

Defines functions summarizeDataSet numericalSummaries correlationTest countMissing

# Statistics Menu dialogs

# last modified 2013-05-05 by J. Fox

# Summaries menu

summarizeDataSet <- function(){
	nvar <- length(Variables())
	.activeDataSet <- ActiveDataSet()
	if (nvar > 10){
		response <- RcmdrTkmessageBox(message=sprintf(gettextRcmdr("There are %d variables in the data set %s.\nDo you want to proceed?"), nvar, .activeDataSet),
				icon="question", type="okcancel", default="cancel")
		if ("cancel" == tclvalue(response)) {
			tkfocus(CommanderWindow())
			return()
		}
	}
	doItAndPrint(paste("summary(", .activeDataSet, ")", sep=""))
}

numericalSummaries <- function(){ # dialog memory 2011-06-27  J. Fox
    Library("abind")
    Library("e1071")
    defaults <- list(initial.x=NULL, initial.mean="1", initial.sd="1", initial.IQR="1", initial.cv="0",
                     initial.quantiles.variable="1", 
                     initial.quantiles="0, .25, .5, .75, 1", 
                     initial.skewness="0", initial.kurtosis="0", initial.type="2",
                     initial.group=NULL)
    dialog.values <- getDialog("numericalSummaries", defaults)
    initial.group <- dialog.values$initial.group
    initializeDialog(title=gettextRcmdr("Numerical Summaries"))
    notebook <- ttknotebook(top, width=600)
    dataTab <- tkframe(top)
    statisticsTab <- tkframe(top)
    xBox <- variableListBox(dataTab, Numeric(), selectmode="multiple", title=gettextRcmdr("Variables (pick one or more)"),
                            initialSelection=varPosn(dialog.values$initial.x, "numeric"))
    checkBoxes(window = statisticsTab, frame="checkBoxFrame", boxes=c("mean", "sd", "IQR", "cv"), 
               initialValues=c(dialog.values$initial.mean, dialog.values$initial.sd, dialog.values$initial.IQR, dialog.values$initial.cv), 
               labels=gettextRcmdr(c("Mean", "Standard Deviation", "Interquartile Range", "Coefficient of Variation")))
    skFrame <- tkframe(statisticsTab)
    checkBoxes(window = skFrame, frame="skCheckBoxFrame", boxes=c("skewness", "kurtosis"), 
               initialValues=c(dialog.values$initial.skewness, dialog.values$initial.kurtosis), 
               labels=gettextRcmdr(c("Skewness", "Kurtosis")))
    radioButtons(window = skFrame, name="typeButtons", buttons=c("b1", "b2", "b3"), values=c("1", "2", "3"), 
                 initialValue=dialog.values$initial.type,
                 labels=gettextRcmdr(c("Type 1", "Type 2", "Type 3")))
    quantilesVariable <- tclVar(dialog.values$initial.quantiles.variable)
    quantilesFrame <- tkframe(statisticsTab)
    quantilesCheckBox <- tkcheckbutton(quantilesFrame, variable=quantilesVariable)
    quantiles <- tclVar(dialog.values$initial.quantiles)
    quantilesEntry <- ttkentry(quantilesFrame, width="20", textvariable=quantiles)
    groupsBox(recall=numericalSummaries, label=gettextRcmdr("Summarize by:"), 
              initialLabel=if (is.null(initial.group)) gettextRcmdr("Summarize by groups") 
              else paste(gettextRcmdr("Summarize by:"), initial.group), 
              initialGroup=initial.group, window = dataTab)
    onOK <- function(){
        x <- getSelection(xBox)
        quants <- tclvalue(quantiles)
        meanVar <- tclvalue(meanVariable)
        sdVar <- tclvalue(sdVariable)
        IQRVar <- tclvalue(IQRVariable)
        cvVar <- tclvalue(cvVariable)
        quantsVar <- tclvalue(quantilesVariable)
        skewnessVar <- tclvalue(skewnessVariable)
        kurtosisVar <- tclvalue(kurtosisVariable)
        typeVar <- tclvalue(typeButtonsVariable)
        putDialog("numericalSummaries", list(
            initial.x=x, initial.mean=meanVar, initial.sd=sdVar, initial.IQR=IQRVar, initial.cv=cvVar,
            initial.quantiles.variable=quantsVar, initial.quantiles=quants,
            initial.skewness=skewnessVar, initial.kurtosis=kurtosisVar, initial.type=typeVar,
            initial.group=if (.groups != FALSE) .groups else NULL
        ))      
        if (length(x) == 0){
            errorCondition(recall=numericalSummaries, message=gettextRcmdr("You must select a variable."))
            return()
        }
        closeDialog()
        quants <- paste("c(", gsub(",+", ",", gsub(" ", ",", quants)), ")", sep="")
        .activeDataSet <- ActiveDataSet()
        vars <- if (length(x) == 1) paste('"', x, '"', sep="") 
        else paste("c(", paste('"', x, '"', collapse=", ", sep=""), ")", sep="")
        vars <- paste(.activeDataSet, "[,", vars, "]", sep="")
        stats <- paste("c(",
                       paste(c('"mean"', '"sd"', '"IQR"', '"quantiles"', '"cv"', '"skewness"', '"kurtosis"')
                             [c(meanVar, sdVar, IQRVar, quantsVar, cvVar, skewnessVar, kurtosisVar) == 1], 
                             collapse=", "), ")", sep="")
        if (stats == "c()"){
            errorCondition(recall=numericalSummaries, message=gettextRcmdr("No statistics selected."))
            return()
        }
        type.text <- if (skewnessVar == 1 || kurtosisVar == 1) paste(', type="', typeVar, '"', sep="") else ""
        command <- if (.groups != FALSE) {
            grps <- paste(.activeDataSet, "$", .groups, sep="")
            paste("numSummary(", vars, ", groups=", grps, ", statistics=", stats, 
                  ", quantiles=", quants, type.text, ")", sep="")
        }
        else  paste("numSummary(", vars, ", statistics=", stats, 
                    ", quantiles=", quants, type.text, ")", sep="")
        doItAndPrint(command) 
        tkfocus(CommanderWindow())
    }
    OKCancelHelp(helpSubject="numSummary", reset="numericalSummaries", apply ="numericalSummaries")
    tkgrid(getFrame(xBox), sticky="nw")    
    tkadd(notebook, dataTab, text=gettextRcmdr("Data"), padding=6)
    tkadd(notebook, statisticsTab, text=gettextRcmdr("Statistics"), padding=6)
    tkgrid(checkBoxFrame, sticky="w")
    tkgrid(ttklabel(statisticsTab, text=""))
    tkgrid(skCheckBoxFrame, typeButtonsFrame, sticky="nw")
    tkgrid(skFrame, sticky="w")
    tkgrid(labelRcmdr(quantilesFrame, text=gettextRcmdr("Quantiles")), quantilesCheckBox,
           labelRcmdr(quantilesFrame, text=gettextRcmdr(" quantiles:")), quantilesEntry, sticky="w")
    tkgrid(ttklabel(statisticsTab, text=""))
    tkgrid(quantilesFrame, sticky="w")
    tkgrid(ttklabel(dataTab, text=""))
    tkgrid(groupsFrame, sticky = "w", padx=6)
    tkgrid(notebook, sticky="nsew") 
    tkgrid(buttonsFrame, sticky="w")
    dialogSuffix(rows=7, columns=1)
}

frequencyDistribution <- function () {
	defaults <- list (initial.x = NULL, initial.goodnessOfFit = "0")
	dialog.values <- getDialog ("frequencyDistribution", defaults)
	initializeDialog(title = gettextRcmdr("Frequency Distributions"))
	xBox <- variableListBox(top, Factors(), selectmode = "multiple", 
			title = gettextRcmdr("Variables (pick one or more)"),
			initialSelection = varPosn (dialog.values$initial.x, "factor"))
	optionsFrame <- tkframe(top)
	goodnessOfFitVariable <- tclVar(dialog.values$initial.goodnessOfFit)
	goodnessOfFitCheckBox <- ttkcheckbutton(optionsFrame, variable = goodnessOfFitVariable)
	onOK <- function() {
		x <- getSelection(xBox)
		if (length(x) == 0) {
			errorCondition(recall = frequencyDistribution, message = gettextRcmdr("You must select a variable."))
			return()
		}
		goodnessOfFit <- tclvalue(goodnessOfFitVariable)
		putDialog ("frequencyDistribution", list (initial.x = x, initial.goodnessOfFit = goodnessOfFit))
		if (length(x) > 1 && goodnessOfFit == "1") {
			errorCondition(recall = frequencyDistribution, message = gettextRcmdr("Goodness-of-fit test not available when more than one variable is selected."))
			return()
		}
		closeDialog()
		.activeDataSet <- ActiveDataSet()
		for (variable in x) {
			command <- paste("table(", .activeDataSet, "$", variable, 
					")", sep = "")
# 			logger(paste(".Table <-", command))
# 			assign(".Table", justDoIt(command), envir = .GlobalEnv)
			doItAndPrint(paste(".Table <-", command))
			doItAndPrint(paste(".Table  # counts for", variable))
			doItAndPrint(paste("round(100*.Table/sum(.Table), 2)  # percentages for", 
							variable))
		}
		env <- environment()
		if (goodnessOfFit == 1) {
			initializeDialog(subwin, title = gettextRcmdr("Goodness-of-Fit Test"))
			hypothesisFrame <- tkframe(subwin)
			levs <- eval(parse(text = paste("levels(", .activeDataSet, 
									"$", x, ")", sep = "")))
			n.levs <- length(levs)
			assign(".entry.1", tclVar(paste("1/", n.levs, sep = "")), 
					envir = env)
			make.entries <- "labelRcmdr(hypothesisFrame, text='Hypothesized probabilities:   ')"
			make.lev.names <- "labelRcmdr(hypothesisFrame, text='Factor levels:')"
			for (i in 1:n.levs) {
				entry.varname <- paste(".entry.", i, sep = "")
				assign(entry.varname, tclVar(paste("1/", n.levs, 
										sep = "")), envir = env)
				make.entries <- paste(make.entries, ", ", "ttkentry(hypothesisFrame, width='5', textvariable=", 
						entry.varname, ")", sep = "")
				make.lev.names <- paste(make.lev.names, ", labelRcmdr(hypothesisFrame, text='", 
						levs[i], "')", sep = "")
			}
			eval(parse(text = paste("tkgrid(", make.lev.names, 
									", sticky='w')", sep = "")), envir = env)
			eval(parse(text = paste("tkgrid(", make.entries, 
									", stick='w')", sep = "")), envir = env)
			tkgrid(hypothesisFrame, sticky = "w")
			onOKsub <- function() {
				probs <- rep(NA, n.levs)
				for (i in 1:n.levs) {
					entry.varname <- paste(".entry.", i, sep = "")
					res <- try(entry <- eval(parse(text = eval(parse(text = paste("tclvalue(", 
																	entry.varname, ")", sep = "")), envir = env))), 
							silent = TRUE)
					if (class(res) == "try-error") {
						errorCondition(subwin, message = gettextRcmdr("Invalid entry."))
						return()
					}
					if (length(entry) == 0) {
						errorCondition(subwin, message = gettextRcmdr("Missing entry."))
						return()
					}
					opts <- options(warn = -1)
					probs[i] <- as.numeric(entry)
					options(opts)
				}
				probs <- na.omit(probs)
				if (length(probs) != n.levs) {
					errorCondition(subwin, message = sprintf(gettextRcmdr("Number of valid entries (%d)\nnot equal to number levels (%d)."), 
									length(probs), n.levs))
					return()
				}
				if (any(probs < 0)) {
					errorCondition(subwin, message = gettextRcmdr("Negative probabilities not allowed."))
					return()
				}
				if (abs(sum(probs) - 1) > 0.001) {
					Message(message = gettextRcmdr("Probabilities rescaled to sum to 1."), 
							type = "warning")
					probs <- probs/sum(probs)
				}
				closeDialog(subwin)
				command <- paste("c(", paste(probs, collapse = ","), 
						")", sep = "")
# 				logger(paste(".Probs <-", command))
# 				assign(".Probs", justDoIt(command), envir = .GlobalEnv)
				doItAndPrint(paste(".Probs <-", command))
				doItAndPrint("chisq.test(.Table, p=.Probs)")
				logger("remove(.Probs)")
				remove(.Probs, envir = .GlobalEnv)
			}
			subOKCancelHelp(subwin)
			tkgrid(subButtonsFrame, sticky = "w")
			dialogSuffix(subwin, rows = 2, columns = 1, onOK = onOKsub, 
					focus = subwin)
		}
		logger("remove(.Table)")
		remove(.Table, envir = .GlobalEnv)
		tkfocus(CommanderWindow())
	}
	OKCancelHelp(helpSubject = "table", reset = "frequencyDistribution")
	tkgrid(getFrame(xBox), sticky = "nw")
	tkgrid(labelRcmdr(optionsFrame, text = gettextRcmdr("Chi-square goodness-of-fit test (for one variable only)")), 
			goodnessOfFitCheckBox, sticky = "w")
	tkgrid(optionsFrame, sticky = "w")
	tkgrid(buttonsFrame, sticky = "w")
	dialogSuffix(rows = 3, columns = 2)
}

statisticsTable <- function () {
    defaults <- list (initial.group=NULL, initial.response=NULL, initial.statistic="mean", initial.other = "")
    dialog.values <- getDialog ("statisticsTable", defaults)
    initializeDialog(title = gettextRcmdr("Table of Statistics"))
    variablesFrame <- tkframe(top)
    groupBox <- variableListBox(variablesFrame, Factors(), selectmode = "multiple", 
                                title = gettextRcmdr("Factors (pick one or more)"), 
                                initialSelection = varPosn(dialog.values$initial.group,"factor"))
    responseBox <- variableListBox(variablesFrame, Numeric(), selectmode = "multiple", 
                                   initialSelection = varPosn(dialog.values$initial.response, "numeric"),
                                   title = gettextRcmdr("Response variables (pick one or more)"))
    statFrame <- tkframe(top)
    radioButtons(statFrame, name = "statistic", buttons = c("mean", "median", "sd", "IQR", "other"), 
                 labels = gettextRcmdr(c("Mean", "Median", "Standard deviation", "Interquartile range", "Other (specify)")), 
                 initialValue = dialog.values$initial.statistic, 
                 title = gettextRcmdr("Statistic"))
    otherVariable <- tclVar(dialog.values$initial.other)
    otherEntry <- ttkentry(statFrame, width = "20", textvariable = otherVariable)
    tkgrid(statisticFrame, labelRcmdr(statFrame, text ="  "), otherEntry, sticky = "sw")
    onOK <- function() {
        groups <- getSelection(groupBox)
        if (0 == length(groups)) {
            errorCondition(recall = statisticsTable, message = gettextRcmdr("No factors selected."))
            return()
        }
        responses <- getSelection(responseBox)
        if (0 == length(responses)) {
            errorCondition(recall = statisticsTable, message = gettextRcmdr("You must select a response variable."))
            return()
        }
        stat <- statistic <- tclvalue(statisticVariable)
        if (statistic == "other") 
            statistic <- tclvalue(otherVariable)
        putDialog ("statisticsTable", list(initial.group=groups, initial.response=responses, 
                                           initial.statistic=stat, initial.other = if(stat == "other") statistic else ""))  
        closeDialog()
        .activeDataSet <- ActiveDataSet()
        groups.list <- paste(paste(groups, "=", .activeDataSet, 
                                   "$", groups, sep = ""), collapse = ", ")
        for (response in responses) {
            if (length(responses) > 1) 
                doItAndPrint(paste("# Table for ", response, 
                                   ":", sep = ""))
            doItAndPrint(paste("tapply(", .activeDataSet, "$", 
                               response, ", list(", groups.list, "), ", statistic, 
                               ", na.rm=TRUE)", sep = ""))
        }
        tkfocus(CommanderWindow())
    }
    OKCancelHelp(helpSubject = "tapply", reset="statisticsTable", apply="statisticsTable")
    tkgrid(getFrame(groupBox), labelRcmdr(variablesFrame, text = "    "), 
           getFrame(responseBox), sticky = "nw")
    tkgrid(variablesFrame, sticky = "w")
    tkgrid(statFrame, sticky = "w")
    tkgrid(buttonsFrame, sticky = "w")
    dialogSuffix(rows = 3, columns = 1, focus = otherEntry)
}

correlationMatrix <- function (){
    defaults <- list (initial.x = NULL, initial.correlations = "Pearson", initial.pvaluesVar="0", initial.use="complete")  
    dialog.values <- getDialog ("correlationMatrix", defaults)
    initializeDialog(title = gettextRcmdr("Correlation Matrix"))
    xBox <- variableListBox(top, Numeric(), selectmode = "multiple", 
        title = gettextRcmdr("Variables (pick two or more)"),
        initialSelection = varPosn (dialog.values$initial.x, "numeric"))
    radioButtons(name = "correlations", buttons = c("pearson", 
        "spearman", "partial"), values = c("Pearson", "Spearman", 
            "partial"), labels = gettextRcmdr(c("Pearson product-moment", 
                "Spearman rank-order", "Partial")), title = gettextRcmdr("Type of Correlations"),
        initialValue = dialog.values$initial.correlations)
    radioButtons(name = "use", buttons = c("complete", "pairwise.complete"), 
        labels = gettextRcmdr(c("Complete observations", "Pairwise-complete observations")), title = gettextRcmdr("Observations to Use"),
        initialValue = dialog.values$initial.use)
    pvaluesFrame <- tkframe(top)
    pvaluesVar <- tclVar(dialog.values$initial.pvaluesVar)
    pvaluesCheckbox <- ttkcheckbutton(pvaluesFrame, variable = pvaluesVar)
    onOK <- function() {
        correlations <- tclvalue(correlationsVariable)
        use <- tclvalue(useVariable)
        x <- getSelection(xBox)
        pvalues <- tclvalue(pvaluesVar)
        if (2 > length(x)) {
            errorCondition(recall = correlationMatrix, message = gettextRcmdr("Fewer than 2 variables selected."))
            return()
        }
        if ((correlations == "partial") && (3 > length(x))) {
            errorCondition(recall = correlationMatrix, message = gettextRcmdr("Fewer than 3 variables selected\nfor partial correlations."))
            return()
        }
        closeDialog()
        putDialog ("correlationMatrix", list (initial.x=x, initial.correlations=correlations, 
            initial.pvaluesVar=pvalues, initial.use=use))
        x <- paste("\"", x, "\"", sep = "")
        .activeDataSet <- ActiveDataSet()
        if (correlations == "Pearson") {
            if (pvalues == 0) {
                doItAndPrint(paste("cor(", .activeDataSet, "[,c(", 
                    paste(x, collapse = ","), ")], use=\"", use, "\")", 
                    sep = ""))
            }
            else {
                Library("Hmisc")
                doItAndPrint(paste("rcorr.adjust(", .activeDataSet, 
                    "[,c(", paste(x, collapse = ","), ")], type=\"pearson\", use=\"", use, "\")", 
                    sep = ""))
            }
        }
        else if (correlations == "Spearman") {
            if (pvalues == 0) {
                doItAndPrint(paste("cor(", .activeDataSet, "[,c(", 
                    paste(x, collapse = ","), ")], method=\"spearman\", use=\"", use, "\")", 
                    sep = ""))
            }
            else {
                Library("Hmisc")
                doItAndPrint(paste("rcorr.adjust(", .activeDataSet, 
                    "[,c(", paste(x, collapse = ","), ")], type=\"spearman\", use=\"", use, "\")", 
                    sep = ""))
            }
        }
        else if (pvalues == 0){
            doItAndPrint(paste("partial.cor(", .activeDataSet, 
                "[,c(", paste(x, collapse = ","), ")], use=\"", use, "\")", 
                sep = ""))
        }
        else {
            doItAndPrint(paste("partial.cor(", .activeDataSet, 
                "[,c(", paste(x, collapse = ","), ")], tests=TRUE, use=\"", use, "\")", 
                sep = ""))
        }
        tkfocus(CommanderWindow())
    }
    OKCancelHelp(helpSubject = "rcorr.adjust", reset="correlationMatrix", apply="correlationMatrix")
    tkgrid(getFrame(xBox), sticky = "nw")
    tkgrid(correlationsFrame, sticky = "w")
    tkgrid(useFrame, sticky="w")
    tkgrid(labelRcmdr(pvaluesFrame, text = gettextRcmdr("Pairwise p-values"), fg=getRcmdr("title.color")), 
        pvaluesCheckbox, sticky = "w")
    tkgrid(pvaluesFrame, sticky = "w")
    tkgrid(buttonsFrame, sticky = "w")
    dialogSuffix(rows = 5, columns = 1)
}

# the following dialog contributed by Stefano Calza, modified by J. Fox

correlationTest <- function(){
	defaults <- list(initial.x=NULL,initial.correlations="pearson",initial.alternative ="two.sided")
	dialog.values <- getDialog("correlationTest", defaults)
	initializeDialog(title=gettextRcmdr("Correlation Test"))
	xBox <- variableListBox(top, Numeric(), selectmode="multiple", title=gettextRcmdr("Variables (pick two)"),initialSelection=varPosn(dialog.values$initial.x, "numeric"))
	radioButtons(name="correlations", buttons=c("pearson", "spearman", "kendall"),
			labels=gettextRcmdr(c("Pearson product-moment", "Spearman rank-order", "Kendall's tau")),
			initialValue=dialog.values$initial.correlations, 
			title=gettextRcmdr("Type of Correlation"))
	radioButtons(name="alternative", buttons=c("two.sided", "less", "greater"), 
			values=c("two.sided", "less", "greater"),
			initialValue=dialog.values$initial.alternative, 
			labels=gettextRcmdr(c("Two-sided", "Correlation < 0", "Correlation > 0")), 
			title=gettextRcmdr("Alternative Hypothesis"))  
	onOK <- function(){
		alternative <- as.character(tclvalue(alternativeVariable))
		correlations <- as.character(tclvalue(correlationsVariable))
		x <- getSelection(xBox)
		putDialog("correlationTest", list(initial.alternative=alternative, initial.correlations=correlations, initial.x=x))
		if (2 > length(x)) {
			errorCondition(recall=correlationTest,
					message=gettextRcmdr("Fewer than 2 variables selected."))
			return()
		}
		if(2 < length(x)) {
			errorCondition(recall=correlationTest,
					message=gettextRcmdr("More than 2 variables selected."))
			return()
		}
		closeDialog()
		.activeDataSet <- ActiveDataSet()
		command <- paste("cor.test(", .activeDataSet, "$", x[1], ", ", .activeDataSet, "$", x[2],
				', alternative="', alternative, '", method="', correlations, '")', sep="")
		doItAndPrint(command)  
		tkfocus(CommanderWindow())
	}
	OKCancelHelp(helpSubject="cor.test", reset="correlationTest", apply="correlationTest")
	tkgrid(getFrame(xBox), sticky="nw")
	tkgrid(labelRcmdr(top, text=""))
	tkgrid(correlationsFrame,alternativeFrame, sticky="w")
	tkgrid(buttonsFrame,columnspan=2,sticky="w")
	dialogSuffix(rows=4, columns=1)
}

countMissing <- function(){
	command <- paste("sapply(", activeDataSet(), 
			", function(x)(sum(is.na(x)))) # NA counts", sep="")
	doItAndPrint(command)
	invisible(NULL)
}

ShapiroTest <- function () {
	defaults <- list (initial.var = NULL)
	dialog.values <- getDialog ("ShapiroTest", defaults)
	initializeDialog(title = gettextRcmdr("Shapiro-Wilk Test for Normality"))
	variableBox <- variableListBox(top, Numeric(), title = gettextRcmdr("Variable (pick one)"),
			initialSelection = varPosn (dialog.values$initial.var, "numeric"))
	onOK <- function() {
		var <- getSelection(variableBox)
		putDialog ("ShapiroTest", list (initial.var = var))
		if (length(var) == 0) {
			errorCondition(recall = ShapiroTest, message = gettextRcmdr("You must select a variable."))
			return()
		}
		closeDialog()
		doItAndPrint(paste("shapiro.test(", ActiveDataSet(), 
						"$", var, ")", sep = ""))
		tkfocus(CommanderWindow())
	}
	OKCancelHelp(helpSubject = "shapiro.test", reset = "ShapiroTest", apply = "ShapiroTest")
	tkgrid(getFrame(variableBox), sticky = "nw")
	tkgrid(buttonsFrame, sticky = "w")
	dialogSuffix(rows = 2, columns = 1)
}

Try the Rcmdr2 package in your browser

Any scripts or data that you put into this service are public.

Rcmdr2 documentation built on May 2, 2019, 6:49 p.m.