R/describe.R

#' @title Univariate summary of a variable for each level of a factor variable (if any is specified)
#' @description \code{describe} makes a simple description of a variable \code{x} in one or more
#' groups defined by the factor \code{by}. If \code{x} is a factor (or a character vector),
#' frequency tables and (optionally) barplots are shown. If \code{x} is a numeric variable, it is
#' summarized with mean and sd in each group when \code{x} is normal; otherwise it is summarized
#' with median and percentiles (by default the 25th and 75th percentiles). A comparison between
#' two groups is made by a t-test in the normal case and by a Wilcoxon test otherwise. When there
#' are more than two groups, the ANOVA F-test and the Kruskal-Wallis test are used.
#' Optionally histograms and boxplots are printed.
#' If \code{x} is a data frame, every column is described in turn and the results are stacked
#' in a single table.
#' @param x variable to be summarized, or a data frame whose columns are summarized one by one.
#' @param by variable for identifying groups, interpreted as factor.
#' @param xlabel Label of the variable x, to be used in figures and tables. When \code{x} is a
#' data frame, one label per column; repeated labels are made unique by appending
#' \code{.1}, \code{.2}, ... By default the column names (data frame) or the expression
#' passed as \code{x} are used.
#' @param bylabel Label of the variable by, to be used in figures and tables. By default it is
#' built from the expression passed as \code{by}.
#' @param plot Logical. If TRUE a plot is printed.
#' @param report If "auto" a Shapiro test is made; only mean and sd (if the variable can be
#' assumed to be normal) or median and quartiles (when the variable is not normal) are shown.
#' If "meansd", only mean and sd are shown; if "medianq", median and percentiles are shown.
#' Otherwise a full report is printed, including also number of missing values, skewness,
#' kurtosis, min, max and the p-value of the Shapiro test for normality. When \code{x} is a
#' data frame and \code{report} has fewer elements than columns, its first element is used for
#' every column.
#' @param showDescriptives Logical. If TRUE the table of descriptives is printed.
#' @param pctBycol Logical. If TRUE, relative frequencies in crosstabs are computed by columns
#' else by rows. Only used when \code{x} is a factor or character variable.
#' @param title Title passed on to the frequency table. Only used when \code{x} is a factor or
#' character variable.
#' @param digits Passed on to the summary functions; presumably the number of decimal places
#' shown.
#' @return Invisibly, a list. For a single variable it has one element, \code{summary}. For a
#' data frame it has \code{summary} (one row per column of \code{x}) and \code{test} (the name
#' of the test used for each column, named by \code{xlabel}); when missing values are present
#' it also has \code{nValid} and a warning is issued.
#' @export
describe <- function(x, by = NULL, xlabel = NULL, bylabel = NULL, plot = FALSE,
                     report = "auto", showDescriptives = TRUE, pctBycol = TRUE,
                     title = "", digits = 2) {
  # Describe a single variable: frequency table for factor/character data,
  # numeric summary otherwise. `pctBycol` and `title` come from the enclosing call.
  desc <- function(x, by, xlabel, bylabel, plot, report, showDescriptives, digits) {
    if (is.factor(x) || is.character(x)) {
      freqTable(x = x, by = by, xlabel = xlabel, bylabel = bylabel, plot = plot,
                showTable = showDescriptives, pctBycol = pctBycol, title = title,
                digits = digits)
    } else {
      summarize(x = x, by = by, xlabel = xlabel, bylabel = bylabel, plot = plot,
                report = report, showSummary = showDescriptives, digits = digits)
    }
  }

  # NOTE(review): these pander options are set globally and are not restored on
  # exit; left unchanged because callers may rely on them persisting.
  panderOptions('knitr.auto.asis', FALSE)
  panderOptions('keep.line.breaks', TRUE)
  panderOptions('table.style', "multiline")

  # deparse() splits long expressions over several strings; collapse them so the
  # result is always a single string usable in scalar conditions and as a label.
  dsby <- paste(deparse(substitute(by), width.cutoff = 500L), collapse = " ")
  # An expression was supplied for `by` but it evaluated to NULL
  # (e.g. a non-existent column of a data frame).
  if (dsby != "NULL" && is.null(by)) stop(paste("Variable", dsby, "not found"))
  if (!is.null(by) && is.null(bylabel)) bylabel <- toLabel(dsby)

  if (is.null(xlabel)) {
    if (is.data.frame(x)) {
      xlabel <- names(x)
    } else {
      xlabel <- toLabel(paste(deparse(substitute(x), width.cutoff = 500L),
                              collapse = " "))
    }
  } else if (is.data.frame(x) && length(unique(xlabel)) < NCOL(x)) {
    # User-supplied labels contain repeats: number each repeated label
    # (label.1, label.2, ...) so that rows can be told apart.
    counts <- table(xlabel)
    for (lab in names(counts)[as.vector(counts) > 1]) {
      idx <- which(xlabel == lab)
      xlabel[idx] <- paste(lab, seq_along(idx), sep = ".")
    }
  }

  if (!is.data.frame(x)) {
    resumen <- desc(x, by = by, xlabel = xlabel, bylabel = bylabel, plot = plot,
                    report = report, showDescriptives = showDescriptives,
                    digits = digits)
    return(invisible(list(summary = resumen)))
  }

  if (ncol(x) == 0L) stop("'x' has no columns to describe", call. = FALSE)
  if (length(report) < ncol(x)) report <- rep(report[1], ncol(x))

  vv <- validValues(x, by = by, byname = toLabel(dsby))
  NApresent <- vv$haveNA

  # Column names of the stacked table are taken from the valid-counts table.
  nms <- names(vv$nValid)
  if (length(nms) == 3) nms <- nms[-3] else nms[length(nms)] <- "P"
  # Keep only the first line of each (possibly multi-line) header.
  if (NApresent) nms <- vapply(strsplit(nms, "\n"), function(s) s[1], character(1))

  resumen <- NULL
  test <- NULL
  for (j in seq_len(ncol(x))) {
    # `[[` returns the column itself for data frames and tibble-like classes alike.
    rj <- desc(x = x[[j]], by = by, xlabel = xlabel[j], bylabel = bylabel,
               plot = plot, report = report[j], showDescriptives = FALSE,
               digits = digits)
    resumen <- rbind(resumen, setNames(rj, nms))
    # The name of the last element of `rj` is split on "\n"; its first line is
    # stored as the test used for this column.
    test <- c(test, strsplit(names(rj[length(rj)]), "\n")[[1]][1])
  }
  names(test) <- xlabel

  if (showDescriptives) pander(resumen, split.table = Inf)
  if (NApresent) {
    warning("Missing values are present. Not all the variables are evaluated on the same sample size.",
            call. = FALSE)
    return(invisible(list(summary = resumen, nValid = vv$nValid, test = test)))
  }
  invisible(list(summary = resumen, test = test))
}
angeloSdP/ULPGCmisc documentation built on May 10, 2019, 11:47 a.m.