R/distVarFunc.R

Defines functions distVarFunc

Documented in distVarFunc

#' Plot the distribution of data.frame columns and tabulate factor levels
#'
#' @description For every column named in \code{dfColNameVec},
#'   \code{distVarFunc} draws one plot and saves a copy of it as a .png file:
#'   factor columns are drawn as a bar plot of their level frequencies, numeric
#'   columns are drawn with \code{freqOrdPlot} (defined elsewhere in the
#'   package; presumably the empirical cumulative frequency distribution).
#'   Columns of any other class are skipped with a message. The level
#'   frequencies of all factor columns are additionally collected in one
#'   data.frame (columns \code{variables}, \code{levels}, \code{frequency})
#'   which is written to a .xlsx file.
#'
#' @details Files are written below the current working directory: plots to
#'   plot/ (or plot/\code{subDir}/) as \code{<column>_<dfData>.png} and the
#'   frequency table to output/ (or output/\code{subDir}/) as
#'   \code{<dfData>_xTabsDf.xlsx}. Missing directories are created. Two objects
#'   are assigned in \code{envir}: \code{<dfData>_xTabsDf} (the frequency
#'   data.frame) and \code{<dfData>_list} (the list of frequency tables).
#'   Graphical parameters changed while plotting are restored when the
#'   function exits.
#'
#' @param dfData a data.frame object or a character string indicating the name
#'   of the data.frame object. A name is looked up starting from the calling
#'   environment.
#' @param dfColNameVec a string or a vector of class character indicating the
#'   name(s) of the column(s) to be plotted. If missing, all columns of
#'   \code{dfData} are used.
#' @param subDir a character string indicating the name of the subdirectory
#'   within the "output" and "plot" directories in which to save the .xlsx file
#'   and the .png plots respectively. It is created (including intermediate
#'   directories) if it does not exist. If not specified, the outputs are saved
#'   directly in output/ and plot/.
#' @param envir the environment in which the objects \code{<dfData>_xTabsDf}
#'   and \code{<dfData>_list} are assigned.
#' @return Invisibly, the list of frequency tables of the factor columns (the
#'   same object that is assigned as \code{<dfData>_list}). The function is
#'   mainly called for its side effects described above.
#' @examples
#' distVarFunc(iris,"Sepal.Length")
#' distVarFunc(iris,c("Sepal.Width","Sepal.Length"),"new_plots")
#' @export
distVarFunc <- function(dfData, dfColNameVec, subDir, envir = .GlobalEnv) {
  # Resolve the data and a single-string label used in titles, file names and
  # the names of the assigned objects.
  if (is.character(dfData)) {
    # Look in the caller's environment first so that the function's own
    # arguments cannot shadow the requested object; fall back to the original
    # lookup for objects only reachable from this function's enclosure.
    callerEnv <- parent.frame()
    if (exists(dfData, envir = callerEnv)) {
      dfTab <- get(dfData, envir = callerEnv)
    } else {
      dfTab <- get(dfData)
    }
  } else {
    dfTab <- dfData
    # deparse() may split a long expression over several strings; collapse so
    # the label stays a single string.
    dfData <- paste(deparse(substitute(dfData)), collapse = "")
  }

  if (missing(dfColNameVec)) {
    dfColNameVec <- names(dfTab)
  }

  # Fail early (before any directory or file is created) on unknown columns.
  if (is.data.frame(dfTab) && is.character(dfColNameVec)) {
    unknownCols <- setdiff(dfColNameVec, names(dfTab))
    if (length(unknownCols) > 0) {
      stop(
        "column(s) not found in ", dfData, ": ",
        paste(unknownCols, collapse = ", "),
        call. = FALSE
      )
    }
  }

  # Create plot/ and output/ in the working directory when they are missing.
  baseDirs <- c(
    plot = file.path(getwd(), "plot"),
    output = file.path(getwd(), "output")
  )
  for (dirLabel in names(baseDirs)) {
    if (!dir.exists(baseDirs[[dirLabel]])) {
      if (dir.create(baseDirs[[dirLabel]], recursive = TRUE)) {
        print(paste(
          "directory", dirLabel,
          "has been created in the present working directory"
        ))
      }
    }
  }

  # Resolve (and create, if requested) the sub-directories for the outputs.
  if (missing(subDir)) {
    dirOutput <- baseDirs[["output"]]
    dirPlot <- baseDirs[["plot"]]
  } else {
    dirOutput <- file.path(baseDirs[["output"]], subDir)
    dirPlot <- file.path(baseDirs[["plot"]], subDir)
    for (targetDir in c(dirOutput, dirPlot)) {
      if (!dir.exists(targetDir)) {
        dir.create(targetDir, recursive = TRUE)
      }
    }
  }

  # color blind friendly palette
  cbbPalette <- c(
    "#000000", "#E69F00", "#56B4E9", "#009E73",
    "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
  )

  # Graphical parameters are changed for the bar plots; remember the first
  # previous state and put it back when the function exits.
  savedPar <- NULL
  on.exit(
    if (!is.null(savedPar) && dev.cur() > 1) par(savedPar),
    add = TRUE
  )

  # Copy the plot on the active device into a .png file and make the source
  # device active again (dev.off() may otherwise select another open device).
  savePlotCopy <- function(colName) {
    sourceDev <- dev.cur()
    dev.copy(
      png,
      filename = file.path(dirPlot, paste0(colName, "_", dfData, ".png")),
      width = 1000, height = 1000, res = 100
    )
    dev.off()
    if (sourceDev %in% dev.list()) {
      dev.set(sourceDev)
    }
    invisible(NULL)
  }

  listXt <- list()

  for (i in dfColNameVec) {
    # `[[` always yields the column vector, also for tibbles where `[, i]`
    # would return a one-column tibble.
    colValues <- if (is.data.frame(dfTab)) dfTab[[i]] else dfTab[, i]

    if (is.factor(colValues)) {
      # deparse.level = 0 keeps the dimnames label empty instead of naming it
      # after the local variable.
      xt <- table(colValues, deparse.level = 0)
      listXt[[i]] <- xt
      previousPar <- par(las = 1, cex = 1)
      if (is.null(savedPar)) {
        savedPar <- previousPar
      }
      barplot(xt, main = paste(dfData, i, sep = "_"), col = cbbPalette)
      savePlotCopy(i)
    } else if (is.numeric(colValues)) {
      freqOrdPlot(dfTab, i, main = i, xlab = i)
      savePlotCopy(i)
    } else {
      # Report the actual class; the text is unchanged for character columns.
      message(paste0(
        i, "  is a variable of class::", class(colValues)[1],
        " and has been excluded!"
      ))
    }
  }

  # One block of rows per factor column: the variable name appears on the
  # first row only, followed by one row per level with its frequency.
  tabRows <- lapply(names(listXt), function(varName) {
    nLevels <- length(listXt[[varName]])
    rows <- data.frame(
      variable = c(varName, rep("", nLevels - 1)),
      data.frame(listXt[varName])
    )
    names(rows) <- c("variables", "levels", "frequency")
    rows
  })
  xTabsDf <- if (length(tabRows) > 0) do.call(rbind, tabRows) else data.frame()

  # NOTE(review): write.xlsx is not defined in this file; the sheetName and
  # row.names arguments suggest the xlsx package - confirm it is imported.
  write.xlsx(
    xTabsDf,
    file.path(dirOutput, paste0(dfData, "_xTabsDf.xlsx")),
    sheetName = "crossTables",
    row.names = FALSE
  )

  assign(paste0(dfData, "_xTabsDf"), xTabsDf, envir = envir)
  assign(paste0(dfData, "_list"), listXt, envir = envir)
  invisible(listXt)
}
lwTools/agriTrf documentation built on March 26, 2020, 12:09 a.m.