Nothing
#' @title pgu.data
#' @description
#' Handles the pguIMP dataset.
#' @details
#' Stores the pguIMP dataset as instance variable and keeps track of the attributes of interest.
#' Provides additionally fast access to several statistical information about the data set.
#' This object is used by the shiny based gui and is not for use in individual R-scripts!
#'
#' @format [R6::R6Class] object.
#' @importFrom R6 R6Class
#' @importFrom magrittr %>%
#' @importFrom dplyr pull select mutate rename summarise summarise_all select_if all_of
#' @importFrom purrr negate
#' @importFrom tibble tibble as_tibble is_tibble rownames_to_column
#' @importFrom tidyr gather
#'
#' @include pguFile.R
#'
#' @examples
#' require(dplyr)
#' require(tibble)
#' data(iris)
#' data_df <- iris %>%
#' tibble::as_tibble()
#' data_obj = pguIMP::pgu.data$new(data_df)
#' @author Sebastian Malkusch, \email{malkusch@@med.uni-frankfurt.de}
#' @export
#'
pgu.data <- R6::R6Class("pgu.data",
####################
# instance variables
####################
private = list(
.rawData = "tbl_df",
.attributeNames = "character",
.numericalAttributeNames = "character",
.categoricalAttributeNames = "character",
.classInformation = "tbl_df",
.statistics = "tbl_df",
.reducedStatistics = "tbl_df",
.missingsStatistics = "tbl_df",
#' @description
#' Clears the heap and
#' indicates that instance of pguIMP::pgu.data is removed from heap.
finalize = function()
{
print("Instance of pgu.data removed from heap")
}, #end pguIMP::pgu.data$finalize
#' @description
#' Summarizes information on the instance variable rawData
#' and retruns it in form of a compact data frame.
analyze_classes = function(){
private$.classInformation <- self$rawData %>%
dplyr::summarise_all(class) %>%
tidyr::gather(variable, class)
}, #end pguIMP::pgi.data$analyze_classes
#' @description
#' Summarizes a vector of numericals and returns summary.
#' @param val
#' Vector of numericals to be summarized.
#' (numeric)
summarize_numerical_data = function(val = "numeric")
{
if(!any(is.na(val))){
res <- c(summary(val),"NA's"=0)
}#if
else{
res <- summary(val)
}#else
return(res)
}, #end pguIMP::pgi.data$summarize_numerical_data
#' @description
#' Iterativley calls the function summarize_numerical_data on all numerical attributes
#' of the instance variable rawData and returns the result in form of a data frame.
calculate_statistics = function()
{
if (length(self$numerical_data()) > 0)
{
private$.statistics <- self$numerical_data() %>%
apply(MARGIN=2, FUN=private$summarize_numerical_data) %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("Value") %>%
tibble::as_tibble()
}else{
private$.statistics <- tibble::tibble(Value = character(0))
}
}, #end pguIMP::pgu.data$calculate_statistics
#' @description
#' Calls the function calculate_statistics
#' filters the result for the attribute names and arithmetic mean values.
#' and returns the result in form of a data frame.
reduce_statistics = function()
{
if ("Mean" %in% colnames(self$statistics))
{
private$.reducedStatistics <- self$statistics %>%
dplyr::select(c("Value", "Mean"))
}else{
private$.reducedStatistics <- tibble::tibble(Value = character(0),
Mean = numeric(0))
}
}, #end pguIMP::pgu.data$reduce_satatistics
#' @description
#' Calls the class' function dataStatistics
#' filters the result for the attribute names and information about missing values.
#' and returns the result in form of a data frame.
detect_missings = function()
{
if("NA's" %in% colnames(self$statistics))
{
len <- nrow(self$rawData)
private$.missingsStatistics <- self$statistics %>%
dplyr::select(c("Value", "NA's")) %>%
dplyr::rename(absolute = !!("NA's")) %>%
dplyr::mutate(fraction = absolute / len)
}else{
private$.missingsStatistics <- tibble::tibble(Value = character(0),
absolute = numeric(0),
fraction = numeric(0))
}
}#end pguIMP::pgu.data$detect_missings
), #end pguIMP::pgu.data$private
##################
# accessor methods
##################
active = list(
#' @field rawData
#' Returns the instance variable rawData
#' (tibble::tibble)
rawData = function()
{
return(private$.rawData)
},
#' @field setRawData
#' Sets the instance variable rawData
#' (tibble::tibble)
setRawData = function(data_df = "tbl_df")
{
if (!tibble::is_tibble(data_df)){
data_df <- tibble::tibble("Sample Name" := c(character(0)))
}
if(!"Sample Name" %in% colnames(data_df)){
sampleName <- seq.int(from =1, to = nrow(data_df), by =1)
data_df <- data_df %>%
dplyr::mutate("Sample Name" := sampleName) %>%
dplyr::select("Sample Name", dplyr::everything())
}
private$.rawData <- data_df %>%
dplyr::mutate("Sample Name" := as.character(data_df[["Sample Name"]]))
private$.attributeNames <- colnames(data_df)
private$.numericalAttributeNames <- self$rawData %>%
dplyr::select_if(is.numeric) %>%
colnames()
private$.categoricalAttributeNames <- self$rawData %>%
dplyr::select_if(purrr::negate(is.numeric)) %>%
colnames()
},
#' @field attributeNames
#' Returns the instance variable attributeNames
#' (character)
attributeNames = function()
{
return(private$.attributeNames)
},
#' @field numericalAttributeNames
#' Returns the instance variable numericalAttributeNames
#' (character)
numericalAttributeNames = function()
{
return(private$.numericalAttributeNames)
},
#' @field categoricalAttributeNames
#' Returns the instance variable categoricalAttributeNames
#' (character)
categoricalAttributeNames = function()
{
return(private$.categoricalAttributeNames)
},
#' @field classInformation
#' Returns the instance variable classInformation
#' (tibble::tibble)
classInformation = function()
{
return(private$.classInformation)
},
#' @field statistics
#' Returns the instance variable statistics
#' (tibble::tibble)
statistics = function()
{
return(private$.statistics)
},
#' @field reducedStatistics
#' Returns the instance variable reducedStatistics
#' (tibble::tibble)
reducedStatistics = function()
{
return(private$.reducedStatistics)
},
#' @field missingsStatistics
#' Returns the instance variable missingsStatistics
#' (tibble::tibble)
missingsStatistics = function()
{
return(private$.missingsStatistics)
}
), #end pguIMP::pgi.data$active
####################
# public functions #
####################
public = list(
#' @description
#' Creates and returns a new pguIMP::pgu.data object.
#' @param data_df
#' The data to be analyzed.
#' (tibble::tibble)
#' @return
#' A new pguIMP::pgu.data object.
#' (pguIMP::pgu.data)
initialize = function(data_df = "tbl_df")
{
self$setRawData <- data_df
}, #end pgiIMP::pgu.data$initialize
#' @description
#' Prints instance variables of a pguIMP::pgu.data object.
#' @return
#' string
print = function()
{
sprintf("\npgu.data\nrawData:\n") %>%
cat()
print(head(self$rawData))
sprintf("\nattributeNames:\n") %>%
cat()
print(self$attributeNames)
sprintf("\nnumericalAttributeNames:\n") %>%
cat()
print(self$numericalAttributeNames)
sprintf("\ncategoricalAttributeNames:\n") %>%
cat()
print(self$categoricalAttributeNames)
sprintf("\nclassInfromation:\n") %>%
cat()
print(self$classInformation)
sprintf("\nstatistics:\n") %>%
cat()
print(self$statistics)
sprintf("\nreducedStatistics:\n") %>%
cat()
print(self$reducedStatistics)
sprintf("\nmissingsStatistics:\n") %>%
cat()
print(self$missingsStatistics)
cat("\n\n")
invisible(self)
}, #end pguIMP::pgu.data$print
#' @description
#' Extracts information about the instance variable rawData.
fit = function()
{
private$analyze_classes()
private$calculate_statistics()
private$reduce_statistics()
private$detect_missings()
}, #end pguIMP::pgi.data$fit()
#' @description
#' Returns the index of an attribute within the instance variable attributeNames.
#' @param attribute
#' Attribute's name.
#' (character)
#' @return
#' Index of attribute's name in rawData
#' (numeric)
attribute_index = function(attribute = "character")
{
idx <- match(attribute, self$attributeNames)
if(is.na(idx)){
rString <- sprintf("\nWarning in pgu.data: attribute %s is not known\n",
attribute)
cat(rString)
}#if
return(idx)
}, #end pguIMP::pgu.data$attribute_idx()
#' @description
#' Returns the numeric attributes of the instance variable rawData.
#' @return
#' A data frame
#' (tibble::tibble)
numerical_data = function()
{
self$rawData %>%
dplyr::select_if(is.numeric) %>%
return()
}, #end pguIMP::pgu.data$numerical_data()
#' @description
#' Returns the categorical attributes of the instance variable rawData.
#' @return
#' A data frame
#' (tibble::tibble)
categorical_data = function()
{
self$rawData %>%
dplyr::select_if(purrr::negate(is.numeric)) %>%
return()
} #end pguIMP::pgi.data$categorical_data()
)#end pguIMP::pgi.data$public
)#end pguIMP::pgu.data
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.