Nothing
#' @title
#' Return the valueType of an object
#'
#' @description
#' Determines the valueType of an object based on [typeof()] and
#' [class()]. The possible values returned are 'date', 'boolean',
#' 'integer', 'decimal', and 'text'.
#'
#' @details
#' The valueType is a declared property of a variable that is required in
#' certain functions to determine handling of the variables. Specifically,
#' valueType refers to the
#' [OBiBa data type of a variable](https://opaldoc.obiba.org/en/dev/variables-data.html#value-types).
#' The valueType is specified in a data dictionary in a column 'valueType' and
#' can be associated with variables as attributes. Acceptable valueTypes
#' include 'text', 'integer', 'decimal', 'boolean', datetime', 'date'. The full
#' list of OBiBa valueType possibilities and their correspondence with R data
#' types are available using [valueType_list]. The valueType can be used to
#' coerce the variable to the corresponding data type.
#'
#' @seealso
#' [typeof()], [class()]
#' [Opal documentation](https://opaldoc.obiba.org/en/dev/magma-user-guide/value/type.html)
#'
#' @param x Object. Can be a vector.
#'
#' @returns
#' A character string which is the valueType of the input object.
#'
#' @examples
#' {
#'
#' # use madshapR_DEMO provided by the package
#'
#' dataset <- madshapR_DEMO$dataset_MELBOURNE
#' valueType_of(dataset$Gender)
#' valueType_of(iris$Sepal.Length)
#'
#' }
#'
#' @import dplyr tidyr fabR
#' @importFrom rlang .data
#'
#' @export
valueType_of <- function(x){
# check if the col is empty
if(is.list(x) & sum(nrow(x)) <= 1)
return(as_valueType(x = x[[1]], valueType))
# check if the col is a vector
if(is.list(x))
stop(call. = FALSE, "'list' object cannot be coerced to valueType")
type <- x %>% typeof()
class <- class(x)[[max(length(class(x)))]]
vT_list <- madshapR::valueType_list
valueType <-
unique(vT_list[
which(vT_list[['typeof']] == type &
vT_list[['class']] == class),]$`toValueType`)
if(type %in% c("character","double") & class == "Date") valueType <- "date"
if(type %in% c("character","double") & class == "POSIXt") valueType <- "datetime"
silently_run({
if(class == "factor"){
lvls <- attributes(x)$`levels` %>% as.character()
valueType <-
try({as_valueType(lvls,"integer");valueType <- "integer"},silent = TRUE)
if(class(valueType)[1] == "try-error") valueType <-
try({as_valueType(lvls,"decimal");valueType <- "decimal"},silent = TRUE)
if(class(valueType)[1] == "try-error") valueType <-
try({as_valueType(lvls,"date") ;valueType <- "date" },silent = TRUE)
if(class(valueType)[1] == "try-error") valueType <-
try({as_valueType(lvls,"boolean");valueType <- "boolean"},silent = TRUE)
if(class(valueType)[1] == "try-error") valueType <-
try({ valueType <- "text" },silent = TRUE)
}
})
if(length(valueType) == 0) valueType <- "text"
return(valueType)
}
#' @title
#' Guess and attribute the valueType of a data dictionary or dataset variable
#'
#' @description
#' Determines the valueType of an object based on [base::typeof()] and
#' [base::class()].
#' The possible values returned are 'date', 'boolean', 'integer', 'decimal', and
#' 'text'.
#'
#' @details
#' A data dictionary contains the list of variables in a dataset and metadata
#' about the variables and can be associated with a dataset. A data dictionary
#' object is a list of data frame(s) named 'Variables' (required) and
#' 'Categories' (if any). To be usable in any function, the data frame
#' 'Variables' must contain at least the `name` column, with all unique and
#' non-missing entries, and the data frame 'Categories' must contain at least
#' the `variable` and `name` columns, with unique combination of
#' `variable` and `name`.
#'
#' A dataset is a data table containing variables. A dataset object is a
#' data frame and can be associated with a data dictionary. If no
#' data dictionary is provided with a dataset, a minimum workable
#' data dictionary will be generated as needed within relevant functions.
#' Identifier variable(s) for indexing can be specified by the user.
#' The id values must be non-missing and will be used in functions that
#' require it. If no identifier variable is specified, indexing is
#' handled automatically by the function.
#'
#' The valueType is a declared property of a variable that is required in
#' certain functions to determine handling of the variables. Specifically,
#' valueType refers to the
#' [OBiBa data type of a variable](https://opaldoc.obiba.org/en/dev/variables-data.html#value-types).
#' The valueType is specified in a data dictionary in a column 'valueType' and
#' can be associated with variables as attributes. Acceptable valueTypes
#' include 'text', 'integer', 'decimal', 'boolean', datetime', 'date'. The full
#' list of OBiBa valueType possibilities and their correspondence with R data
#' types are available using [valueType_list]. The valueType can be used to
#' coerce the variable to the corresponding data type.
#'
#' @seealso
#' [valueType_adjust()]
#'
#' @param ... Object that can be either a dataset or a data dictionary.
#'
#' @returns
#' Either a data frame, identifying the dataset, or a list of data frame(s)
#' identifying a data dictionary, depending which the input refers to.
#'
#' @examples
#' {
#'
#' ###### Example : The valueType of a dataset can be adjusted. each column is
#' # evaluated as whole, and the best valueType match found is applied. If
#' # there is no better match found, the column is left as it is.
#'
#' head(valueType_self_adjust(mtcars['cyl']))
#'
#' }
#'
#' @import dplyr tidyr stringr fabR
#' @importFrom rlang .data
#'
#' @export
valueType_self_adjust <- function(...){
# is dataset
if(is_dataset(...) & !is_data_dict(...)){
dataset <- as_dataset(...,col_id = col_id(...))
{
if(ncol(dataset) == 0) return(dataset)
if(nrow(dataset) == 0) return(dataset)
preserve_attributes <- col_id(dataset)
is_factor <-
dataset %>%
summarise(across(everything(), ~ toString(class(.)))) %>%
pivot_longer(everything()) %>%
dplyr::filter(.data$`value` %in% c("factor"))
data_dict <- data_dict_extract(dataset)
data_dict[['Categories']] <-
bind_rows(
Categories = tibble(name = as.character(),variable = as.character()),
data_dict[['Categories']])
vT <-
dataset %>%
reframe(across(everything(),~ valueType_guess(.))) %>%
pivot_longer(everything())
for(i in names(dataset)) {
dataset[[i]] <-
as_valueType(
x = dataset[[i]],
valueType = vT$value[vT$name == i])
}
data_dict_final <- data_dict_extract(dataset)
data_dict[['Variables']]['valueType'] <- NULL
data_dict_final[['Variables']] <-
data_dict_final[['Variables']][c('name','valueType')] %>%
left_join(data_dict[['Variables']], by = c("name"))
data_dict_final <- c(data_dict_final['Variables'], data_dict['Categories'])
dataset <-
data_dict_apply(dataset, data_dict_final) %>%
mutate(across(c(is_factor$`name`), ~ as.factor(.))) %>%
as_dataset(col_id = preserve_attributes)
}
return(dataset)
}
# is data_dict
if(!is_dataset(...) & is_data_dict(...)){
data_dict <- as_data_dict_shape(...)
attributes(data_dict)$`madshapR::class` <- attributes(...)$`madshapR::class`
if(nrow(data_dict[['Variables']]) == 0) return(data_dict)
if(sum(nrow(data_dict[['Categories']])) == 0){
# warning("Your data dictionary contains no categorical variables.")
return(data_dict)
}else{
category_outcomes <-
data_dict[['Categories']] %>%
select("name") %>% distinct %>%
rowwise() %>%
mutate(valueType = valueType_guess(.data$`name`))
category_outcomes <-
data_dict[['Categories']] %>%
select(.data$`variable`,.data$`name`) %>%
left_join(category_outcomes, by = "name") %>%
select(.data$`variable`,.data$`valueType`) %>%
distinct %>%
group_by(.data$`variable`) %>%
summarise(valueType = paste0(.data$`valueType`,collapse = "|"))
category_outcomes <-
data_dict[['Categories']] %>%
select(.data$`variable`,.data$`name`) %>%
left_join(category_outcomes, by = "variable") %>%
group_by(.data$`variable`) %>% group_split() %>%
lapply(function(x){
test_vT <- str_detect(x$valueType[1], "\\|")
if(test_vT) x <-
x %>% mutate(valueType = valueType_guess(unique(x$name)))
return(x)
}) %>%
bind_rows() %>%
left_join(madshapR::valueType_list, by = "valueType") %>%
select(
name = .data$`variable`,
proposed_tO = .data$`typeof`,
proposed_vT = .data$`valueType`) %>%
distinct
if(length(data_dict[['Variables']][['typeof']]) > 0){
data_dict_tO <-
data_dict[['Variables']] %>% select(.data$`name`,.data$`typeof`) %>%
left_join(category_outcomes, by = "name") %>%
mutate(
proposed_tO =
ifelse(is.na(.data$`proposed_tO`),
.data$`typeof`,.data$`proposed_tO`)) %>%
mutate(
`proposed_tO` =
replace_na(.data$`proposed_tO`,'character')) %>%
select(typeof = .data$`proposed_tO`)
data_dict[['Variables']]['typeof'] <- data_dict_tO
}
if(length(data_dict[['Variables']][['valueType']]) > 0){
data_dict_vT <-
data_dict[['Variables']] %>%
select(.data$`name`,.data$`valueType`) %>%
left_join(category_outcomes, by = "name") %>%
mutate(
proposed_vT =
ifelse(is.na(.data$`proposed_vT`),
.data$`valueType`,.data$`proposed_vT`)) %>%
mutate(`proposed_vT` = replace_na(.data$`proposed_vT`,'text')) %>%
select(valueType = .data$`proposed_vT`)
data_dict[['Variables']]['valueType'] <- data_dict_vT
}
if(length(data_dict[['Variables']][['valueType']]) == 0 &
length(data_dict[['Variables']][['typeof']]) == 0 ) {
data_dict_vT <-
data_dict[['Variables']] %>%
left_join(category_outcomes, by = "name") %>%
rename(typeof = .data$`proposed_tO`, valueType = .data$`proposed_vT`)}
}
return(data_dict)
}
message("The argument is neither a dataset or a data dictionary.")
message("\nTesting dataset :")
try(as_dataset(...))
message("\nTesting data dictionary :")
try(as_data_dict(...))
silently_run(stop(call. = FALSE))
}
#' @title
#' Attribute the valueType from a data dictionary to a dataset, or vice versa
#'
#' @description
#' Takes the valueType of the input (from) and attributes it to the output (to).
#' The parameters 'from' and 'to' can be either a dataset or a data dictionary.
#' Depending on the input provided, the valueType replaced is either in the
#' 'valueType' column of a data dictionary or cast to a column in a dataset.
#' If 'to' is not provided, the function calls [valueType_self_adjust()]
#' instead. The possible values returned are 'date', 'boolean', 'integer',
#' 'decimal', and text'.
#'
#' @details
#' A data dictionary contains the list of variables in a dataset and metadata
#' about the variables and can be associated with a dataset. A data dictionary
#' object is a list of data frame(s) named 'Variables' (required) and
#' 'Categories' (if any). To be usable in any function, the data frame
#' 'Variables' must contain at least the `name` column, with all unique and
#' non-missing entries, and the data frame 'Categories' must contain at least
#' the `variable` and `name` columns, with unique combination of
#' `variable` and `name`.
#'
#' A dataset is a data table containing variables. A dataset object is a
#' data frame and can be associated with a data dictionary. If no
#' data dictionary is provided with a dataset, a minimum workable
#' data dictionary will be generated as needed within relevant functions.
#' Identifier variable(s) for indexing can be specified by the user.
#' The id values must be non-missing and will be used in functions that
#' require it. If no identifier variable is specified, indexing is
#' handled automatically by the function.
#'
#' The valueType is a declared property of a variable that is required in
#' certain functions to determine handling of the variables. Specifically,
#' valueType refers to the
#' [OBiBa data type of a variable](https://opaldoc.obiba.org/en/dev/variables-data.html#value-types).
#' The valueType is specified in a data dictionary in a column 'valueType' and
#' can be associated with variables as attributes. Acceptable valueTypes
#' include 'text', 'integer', 'decimal', 'boolean', datetime', 'date'. The full
#' list of OBiBa valueType possibilities and their correspondence with R data
#' types are available using [valueType_list]. The valueType can be used to
#' coerce the variable to the corresponding data type.
#'
#' @seealso
#' [valueType_self_adjust()]
#'
#' @param from Object to be adjusted. Can be either a dataset or a data
#' dictionary.
#' @param to Object to be adjusted. Can be either a dataset or a data
#' dictionary. NULL by default.
#'
#' @returns
#' Either a data frame, identifying the dataset, or a list of data frame(s)
#' identifying a data dictionary, depending which is 'to'.
#'
#' @examples
#' {
#'
#' # use madshapR_DEMO provided by the package
#' library(dplyr)
#'
#' dataset <- madshapR_DEMO$dataset_TOKYO[c(1:4),'prg_ever']
#' data_dict <-
#' madshapR_DEMO$data_dict_TOKYO %>%
#' data_dict_filter(filter_var = 'name == "prg_ever"') %>%
#' as_data_dict_mlstr()
#'
#' head(valueType_adjust(from = data_dict,to = dataset))
#'
#' }
#'
#' @import dplyr tidyr
#' @importFrom crayon bold
#' @importFrom rlang .data
#'
#' @export
valueType_adjust <- function(from, to = NULL){
# test dataset
if(is.null(to)) return(valueType_self_adjust(from))
# apply the data dictionary of the dataset to the data dictionary
if(is_dataset(from) & is_data_dict(to)){
as_dataset(from) # no col_id
as_data_dict_shape(to)
dataset <- from
data_dict <- to
# dataset must match
if(suppressWarnings(check_dataset_variables(dataset, data_dict)) %>%
dplyr::filter(str_detect(.data$`condition`,"\\[ERR\\]")) %>% nrow > 0){
stop(call. = FALSE,
"Names across your data dictionary differ from names across the dataset.",
bold("\n\nUseful tip:"),
" Use dataset_evaluate(dataset, data_dict) for a full assessment of the dataset"
)}
if(ncol(dataset) == 0) return(data_dict)
vT_data_dict <-
tibble(name = rep(names(dataset)),
valueType = rep(NA_character_,ncol(dataset)))
for(i in names(dataset)){
cat_i <- data_dict$Categories[data_dict$Categories[['variable']] == i,'name']$`name`
if(length(cat_i) == 0){
if(all(is.na(dataset[[i]]))){
dataset[[i]] <-
as_valueType(dataset[[i]],
ifelse(
is.null(data_dict$Variables[["valueType"]])|
toString(data_dict$Variables[
data_dict$Variables[["name"]] == i,][['valueType']]) %in% c("NA",""),
valueType_of(dataset[[i]]),
data_dict$Variables[
data_dict$Variables[["name"]] == i,][['valueType']])
)}
vT_data_dict[vT_data_dict[["name"]] == i,][['valueType']] <-
valueType_of(dataset[[i]])
}else{
test_vec <- silently_run(unique(c(cat_i,unique(dataset[[i]]))))
if(class(test_vec)[[1]] == 'try-error')
test_vec <- unique(c(as.character(cat_i),as.character(unique(dataset[[i]]))))
if(all(is.na(dataset[[i]]))){
dataset[[i]] <-
as_valueType(dataset[[i]],
ifelse(
is.null(data_dict$Variables[["valueType"]])|
is.na(data_dict$Variables[data_dict$Variables[["name"]] == i,][['valueType']]),
valueType_guess(cat_i),
data_dict$Variables[data_dict$Variables[["name"]] == i,][['valueType']]))
}else{
test_vT <- silently_run(as_valueType(test_vec,'integer'))
}
if(class(test_vT)[[1]] == 'try-error')
test_vT <- as_valueType(test_vec,valueType_guess(test_vec))
vT_data_dict[vT_data_dict[["name"]] == i,][['valueType']] <- valueType_of(test_vT)
}
}
vT_list<- madshapR::valueType_list
vT_data_dict <-
left_join(vT_data_dict,vT_list, by = "valueType") %>%
select("name", valueType_data_dict = "valueType",typeof_data_dict = "typeof")
vT_dataset <-
dataset %>%
summarise(across(everything(), ~ valueType_of(.))) %>%
pivot_longer(cols = everything()) %>%
rename(valueType = "value") %>%
left_join(vT_list, by = "valueType") %>%
select("name", valueType_dataset = "valueType",typeof_dataset = "typeof")
vT_final <-
vT_data_dict %>%
full_join(vT_dataset,by = join_by('name')) %>%
mutate(valueType = ifelse(
.data$`valueType_data_dict` == "integer",
.data$`valueType_dataset`,
.data$`valueType_data_dict`)) %>%
mutate(typeof = ifelse(
.data$`typeof_data_dict` == "integer",
.data$`typeof_dataset`,
.data$`typeof_data_dict`)) %>%
select('name','valueType','typeof')
data_dict[['Variables']]['typeof'] <-
data_dict[['Variables']]['name'] %>%
left_join(vT_final %>%
select("name", "typeof"), by = "name") %>%
select("typeof")
# }
# if(length(data_dict[['Variables']][['valueType']]) > 0){
data_dict[['Variables']]['valueType'] <-
data_dict[['Variables']]['name'] %>%
left_join(vT_final %>%
select("name", "valueType"), by = "name") %>%
select("valueType")
# }
data_dict <- as_data_dict_mlstr(data_dict)
return(data_dict)
# }
}
if(is_data_dict(from) & is_dataset(to)){
# test data_dict
tryCatch({data_dict <-
as_data_dict_mlstr(from, name_standard = FALSE)},
warning = function(cond){
stop(call. = FALSE,cond)})
# test dataset
dataset <- as_dataset(to,col_id = attributes(to)$`madshapR::col_id`)
preserve_attributes <- attributes(dataset)$`madshapR::col_id`
# dataset must match
if(suppressWarnings(check_dataset_variables(dataset, data_dict)) %>%
dplyr::filter(str_detect(.data$`condition`,"\\[ERR\\]")) %>% nrow > 0){
stop(call. = FALSE,
"Names across your data dictionary differ from names across the dataset.",
bold("\n\nUseful tip:"),
" Use dataset_evaluate(dataset, data_dict) for a full assessment of the dataset"
)}
if(ncol(dataset) == 0) return(dataset)
data_dict_data <-
data_dict_extract(dataset) %>%
as_data_dict_mlstr(name_standard = FALSE)
is_factor <-
dataset %>%
reframe(across(everything(), ~ class(.))) %>%
pivot_longer(everything()) %>%
dplyr::filter(.data$`value` == "factor")
data_dict_data[['Variables']] <-
data_dict_data[['Variables']] %>%
select(-"valueType") %>%
left_join(data_dict[['Variables']] %>%
select("name", "valueType"),by = "name")
for(i in names(dataset)){
dataset[[i]] <-
as_valueType(
x = dataset[[i]],
valueType = data_dict[['Variables']][[
which(data_dict[['Variables']]$`name` == i),
'valueType']])}
dataset <-
data_dict_apply(dataset, data_dict_data) %>%
mutate(across(c(is_factor$`name`), ~ as.factor(.))) %>%
as_dataset(col_id = preserve_attributes)
return(dataset)
}
if(is_dataset(from) & is_dataset(to))
stop(call. = FALSE, "The argument are both datasets.")
if(is_data_dict(from) & is_data_dict(to))
stop(call. = FALSE, "The argument are both data dictionaries.")
if(is_dataset(to)) {
message("The argument is not a data dictionary.")
as_data_dict(from) }
if(is_dataset(from)) {
message("The argument is not a data dictionary.")
as_data_dict(to) }
if(is_data_dict(to)) {
message("The argument is not a dataset.")
as_data_dict(from) }
if(is_data_dict(from)) {
message("The argument is not a dataset.")
as_dataset(to) }
message(
"The arguments are neither a dataset nor a data dictionary.")
silently_run(stop(call. = FALSE))
}
#' @title
#' Guess the first possible valueType of an object (Can be a vector)
#'
#' @description
#' Provides the first possible valueType of a variable. The function tries to
#' assign the valueType of the object first to 'boolean', then 'integer', then
#' 'decimal', then 'date'. If all others fail, the default valueType is 'text'.
#'
#' @details
#' A data dictionary contains the list of variables in a dataset and metadata
#' about the variables and can be associated with a dataset. A data dictionary
#' object is a list of data frame(s) named 'Variables' (required) and
#' 'Categories' (if any). To be usable in any function, the data frame
#' 'Variables' must contain at least the `name` column, with all unique and
#' non-missing entries, and the data frame 'Categories' must contain at least
#' the `variable` and `name` columns, with unique combination of
#' `variable` and `name`.
#'
#' A dataset is a data table containing variables. A dataset object is a
#' data frame and can be associated with a data dictionary. If no
#' data dictionary is provided with a dataset, a minimum workable
#' data dictionary will be generated as needed within relevant functions.
#' Identifier variable(s) for indexing can be specified by the user.
#' The id values must be non-missing and will be used in functions that
#' require it. If no identifier variable is specified, indexing is
#' handled automatically by the function.
#'
#' The valueType is a declared property of a variable that is required in
#' certain functions to determine handling of the variables. Specifically,
#' valueType refers to the
#' [OBiBa data type of a variable](https://opaldoc.obiba.org/en/dev/variables-data.html#value-types).
#' The valueType is specified in a data dictionary in a column 'valueType' and
#' can be associated with variables as attributes. Acceptable valueTypes
#' include 'text', 'integer', 'decimal', 'boolean', datetime', 'date'. The full
#' list of OBiBa valueType possibilities and their correspondence with R data
#' types are available using [valueType_list]. The valueType can be used to
#' coerce the variable to the corresponding data type.
#'
#' @seealso
#' [Opal documentation](https://opaldoc.obiba.org/en/dev/magma-user-guide/value/type.html)
#'
#' @param x Object. Can be a vector.
#'
#' @returns
#' A character string which is the first possible valueType of the input object.
#'
#' @examples
#' {
#'
#' # use madshapR_DEMO provided by the package
#'
#' dataset <- madshapR_DEMO$dataset_TOKYO
#' valueType_guess(dataset$dob)
#'
#' valueType_guess(mtcars$cyl)
#'
#'}
#'
#' @import dplyr tidyr fabR
#' @importFrom rlang .data
#'
#' @export
valueType_guess <- function(x){
# check if the col is empty
if(is.list(x) & sum(nrow(x)) <= 1)
return(valueType_guess(x = x[[1]]))
# check if the col is a vector
if(is.list(x))
stop(call. = FALSE,"'list' object cannot be coerced to valueType")
# check if all is na
if(all(is.na(x))) return(valueType_of(x))
# else :
x <- unique(x)
x <- x[!is.na(x)]
vT_list <- madshapR::valueType_list
test_vT_integer <-
silently_run(as_valueType(as.character(x),"integer"))
if(class(test_vT_integer)[[max(length(class(test_vT_integer)))]][1] == 'integer'){
if(is.logical(x)){
return('boolean')}
return('integer')
}
test_vT_decimal <-
silently_run(as_valueType(as.character.default(x),"decimal"))
if(class(test_vT_decimal)[[1]] != 'try-error'){
test_vT_date <- silently_run(as_valueType(x ,"date"))
if(class(test_vT_date)[[1]] != 'try-error'){
return('date')}
test_vT_datetime <- silently_run(as_valueType(x ,"datetime"))
if(class(test_vT_datetime)[[1]] != 'try-error'){
return('datetime')}
return('decimal')
}
test_vT_date <- silently_run(as_valueType(x ,"date"))
if(class(test_vT_date)[[1]] != 'try-error'){
return('date')}
test_vT_datetime <- silently_run(as_valueType(x ,"datetime"))
if(class(test_vT_datetime)[[1]] != 'try-error'){
return('datetime')}
return(valueType_of(x))
# t1 = Sys.time()
# # test_vT_boolean <-
# # silently_run(as_valueType(as.character.default(x),"boolean"))
# #
# # test_vT_integer <-
# # silently_run(as_valueType(as.character.default(x),"integer"))
#
# test_vT_decimal <-
# silently_run(as_valueType(as.character.default(x),"decimal"))
#
# test_vT_date <-
# silently_run(as_valueType( x ,"date"))
#
# test_vT_datetime <-
# silently_run(as_valueType( x ,"datetime"))
#
# test_vT_text <-
# as_valueType( x , "text")
#
# t2 = Sys.time()
# test_vT <-
# tribble(
# ~`valueType` ,~`class` ,
# # "boolean" ,
# # class(test_vT_boolean)[[max(length(class(test_vT_boolean)))]][1],
# #
# # "integer" ,
# # class(test_vT_integer)[[max(length(class(test_vT_integer)))]][1],
# #
# "decimal" ,
# class(test_vT_decimal)[[max(length(class(test_vT_decimal)))]][1],
#
# "date" ,
# class(test_vT_date)[[max(length(class(test_vT_date)))]][1],
#
# "datetime" ,
# class(test_vT_datetime)[[max(length(class(test_vT_datetime)))]][1]
#
# ) %>%
# dplyr::filter(.data$`class` != "try-error") %>%
# summarise(
# valueType = paste0(.data$`valueType`,collapse = "|"),
# class = paste0(.data$`class`,collapse = "|")) %>%
# mutate(
# valueType =
# case_when(
# # .data$`valueType` == "boolean|integer" ~ "boolean" ,
# .data$`valueType` == "boolean|integer|decimal" ~ "integer" ,
# # .data$`valueType` == "integer|decimal" ~ "integer" ,
# .data$`valueType` == "integer|decimal|date" ~ "date" ,
# .data$`valueType` == "integer|decimal|datetime" ~ "datetime" ,
# .data$`valueType` == "decimal|date" ~ "date" ,
# .data$`valueType` == "date|datetime" ~ "date" ,
# .data$`valueType` == "boolean|integer|decimal|date" ~ valueType_of(x),
# TRUE ~ .data$`valueType`
# )) %>% pull(.data$`valueType`)
#
# if(test_vT == "") test_vT <- 'text'
#
# message(paste0(test_vT," ",t2-t1))
# return(test_vT)
}
#' @title
#' Validate and coerce any object according to a given valueType
#'
#' @description
#' Attributes a valueType to an object, that can be a vector, or in a data frame
#' using [dplyr::mutate].
#'
#' @details
#' The valueType is a declared property of a variable that is required in
#' certain functions to determine handling of the variables. Specifically,
#' valueType refers to the
#' [OBiBa data type of a variable](https://opaldoc.obiba.org/en/dev/variables-data.html#value-types).
#' The valueType is specified in a data dictionary in a column 'valueType' and
#' can be associated with variables as attributes. Acceptable valueTypes
#' include 'text', 'integer', 'decimal', 'boolean', datetime', 'date'. The full
#' list of OBiBa valueType possibilities and their correspondence with R data
#' types are available using [valueType_list]. The valueType can be used to
#' coerce the variable to the corresponding data type.
#'
#' @seealso
#' [Opal documentation](https://opaldoc.obiba.org/en/dev/magma-user-guide/value/type.html)
#'
#' @param x Object to be coerced. Can be a vector.
#' @param valueType A character string of the valueType used to coerce x.
#'
#' @returns
#' The object coerced accordingly to the input valueType.
#'
#' @examples
#' {
#'
#' # use madshapR_DEMO provided by the package
#'
#' dataset <- madshapR_DEMO$dataset_TOKYO
#' as_valueType(head(dataset$dob),'date')
#'
#' # as_valueType is compatible with tidyverse philosophy
#' library(dplyr)
#' mtcars %>% mutate(cyl = as_valueType(cyl,'integer')) %>% head()
#'
#'}
#'
#' @import dplyr tidyr fabR
#' @importFrom crayon bold
#' @importFrom rlang .data
#'
#' @export
as_valueType <- function(x, valueType = 'text'){
# check if the col is empty
if(is.list(x) & sum(nrow(x)) <= 1) return(as_valueType(x = x[[1]], valueType))
# check if the col is a vector
if(is.list(x))
stop(call. = FALSE,"'list' object cannot be coerced to valueType")
class_x <- class(x)[[max(length(class(x)))]]
x_init <- x
# if x is already the output format, no need to go further
if(class_x == "Date" & valueType == "date") return(x)
if(class_x == "POSIXt" & valueType == "datetime") return(x)
if(is.integer(x) & valueType == "integer") return(x)
if(class_x == "numeric" & valueType == "decimal") return(x)
if(is.logical(x) & valueType == "boolean") return(x)
if(is.na(valueType) | valueType == "text") return(as.character.default(x))
vT_list <- madshapR::valueType_list
# check if valueType exists
if(!valueType %in% vT_list$`valueType`) {
stop(call. = FALSE,
"\nThe valueType provided does not exists. Please refer to documentation.",
bold("\n\nUseful tip:"),
" Use data_dict_evaluate(data_dict) to get a full assessment of your
data dictionary")}
dataType <- vT_list[[which(vT_list['valueType'] == valueType),'call']]
if(dataType == "as_any_date") x <-
as.character.default(x)
if(dataType == "as_any_boolean") x <-
return(as_any_boolean(as.character.default(x)))
if(dataType == "as_any_integer") x <-
return(as_any_integer(as.character.default(x)))
if(class(x)[1] == "factor") x <-
as.character.default(x)
if(dataType == "as_any_date"){
if(class_x == "POSIXt"){
x <-
as_valueType(x,'integer') %>%
as.POSIXct.numeric(tz = 'UTC') %>%
as.character()
}
date_format <-
guess_date_format(
tibble(as.character.default(
sample(x[!is.na(x)], size = min(length(x[!is.na(x)]),20)))))
if(date_format$`% values formated` == 100){
x_temp <- as_any_date(as.character.default(x), date_format$`Date format`)
}else{x_temp <- NA}
}else{
x_temp <- do.call(dataType, list(x)) %>% unlist
}
condition <- tibble(to_test = x_temp, original = x)
if(length(x_temp) == 0){
return(x_temp)}
if(valueType %in% c("text","locale","point","linestring","polygon","binary")){
return(x_temp)}
if(!all(is.na(condition$`to_test`) == is.na(condition$`original`))){
test_condition <- FALSE
}else{
test_condition <-
distinct(condition[which(!is.na(condition['original'])),])
if(valueType %in% c("integer","decimal")){
test_condition <-
test_condition %>%
mutate(across(everything(), ~ as.numeric(as.character.default(.)))) %>%
mutate(test = .data$`to_test` == .data$`original`) %>%
pull(.data$`test`) %>% all}
if(valueType %in% c("boolean")){
test_condition <-
test_condition %>%
mutate(
across(everything(), ~ as_any_boolean(as.character.default(.)))) %>%
mutate(test = .data$`to_test` == .data$`original`) %>%
pull(.data$`test`) %>% all}
if(valueType %in% c("date")){
test_condition <-
test_condition %>%
mutate(across(
"original",
~ as_any_date(as.character.default(.),date_format$`Date format`))) %>%
mutate(
test = toString(.data$`to_test`) == toString(.data$`original`)) %>%
pull(.data$`test`) %>% all}
if(valueType %in% c("datetime")){
test_condition <-
test_condition %>%
mutate(
across(everything(), ~ as.POSIXct.default(.))) %>%
mutate(test = .data$`to_test` == .data$`original`) %>%
pull(.data$`test`) %>% all}
}
# test if dataset and data_dict content match
if(test_condition == FALSE){
stop(call. = FALSE,
"\n
The valueType conflicts with the data type. Object cannot be coerced to
valueType",
bold("\n\nUseful tip:"),
" Use valueType_guess(x) to evaluate the first potential valueType.
For further investigation, you can use dataset_evaluate(dataset, data_dict).")
}
return(x_temp)
}
#' @title
#' Validate and coerce any object as a taxonomy
#'
#' @description
#' Confirms that the input object is a valid taxonomy and returns it as a
#' taxonomy with the appropriate `madshapR::class` attribute. This function
#' mainly helps validate input within other functions of the package but could
#' be used to check if a taxonomy is valid.
#'
#' @details
#' A taxonomy is a classification schema that can be defined for variable
#' attributes. A taxonomy is usually extracted from an
#' [Opal environment](https://www.obiba.org/pages/products/opal/), and a
#' taxonomy object is a data frame that must contain at least the columns
#' `taxonomy`, `vocabulary`, and `terms`. Additional details about Opal
#' taxonomies are
#' [available online](https://opaldoc.obiba.org/en/latest/web-user-guide/administration/taxonomies.html).
#'
#' @seealso
#' [Opal documentation](https://opaldoc.obiba.org/en/dev/magma-user-guide/value/type.html)
#'
#' @param object A potential taxonomy to be coerced.
#'
#' @returns
#' A list of data frame(s) with `madshapR::class` 'taxonomy'.
#'
#' @examples
#' {
#'
#' # use madshapR_DEMO provided by the package
#'
#' ###### Example
#' as_taxonomy(madshapR_DEMO$taxonomy_PARIS)
#'
#'}
#'
#' @import dplyr tidyr
#' @importFrom rlang .data
#'
#' @export
as_taxonomy <- function(object){
# check if names in object exist
if(sum(names(object) %in% c("taxonomy","vocabulary" ,"term")) != 3){
stop(call. = FALSE,
"\n
This object is not a taxonomy as defined by Maelstrom standards, which must
be a data frame containing at least 'taxonomy', 'vocabulary' and 'term' columns.
Please refer to documentation.",
# bold("\n\nUseful tip:"),
# " Use taxonomy_opal_get(opal) or taxonomy_opal_mlstr_get(opal) to get
# the taxonomy present in your Opal environment."
)}
# check if names in taxonomy exist
if(sum(names(object) %in%
c("vocabulary_short","taxonomy_scale",
"vocabulary_scale","term_scale")) == 4){
## create index if not exists
attributes(object)$`madshapR::class` <- "taxonomy_mlstr"
}else{
attributes(object)$`madshapR::class` <- "taxonomy_opal"}
return(object)
}
#' @title
#' Test if a character object is one of the valid valueType values
#'
#' @description
#' Confirms whether the input object is a valid valueType. This function mainly
#' helps validate input within other functions of the package but could be used
#' to check if a valueType is valid.
#'
#' @details
#' The valueType is a declared property of a variable that is required in
#' certain functions to determine handling of the variables. Specifically,
#' valueType refers to the
#' [OBiBa data type of a variable](https://opaldoc.obiba.org/en/dev/variables-data.html#value-types).
#' The valueType is specified in a data dictionary in a column 'valueType' and
#' can be associated with variables as attributes. Acceptable valueTypes
#' include 'text', 'integer', 'decimal', 'boolean', datetime', 'date'. The full
#' list of OBiBa valueType possibilities and their correspondence with R data
#' types are available using [valueType_list]. The valueType can be used to
#' coerce the variable to the corresponding data type.
#'
#' @seealso
#' [Opal documentation](https://opaldoc.obiba.org/en/dev/magma-user-guide/value/type.html)
#'
#' @param object A potential valueType name to be evaluated.
#'
#' @returns
#' A logical.
#'
#' @examples
#' {
#'
#' is_valueType('integer')
#' is_valueType('integre')
#'
#'}
#'
#' @import dplyr tidyr
#' @importFrom rlang .data
#'
#' @export
is_valueType <- function(object){
object <- object
vT_list <- madshapR::valueType_list
# check if valueType exists
if(!all(object %in% vT_list$`valueType`)) return(FALSE)
# else
return(TRUE)
}
#' @title
#' Test if an object is a valid taxonomy
#'
#' @description
#' Confirms whether the input object is a valid taxonomy. This function mainly
#' helps validate input within other functions of the package but could be
#' used to check if a taxonomy is valid.
#'
#' @details
#' A taxonomy is a classification schema that can be defined for variable
#' attributes. A taxonomy is usually extracted from an
#' [Opal environment](https://www.obiba.org/pages/products/opal/), and a
#' taxonomy object is a data frame that must contain at least the columns
#' `taxonomy`, `vocabulary`, and `terms`. Additional details about Opal
#' taxonomies are
#' [available online](https://opaldoc.obiba.org/en/latest/web-user-guide/administration/taxonomies.html).
#'
#' @param object A potential taxonomy to be evaluated.
#'
#' @returns
#' A logical.
#'
#' @examples
#' {
#'
#' # use madshapR_DEMO provided by the package
#'
#' is_taxonomy(madshapR_DEMO$taxonomy_PARIS)
#'
#'}
#'
#' @import dplyr tidyr fabR
#' @importFrom rlang .data
#'
#' @export
is_taxonomy <- function(object){
object <- object
# if only the data frame is given in parameter
test <- silently_run(try(as_taxonomy(object),silent = TRUE))
if(class(test)[1] == 'try-error') return(FALSE)
return(TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.