#'Summary of database
#'
#'@param dataset A database to summary
#'@return A summary of data base
#'@export
Summ_data <- function(dataset){
tipoNum <- sum(sapply(dataset, class) %in% c("double", "numeric"))
tipoDis <- sum(sapply(dataset, class) %in% c("double", "numeric"))
tipoNAs <- sum(sapply(as.data.frame(sapply(dataset, is.na)),sum))
return(cat(paste("La base de datos consta de", ncol(dataset), "columnas de", nrow(dataset),
"observaciones cada una y con un total de" , sum(is.na(dataset)),
"valores perdidos.\n"),
paste("De estas columnas,", tipoDis,
"son discretas,", tipoNum, "son continuas y", tipoNAs,
"estan completamente vacias.")))
}
#'Eliminate empty columns of dataset
#'
#'@param dataset A database to summary
#'@param perc A numeric value.
#'@return A dataset without empty columns
#'@export
all_miss_col <- function(dataset, perc){
int_dat <- sum(sapply(as.data.frame(sapply(iris, is.na)),sum))
if(int_dat == 0){
warning("No se tienen columnas completamente vacias.")
return(dataset)
}
col_na <- which(colMeans(is.na(dataset))>(perc/100))
return(dataset[,-col_na])
}
#'Change columns class
#'
#'@param dataset A database to use
#'@param columns A character vector with the names of columns to change
#'@param prop A character value with the class propriety: "factor" or "double"
#'@import tidyverse
#'@return A dataset object.
#'@export
cambio_prop <- function(dataset, columns, prop){
if (!is.data.frame(dataset)) {return(warning("dataset is not a data.frame object"))}
if (prop == "factor") {return(dataset %>% mutate_at(names(dataset[columns]), as.factor))}
if (prop == "double") {return(dataset %>% mutate_at(names(dataset[columns]), as.double))}
}
#'Impute missing values
#'
#'@param dataset A database to use
#'@param method A character value with method name: "omit" or "RF"
#'@import missForest
#'@return A dataset object.
#'@export
imp_data <- function(dataset, method){
if (!method %in% c("omit", "RF")) {warning("method not found");return(dataset)}
if (method == "omit") {return(na.omit(dataset))}
if (method == "RF") {return(missForest(dataset)$ximp)}
}
#'Summary of outliers and influence points
#'
#'@param data A database to use
#'@param var_est Acharacter value with the column name to study
#'@importFrom stats lm
#'@importFrom stats na.omit
#'@import olsrr
#'@import dplyr
#'@return A cat summary
#'@export
out_inf_points <- function(data, var_est){
data_aux <- data[which(sapply(data, class)=="numeric")]
data_aux2 <- data_aux[-which(names(data_aux)==var_est)]
model <- lm(data_aux[[var_est]] ~ ., data = data_aux2)
ati <- ols_plot_resid_stud_fit(model, print_plot = FALSE)
out <- ati$plot$data[as.vector(ati$plot$data["color"] == "outlier"),]$obs
inf <- ols_plot_cooksd_bar(model, print_plot = FALSE)
ifp <- inf$plot$data[as.vector(inf$plot$data["color"] == "outlier"),]$obs
a <- paste("Cuidado, se identificaron las siguientes observaciones con datos atipicos **",
toString(out), "**\n Se recomienda verificar que esta no sea un error de captura y, de ser asi, eliminarla.")
b <- paste("Cuidado, se identificaron que las siguientes observaciones tienen una alta influencia **",
toString(ifp), "**\n Se recomienda verificar que esta no sea un error de captura y, de ser asi, eliminarla.")
c <- "No se encontraron observaciones con datos atipicos."
d <- "No se encontraron observaciones con una alta influencia"
cat(if_else(length(out)>0, a, c),"\n\n",
if_else(length(ifp)>0, b, d))
}
#'Summary of all data
#'
#'@param data A database to use
#'@param var_est Acharacter value with the column name to study, NULL by default
#'@import olsrr
#'@import dplyr
#'@return A cat summary
#'@export
Summ_all_data <- function(data, var_est=NULL){
dataset <- data
tipoNum <- sum(sapply(data, class) %in% c("double", "numeric"))
tipoDis <- ncol(data) - tipoNum
tipoNAs <- sum(sapply(as.data.frame(sapply(data, is.na)),sum)>nrow(data)*0.8)
ss1 <- paste("La base de datos consta de", ncol(data), "columnas de", nrow(data),
"observaciones cada una y con un total de" , sum(is.na(data)),
"valores perdidos.\n",
"De estas columnas,", tipoDis,
"son discretas,", tipoNum, "son continuas y", tipoNAs,
"estan completamente vacias.")
if(is.null(var_est)){
e <- f <- ""
if(tipoNum>2){
e <- "Dado que no has ingresado una variable de estudio, te sugerimos que realices un analisis PCA"
}
if(tipoDis>2){
f <- "Dado que no has ingresado una variable de estudio, te sugerimos que realices un analisis MCA"
}
return(cat(ss1, "\n\n", e, "\n\n", f))
}else{
cVar <- class(data[[var_est]]) == "numeric"
nFactor <- length(unique(data[[var_est]]))
e <- f <- g <- ""
if(tipoNum>2){
e <- "Te sugerimos que realices un analisis PCA para intentar reducir dimensiones"
}
if(tipoDis>2){
f <- "Te sugerimos que realices un analisis MCA para intentar reducir dimensiones"
}
if(cVar){
g <- "Y posteriormente realizar un modelo lineal"
data_aux <- data[which(sapply(data, class)=="numeric")]
data_aux2 <- data_aux[-which(names(data_aux)==var_est)]
model <- lm(data_aux[[var_est]] ~ ., data = data_aux2)
ati <- ols_plot_resid_stud_fit(model, print_plot = FALSE)
out <- ati$plot$data[as.vector(ati$plot$data["color"] == "outlier"),]$obs
inf <- ols_plot_cooksd_bar(model, print_plot = FALSE)
ifp <- inf$plot$data[as.vector(inf$plot$data["color"] == "outlier"),]$obs
a <- paste("Cuidado, se identificaron las siguientes observaciones con datos atipicos **",
toString(out), "**\n Se recomienda verificar que esta no sea un error de captura y, de ser asi, eliminarla.")
b <- paste("Cuidado, se identificaron que las siguientes observaciones tienen una alta influencia **",
toString(ifp), "**\n Se recomienda verificar que esta no sea un error de captura y, de ser asi, eliminarla.")
c <- "No se encontraron observaciones con datos atipicos."
d <- "No se encontraron observaciones con una alta influencia"
cat(ss1, "\n\n",if_else(length(out)>0, a, c),"\n\n",
if_else(length(ifp)>0, b, d), "\n\n", e, "\n\n", f, "\n\n", g)
}else{
if(nFactor==2){
g <- "Y posteriormente reaalizar un modelo lineal generalizado"
cat(ss1, "\n\n", e, "\n\n", f, "\n\n", g)
}
if(nFactor>2){
g <- "Luego, realizar un analisis de cluster y finalmente, si se puede agrupar en dos conjuntos,
realizar un modelo lineal generalizado."
cat(ss1, "\n\n", e, "\n\n", f, "\n\n", g)
}
}
}
}
#'Create animated graphics
#'
#'@param data A database to use
#'@param varA1 A character value with the column of x axis
#'@param two_var A logical value: T if the graphic contains variable y; F if not
#'@param varA2 A character value with the column of y axis
#'@param group A character value with the column of group
#'@param type_graf A character value with the type of graphic
#'@import tidyverse
#'@import ggplot2
#'@import gganimate
#'@return A plot object
#'@export
animar <- function(data, varA1, two_var, varA2, group, type_graf){
if("" %in% c(varA1, two_var, group, type_graf) | "NULL" %in% c(varA1, two_var, group, type_graf)){return()}
data <- data %>%
mutate(group1 = data[[group]], aux1 = data[[varA1]])
if(two_var){
if(varA2 %in% c("NULL", "")){return()}
graf <- data %>%
ggplot(aes_string(x= varA1, y= varA2))
graf <- switch (type_graf,
"Scatterplot" = graf + geom_point(aes(colour=group1)) +
scale_colour_hue() +
transition_states(group1,
transition_length = 3,
state_length = 2) + shadow_mark() +
labs(title = paste("Scatterplot", varA1, "vs", varA2, "by", group),colour = group, subtitle = paste("State:" , "{closest_state}")),
"Columns" = graf + geom_col(aes_string(fill=varA1, group = 1L)) +
transition_states(aux1, wrap = FALSE) + shadow_mark() +
labs(title = "Grafico de barras", subtitle = paste("Suma total de", if_else(class(data[[varA1]])=="factor", varA2, varA1),
"by", if_else(class(data[[varA1]])=="factor", varA1, varA2)))
)
}else{
graf <- switch (type_graf,
"Columns" = data %>%
group_by(group1) %>%
rename(temp = varA1) %>%
summarise(Media = mean(temp)) %>%
ggplot(aes(x= group1, y = Media)) +
geom_col(aes(fill=group1, group = 1L)) +
transition_states(group1, wrap = FALSE) +
shadow_mark() +
geom_text(aes(label = round(Media, 2), y = Media + 0.07), position = position_dodge(0.9)) +
labs(title = "Grafico de barras", subtitle = paste("Media de", varA1,
"by", group), fill = group)
)
}
return(graf)
}
#'Create neural network
#'
#'@param data A database to use
#'@param var_est A character value with the column of x axis
#'@param vPerc A logical value: T if the graphic contains variable y; F if not
#'@param unitsE A character value with the column of y axis
#'@param activationE A character value with the column of group
#'@param lDrop A character value with the type of graphic
#'@param epochsE ---
#'@param batch_sizeE ---
#'@param validation_splitE ---
#'@import tidyverse
#'@import tidyr
#'@import caret
#'@import keras
#'@return A plot object
#'@export
redes <- function(data, var_est, vPerc, unitsE, activationE, lDrop= NULL, epochsE, batch_sizeE, validation_splitE){
if (!sum(sapply(data[-which(names(data)==var_est)], class) %in% c("numeric", "integer")) == length(data) -1) {
cat(paste("Solo se admiten variables numericas como variables explicativas"))
}else{
modelF <- function(model,training,trainLabels, test, testLabels,testtarget, data){
model %>%
compile(loss = 'categorical_crossentropy',
optimizer = 'adam',
metrics = 'accuracy')
history <- model %>%
fit(training,
trainLabels,
epochs = epochsE,
batch_size = batch_sizeE,
validation_split = validation_splitE)
ptest <- model %>%
evaluate(test, testLabels)
pred <- model %>%
predict_classes(test)
tble <- table(Predicted = pred, Actual = testtarget)
return(list(model=model, ptest = ptest, predT = tble, data = data, plt = history))
}
aux <- which(names(data)==var_est)
aux1 <- as.numeric(length(table(data[[var_est]])))
data[[var_est]] <- as.numeric(data[[var_est]])
data[[var_est]] <- data[[var_est]] - 1
data <- as.matrix(data)
data[,-aux] <- normalize(data[,-aux])
dimnames(data) <- NULL
# ind <- sample(2, nrow(data), replace = T, prob = c((vPerc/100), 1-(vPerc/100)))
# training <- data[ind==1, -aux]
# test <- data[ind==2, -aux]
ind <- createDataPartition(data[[var_est]], p = vPerc/100, list = F)
training <- datosMod[ind, -aux]
test <- datosMod[-ind, -aux]
trainingtarget <- data[ind==1, aux]
testtarget <- data[ind==2, aux]
trainLabels <- to_categorical(trainingtarget)
testLabels <- to_categorical(testtarget)
model <- keras_model_sequential()
model %>%
layer_dense(units = unitsE[1], activation = activationE[1], input_shape = c(ncol(data)-1))
if (length(unitsE)==1) {
model %>%
layer_dense(units = aux1, activation = activationE[2])
return(modelF(model,training,trainLabels, test, testLabels,testtarget,data))
}else{
model %>%
layer_dropout(rate = (lDrop[1]/100))
}
if (length(unitsE)==2) {
model %>%
layer_dense(units = aux1, activation = activationE[2])
return(modelF(model,training,trainLabels, test, testLabels,testtarget, data))
}else{
model %>%
layer_dropout(rate = (lDrop[2]/100))
}
if (length(unitsE)==3) {
model %>%
layer_dense(units = aux1, activation = activationE[3])
return(modelF(model,training,trainLabels, test, testLabels,testtarget, data))
}
}
}
#'Create model
#'
#'@param data A database to use
#'@param var_est A character value with the column of x axis
#'@param var_exp A logical value: T if the graphic contains variable y; F if not
#'@param intercep A character value with the column of y axis
#'@param method A character value with the column of group
#'@param percTest A character value with the type of graphic
#'@param ln_vars ---
#'@param ln_vars_tr ---
#'@param trC_method ---
#'@param trC_num ---
#'@param trC_rep ---
#'@import tidyverse
#'@import rsample
#'@import caret
#'@return A plot object
#'@export
crear_modelo <- function(data, var_est, var_exp, intercep=NULL, method, percTest,
ln_vars=F, ln_vars_tr = NULL, trC_method = "none",
trC_num, trC_rep){
data <- data[c(var_est, var_exp)]
if(ln_vars){data <- data %>% mutate_at(ln_vars_tr, log)}
if(trC_num == 0){trC_num<- NA}
if(trC_rep == 0){trC_rep<- NA}
aux <- as.numeric(which(names(data)==var_est))
data <- data %>% mutate(y = data[[var_est]])
data <- data[-aux]
train_test <- initial_split(data = data, (percTest/100))
train <- training(train_test)
test <- testing(train_test)
if(intercep){
model <- train(
y ~ .,
train,
method = method,
trControl = trainControl(
method = trC_method,
number = trC_num,
repeats = trC_rep,
verboseIter = F
)
)
}else{
model <- train(
y ~ -1 + .,
train,
method = method,
trControl = trainControl(
method = trC_method,
number = trC_num,
repeats = trC_rep,
verboseIter = F
)
)
}
return(list(modelo = model, test = test))
}
#'Significant correlations
#'
#'@param base A database to use
#'@param umbrales A numeric value with Significant umbrals
#'@importFrom stats cor
#'@return Significant correlations
#'@export
corSig <- function(base, umbrales){
cormat <- cor(base)
for (umbral in umbrales) {
for (i in 1:(nrow(cormat)-1)) {
for (j in (i+1):ncol(cormat)) {
if (abs(cormat[i,j]) > umbral & abs(cormat[i,j]) < umbral+.1) {
cat("Las variables:", names(base)[i], "y", names(base)[j], "tienen una correlacion de", round(cormat[i,j],4), "\n")
}
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.