Nothing
## Copyright (C) 2016 Clayton Vieira Fraga Filho
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 2
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
##' @title Split a data frame into training and validation sets
##' @description Randomly assigns the distinct values found in the column
##'   \code{fieldName} (the "individuals") to a training group or a validation
##'   group, then splits the rows of \code{dataFrame} accordingly, so that all
##'   rows of one individual end up in the same set. This makes it possible to
##'   fit a regression equation on one set and measure its performance on the
##'   other.
##' @param dataFrame source of data (a data frame)
##' @param fieldName name of the single column of \code{dataFrame} whose
##'   distinct values identify the individuals to be split
##' @param percTraining fraction of the individuals reserved for training;
##'   must be greater than 0.01 and at most 1 (default 0.70)
##' @param seed integer passed to \code{set.seed} before sampling so the split
##'   is reproducible (default NULL: the random generator is not re-seeded)
##' @return A list with the elements \code{individuos} (a list holding the
##'   one-column data frames \code{treino} and \code{validacao}),
##'   \code{nroIndividuos} (number of distinct individuals),
##'   \code{percentual} (a list with the rounded fractions \code{validacao}
##'   and \code{treino}), \code{validacao} and \code{treino} (the rows of
##'   \code{dataFrame} belonging to each group).
##' @import sqldf
##' @export
separaDados <- function(dataFrame, fieldName, percTraining = 0.70, seed = NULL) {
  # Exactly one column name is required: it is pasted into the SQL below.
  if (length(fieldName) != 1L) {
    stop("fieldName nao pode ser um vetor, informe apenas um nome de campo!")
  }
  # NULL / NA / length checks come first so the numeric comparisons only ever
  # see a single, non-missing value.
  if (is.null(percTraining) || length(percTraining) != 1L ||
      is.na(percTraining) || percTraining <= 0.01 || percTraining > 1) {
    stop("Informe um percTraining entre 0.01 e 1")
  }
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Distinct individuals. sqldf() resolves table names in the calling frame,
  # so it must be called from this function body and the local data frames
  # must keep the names used inside the SQL strings.
  str_sql_individuos <- paste0(
    "SELECT distinct ", fieldName, " from dataFrame group by ", fieldName
  )
  dfIndividuos <- sqldf(str_sql_individuos)
  valores <- dfIndividuos[[1L]]

  # seq_len() stays empty when there are no individuals (1:0 would be c(1, 0)).
  indice <- seq_len(nrow(dfIndividuos))
  tamanho <- as.integer(floor(length(indice) * percTraining))
  indiceTreino <- indice[sample.int(length(indice), size = tamanho)]
  # Explicit complement: negative indexing with an empty training sample
  # (x[-integer(0)]) would select nothing instead of everything.
  indiceValidacao <- setdiff(indice, indiceTreino)

  dfIndividuos_treino <- data.frame(valores[indiceTreino])
  names(dfIndividuos_treino) <- fieldName
  dfIndividuos_validacao <- data.frame(valores[indiceValidacao])
  names(dfIndividuos_validacao) <- fieldName

  nroIndividuos <- nrow(dfIndividuos)
  nroTreino <- nrow(dfIndividuos_treino)
  nroValidacao <- nrow(dfIndividuos_validacao)

  cat(paste0("\nTotal de individuos(", fieldName, "): ", nroIndividuos))
  cat(paste0("\nTotal para Ajuste/Treino: ", nroTreino, " (",
             round((nroTreino / nroIndividuos) * 100, 2), "%)"))
  cat(paste0("\nTotal para Teste: ", nroValidacao, " (",
             round((nroValidacao / nroIndividuos) * 100, 2), "%)"))

  # Rows of the original data that belong to each group of individuals.
  str_SQL_treino <- paste0(
    "SELECT * from dataFrame where ", fieldName,
    " in (SELECT distinct ", fieldName, " from dfIndividuos_treino)"
  )
  dataFrame_treino <- sqldf(str_SQL_treino)

  str_SQL_validacao <- paste0(
    "SELECT * from dataFrame where ", fieldName,
    " in (SELECT distinct ", fieldName, " from dfIndividuos_validacao)"
  )
  dataFrame_validacao <- sqldf(str_SQL_validacao)

  # The training share is reported as the complement of the rounded
  # validation share, so the two always add up to 1.
  percValidacao <- round(nroValidacao / nroIndividuos, 2)
  percentualResult <- list(
    validacao = percValidacao,
    treino = 1 - percValidacao
  )

  cat("\nConcluido\n")

  list(
    individuos = list(
      treino = dfIndividuos_treino,
      validacao = dfIndividuos_validacao
    ),
    nroIndividuos = nroIndividuos,
    percentual = percentualResult,
    validacao = dataFrame_validacao,
    treino = dataFrame_treino
  )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.