rmodel: Tools for data analysis

Documented in r.segment r.segment.dataframe r.segment.target r.segment.target.table

# roxygen2::roxygenise()

#' r.percentils
#' @export
r.percentils <- function (var,
                          ndiv = 10) {
  return(quantile(var[which(!is.na(var))], seq(from=0, to=1, length.out=(ndiv+1))))
}

#' r.binning
#' @seealso \code{\link{r.binning}} \code{\link{r.segment}} \code{\link{r.segment.target}} \code{\link{r.segment.dataframe}} \code{\link{r.segment.target.table}}
#' @export
r.binning <- function (var,
                       div = NULL,
                       ndiv = 10,
                       labelText = FALSE,
                       labelNumeric = FALSE,
                       prefix = "Grup",
                       link1 = " ",
                       link2 = ", ")
{
  if(missing(div) || is.null(div) || is.na(div)) {
    if(is.null(ndiv) || is.na(ndiv)) ndiv = 10
    div = r.percentils(var, ndiv=ndiv)
    div = unique(div)
    if (length(div)<=1 || ndiv==1) div = r.max(var)
    else if (length(div)<ndiv+1) {
      div = div[1:(length(div)-1)]
    } else {
      div = div[2:(length(div)-1)]
    }
  }
  ndiv = length(div)+1
  
  output = rep(NA, length(var))
  
  for (i in ndiv:1) {
    if (i==1) {      
      ind = which(var<=div[1])
      sufix = paste0("x<=",formatC(div[1],width=max(nchar(as.character(div))),format="f",flag="0"))
    } else if (i==ndiv) {
      ind = which(div[length(div)]<var)
      sufix = paste0(formatC(div[length(div)],width=max(nchar(as.character(div))),format="f",flag="0"),"<x")
    } else {
      ind1 = which(div[i-1]<var)
      ind2 = which(var<=div[i])  
      ind = intersect(ind1, ind2)
      sufix = paste0(formatC(div[i-1],width=max(nchar(as.character(div))),format="f",flag="0"),"<x",
                     link2,
                     "x<=",formatC(div[i],width=max(nchar(as.character(div))),format="f",flag="0"))
    }
    if (labelText && labelNumeric)
      output[ind] = paste0(prefix, link1, formatC(i-1,width=ceiling(log10(ndiv)),format="d",flag="0"), link1, sufix)
    else if (labelText)
      output[ind] = paste0(prefix, link1, sufix)
    else if (labelNumeric)
      output[ind] = paste0(prefix, link1, formatC(i-1,width=ceiling(log10(ndiv)),format="d",flag="0"))
    else
      output[ind] = i
  }
  return (output)
}

#' r.segment
#' @param dades dataframe
#' @param segmentacion Array of strings. Names of the columns to create segmentation. Those variables must be categorical.
#' @return An array of numeric or labels according to segmentation.
#' @seealso \code{\link{r.binning}} \code{\link{r.segment}} \code{\link{r.segment.target}} \code{\link{r.segment.dataframe}} \code{\link{r.segment.target.table}}
#' @export
r.segment <- function(dades, segmentacion, 
                      prefix = "Grupo",
                      link = " ",
                      labelText = FALSE,
                      labelNumeric = FALSE,
                      missing=NA)
{
  nelements = NaN
  listValues = list()
  nGroups = 1
  for (ipos in 1:length(segmentacion)) {
    segment = segmentacion[ipos]
    var = dades[,segment]
    values = unique(var)
    listValues[[segment]] = values
    nelements[ipos] = length(values)
    nGroups = nGroups * length(values)
  }
  
  iGroup=1
  column = rep(missing, nrow(dades))
  finished = FALSE
  index = rep(1, length(segmentacion))
  while (!finished) {
    label = prefix
    if (labelNumeric) {
      if (label!="") label = paste0(label, link, formatC(iGroup-1,width=ceiling(log10(nGroups)),format="d",flag="0"))
      else label = formatC(iGroup-1,width=ceiling(log10(nGroups)),format="d",flag="0")
    }
    
    ind = 1:nrow(dades)
    for (ipos in 1:length(segmentacion)) {
      segment = segmentacion[ipos]
      ind = intersect(ind, which(dades[, segment]==listValues[[segment]][index[ipos]]))
      
      if (labelText) {
        if (label!="") label = paste0(label, link, segment, "-", listValues[[segment]][index[ipos]])
        else label = paste0(segment, "-", listValues[[segment]][index[ipos]])
      }      
    }
    if (!labelNumeric && !labelText) {
      label=iGroup
    }
    
    if (length(ind)>0) {
      column[ind] = label
    }
    
    ipos=1
    incrementFinished = FALSE
    while (ipos<=length(segmentacion) && !incrementFinished) {
      index[ipos] = index[ipos]+1
      if (index[ipos]>nelements[ipos]) {
        index[ipos] = 1
        ipos = ipos+1
      } else {
        incrementFinished = TRUE
      }
    }
    if (!incrementFinished) finished = TRUE
    iGroup = iGroup+1
  }
  return (column)
}

#' r.segment.target
#' @param dades dataframe
#' @param segmentacion Array of strings. Names of the columns to create segmentation. Those variables must be categorical.
#' @param target String. Name of the target column to calculate means for each segment. If targetValue is provided, then means are not calculated, and percentatge of counting targetValue is provided instead.
#' @param targetValue Numeric or Categorical. Value of target column to calculate counts instead of means.
#' @return An array of percentages of target for each segmentation.
#' @seealso \code{\link{r.binning}} \code{\link{r.segment}} \code{\link{r.segment.target}} \code{\link{r.segment.dataframe}} \code{\link{r.segment.target.table}}
#' @export
r.segment.target <- function(dades, segmentacion, target,
                             targetValue = NULL)
{
  nelements = NaN
  listValues = list()
  for (ipos in 1:length(segmentacion)) {
    segment = segmentacion[ipos]
    var = dades[,segment]
    values = unique(var)
    listValues[[segment]] = values
    nelements[ipos] = length(values)
  }
  
  column = rep(0, nrow(dades))
  finished = FALSE
  index = rep(1, length(segmentacion))
  while (!finished) {
    ind = 1:nrow(dades)
    for (ipos in 1:length(segmentacion)) {
      segment = segmentacion[ipos]
      ind = intersect(ind, which(dades[, segment]==listValues[[segment]][index[ipos]]))
    }
    
    if (length(ind)>0) {
      if (missing(targetValue) && is.numeric(dades[ind, target])) column[ind] = mean(dades[ind, target])
      else if (missing(targetValue) && !is.numeric(dades[ind, target])) column[ind] = mean(as.numeric(as.character(dades[ind, target])))
      else column[ind] = length(which(dades[ind, target]==targetValue))/length(ind)
    }
    
    ipos=1
    incrementFinished = FALSE
    while (ipos<=length(segmentacion) && !incrementFinished) {
      index[ipos] = index[ipos]+1
      if (index[ipos]>nelements[ipos]) {
        index[ipos] = 1
        ipos = ipos+1
      } else {
        incrementFinished = TRUE
      }
    }
    if (!incrementFinished) finished = TRUE
  }
  return (column)
}

#' r.segment.dataframe
#' @param dades dataframe
#' @param segmentacion Array of strings. Names of the columns to create segmentation. Those variables must be categorical.
#' @param target String. Name of the target column to calculate means for each segment. If targetValue is provided, then means are not calculated, and percentatge of counting targetValue is provided instead.
#' @param targetValue Numeric or Categorical. Value of target column to calculate counts instead of means.
#' @return A dataframe with segmentation as rows with some extra statistics columns.
#' @seealso \code{\link{r.binning}} \code{\link{r.segment}} \code{\link{r.segment.target}} \code{\link{r.segment.dataframe}} \code{\link{r.segment.target.table}}
#' @export
r.segment.dataframe <- function(dades, segmentacion, 
                                target = NULL,
                                targetValue = NULL,
                                prefix = "",
                                link = " ",
                                labelText = TRUE,
                                labelNumeric = FALSE,
                                missing=NA)
{
  if (!missing(target) && (missing(targetValue) || is.null(targetValue))) {
    stop("Si proporciones un target has de proporcionar tambe un targetValue")
  }
  if (!missing(targetValue) && (missing(target) || is.null(target))) {
    stop("Si proporciones un targetValue has de proporcionar tambe un target")
  }  
  nelements = NaN
  listValues = list()
  nGroups = 1
  for (ipos in 1:length(segmentacion)) {
    segment = segmentacion[ipos]
    var = dades[,segment]
    values = unique(var)
    listValues[[segment]] = values
    nelements[ipos] = length(values)
    nGroups = nGroups * length(values)
  }

  if (!missing(target) && !is.null(target)
      && !missing(targetValue) && !is.null(targetValue)) {
    dfOutput = data.frame(matrix(NA, nrow=nGroups , ncol=length(segmentacion)+7), stringsAsFactors=FALSE)
    names(dfOutput) = c("Segment", segmentacion, "Freq", 
                        paste0(target, "_Hits"),
                        paste0(target, "_Perc_Hits_HitsTotal"),
                        paste0(target, "_Perc_Hits_Segment"),
                        paste0(target, "_Perc_Hits_Total"),
                        paste0(target, "_Mean"))
  } else {
    dfOutput = data.frame(matrix(NA, nrow=nGroups , ncol=length(segmentacion)+2), stringsAsFactors=FALSE)
    names(dfOutput) = c("Segment", segmentacion, "Freq")
  }
  
  iGroup=1
  column = rep(missing, nrow(dades))
  finished = FALSE
  index = rep(1, length(segmentacion))
  while (!finished) {
    label = prefix
    if (labelNumeric) {
      if (label!="") label = paste0(label, link, formatC(iGroup-1,width=ceiling(log10(nGroups)),format="d",flag="0"))
      else label = formatC(iGroup-1,width=ceiling(log10(nGroups)),format="d",flag="0")
    }
    
    ind = 1:nrow(dades)
    for (ipos in 1:length(segmentacion)) {
      segment = segmentacion[ipos]
      ind = intersect(ind, which(dades[, segment]==listValues[[segment]][index[ipos]]))
      
      if (labelText) {
        if (label!="") label = paste0(label, link, segment, "-", listValues[[segment]][index[ipos]])
        else label = paste0(segment, "-", listValues[[segment]][index[ipos]])
      }      
    }
    if (!labelNumeric && !labelText) {
      label=iGroup
    }
    
    dfOutput[iGroup, 1] = label
    for (ipos in 1:length(segmentacion)) {
      segment = segmentacion[ipos]
      dfOutput[iGroup, 1+ipos] = as.character(listValues[[segment]][index[ipos]])
    }
    dfOutput[iGroup, 1+length(segmentacion)+1] = length(ind)
    if (!missing(target) && !is.null(target) 
        && !missing(targetValue) && !is.null(targetValue)) {
      
      dfOutput[iGroup, 1+length(segmentacion)+2] = length(which(dades[ind, target]==targetValue))
      
      dfOutput[iGroup, 1+length(segmentacion)+3] = length(which(dades[ind, target]==targetValue))/length(which(dades[, target]==targetValue))
      if (length(ind)==0) 
        dfOutput[iGroup, 1+length(segmentacion)+4] = 0
      else
        dfOutput[iGroup, 1+length(segmentacion)+4] = length(which(dades[ind, target]==targetValue))/length(ind)
      dfOutput[iGroup, 1+length(segmentacion)+5] = length(which(dades[ind, target]==targetValue))/nrow(dades)
      
      if (length(ind)==0 || !is.numeric(dades[, target])) 
        dfOutput[iGroup, 1+length(segmentacion)+6] = 0
      else if (is.numeric(dades[ind, target])) 
        dfOutput[iGroup, 1+length(segmentacion)+6] = r.mean(dades[ind, target])
      else
        dfOutput[iGroup, 1+length(segmentacion)+6] = r.mean(as.numeric(as.character(dades[ind, target])))
    }
    
    ipos=1
    incrementFinished = FALSE
    while (ipos<=length(segmentacion) && !incrementFinished) {
      index[ipos] = index[ipos]+1
      if (index[ipos]>nelements[ipos]) {
        index[ipos] = 1
        ipos = ipos+1
      } else {
        incrementFinished = TRUE
      }
    }
    if (!incrementFinished) finished = TRUE
    iGroup = iGroup+1
  }
  
  rownames(dfOutput) = dfOutput[,1]
  return (dfOutput)
}

#' r.segment.target.table
#' @param dades dataframe
#' @param segmentacion Array of strings. Names of the columns to create segmentation. Those variables must be categorical.
#' @param target String. Name of the target column to calculate means for each segment. If targetValue is provided, then means are not calculated, and percentatge of counting targetValue is provided instead.
#' @param targetValue Numeric or Categorical. Value of target column to calculate counts instead of means.
#' @return An array of percentages of target for each segmentation with labels.
#' @seealso \code{\link{r.binning}} \code{\link{r.segment}} \code{\link{r.segment.target}} \code{\link{r.segment.dataframe}} \code{\link{r.segment.target.table}}
#' @export
r.segment.target.table <- function(dades, segmentacion, target,
                                   targetValue = NULL,
                                   prefix = "Grupo",
                                   link = " ",
                                   labelText = FALSE,
                                   labelNumeric = FALSE)
{
  nelements = NaN
  listValues = list()
  nGroups = 1
  for (ipos in 1:length(segmentacion)) {
    segment = segmentacion[ipos]
    var = dades[,segment]
    values = unique(var)
    listValues[[segment]] = values
    nelements[ipos] = length(values)
    nGroups = nGroups * length(values)
  }
  
  iGroup=1
  output = rep(0, nGroups)
  finished = FALSE
  index = rep(1, length(segmentacion))
  while (!finished) {
    label = prefix
    if (labelNumeric) {
      if (label!="") label = paste0(label, link, formatC(iGroup-1,width=ceiling(log10(nGroups)),format="d",flag="0"))
      else label = formatC(iGroup-1,width=ceiling(log10(nGroups)),format="d",flag="0")
    }
    
    ind = 1:nrow(dades)
    for (ipos in 1:length(segmentacion)) {
      segment = segmentacion[ipos]
      ind = intersect(ind, which(dades[, segment]==listValues[[segment]][index[ipos]]))
      
      if (labelText) {
        if (label!="") label = paste0(label, link, segment, "-", listValues[[segment]][index[ipos]])
        else label = paste0(segment, "-", listValues[[segment]][index[ipos]])
      }      
    }
    if (!labelNumeric && !labelText) {
      label=iGroup
    }
    
    if (length(ind)>0) {
      names(output)[iGroup] = label
      output[iGroup] = length(which(dades[ind, target]==1))/length(ind)
      if (missing(targetValue) && is.numeric(dades[ind, target])) output[iGroup] = mean(dades[ind, target])
      else if (missing(targetValue) && !is.numeric(dades[ind, target])) output[iGroup] = mean(as.numeric(as.character(dades[ind, target])))
      else output[iGroup] = length(which(dades[ind, target]==targetValue))/length(ind)
    } else {
      names(output)[iGroup] = label
      output[iGroup] = 0
    }
    
    ipos=1
    incrementFinished = FALSE
    while (ipos<=length(segmentacion) && !incrementFinished) {
      index[ipos] = index[ipos]+1
      if (index[ipos]>nelements[ipos]) {
        index[ipos] = 1
        ipos = ipos+1
      } else {
        incrementFinished = TRUE
      }
    }
    if (!incrementFinished) finished = TRUE
    iGroup = iGroup+1
  }
  return (output)
}
rocalabern/rmodel documentation built on May 27, 2019, 12:14 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
rocalabern/rmodel
Tools for data analysis

R/rmodel_datamanipulation.R
In rocalabern/rmodel: Tools for data analysis

Defines functions r.segment r.segment.target r.segment.dataframe r.segment.target.table

Documented in r.segment r.segment.dataframe r.segment.target r.segment.target.table

R Package Documentation

Browse R Packages

We want your feedback!

rocalabern/rmodel Tools for data analysis

R/rmodel_datamanipulation.R In rocalabern/rmodel: Tools for data analysis

Defines functions r.segment r.segment.target r.segment.dataframe r.segment.target.table

Documented in r.segment r.segment.dataframe r.segment.target r.segment.target.table

R Package Documentation

Browse R Packages

We want your feedback!

rocalabern/rmodel
Tools for data analysis

R/rmodel_datamanipulation.R
In rocalabern/rmodel: Tools for data analysis