# Copyright (C) 2018 Tillmann Nett for FernUni Hagen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
###############################################################################
# A selection is a subset of the names, which is stored as an
# index into the complete name set for simplicity.
#
# Since indexes into sets can take different form in R, e.g. a vector
# of logicals which is True for the selecte positions or a vector
# of numericals with the index numbers, we use different representations
# depending on context and convert between them as needed.
# In some cases, this may cause us to loose information, such as duplicates
# or ordering structure, but the operations themselves should usually clearly
# show that this information is lost (e.g. using & on a numeric representation
# looses duplicates, as there is no easy way to keep this information).
make.names.selection <- function(v) {
rv <- v
class(rv) <- c("names.selection", class(rv))
rv
}
#' Filter a subset from the names based on percentiles
#'
#' This method filters a set of names from the complete
#' set. The set is filtered based on predicates, which
#' are evaluated in relation to the percentiles of the
#' given variable (see examples). Multiple predicates
#' are first all evaluated on the complete dataset and
#' then combined (i.e. the percentiles are not
#' re-evaluated in the filtered dataset).
#'
#' @param ... Predicates on the dataset using non-standard evaluation
#'
#' @examples
#'
#' # Retrieve all names without any further reduction
#' filter.names()
#'
#' # Retrieve all names with Intelligence ratings above the median
#' filter.names(Intelligence >= 0.5)
#'
#' # Retrieve all names, which are not too uncommon (Familiarity at least
#' # in the 20th percentile) and for which Intelligence is above the
#' # median
#' filter.names(Familiarity >= 0.2, Intelligence >= 0.5)
#'
#' @importFrom rlang quos eval_tidy
#'
#' @export
filter.names <- function(...) {
qs <- rlang::quos( ... )
if(length(qs) == 0) {
rv <- rep(T, nrow( names.mean.pc) )
} else {
# Create a matrix of percentiles for all attributes
percs <- seq(0,1,length.out = nrow(names.mean.pc) )
names.percs <- apply( names.mean.pc[,columns.ratings], MARGIN = 2, function(clm) percs[rank(clm, ties.method="min")])
msks <- lapply(qs, rlang::eval_tidy, data = as.data.frame( names.percs ) )
msks <- do.call( cbind, msks )
rv <- apply(msks, MARGIN=1, all)
}
make.names.selection( rv )
}
# Private method for polymorphic lookup depending on representation
names.selection.to.logical <- function(x, ...) {
UseMethod("names.selection.to.logical")
}
names.selection.to.logical.logical <- function(x, ...) {
# just strip off class type
rv <- x
class(rv) <- "logical"
rv
}
names.selection.to.logical.numeric <- function(x, ...) {
rv <- rep(F, nrow(names.mean.pc))
rv[x] <- T
rv
}
#' @export
as.logical.names.selection <- function(x, ...) {
names.selection.to.logical(x, ...)
}
# Private method for polymorphic lookup depending on representation
names.selection.to.double <- function(x, ...) {
UseMethod("names.selection.to.double")
}
names.selection.to.double.logical <- function(x, ...) {
rv <- as.double( which( x ) )
rv
}
names.selection.to.double.numeric <- function(x, ...) {
# Just strip of class type
rv <- x
class(rv) <- "numeric"
rv
}
#' @export
as.double.names.selection <- function(x, ...) {
names.selection.to.double(x, ...)
}
#' @export
as.character.names.selection <- function(x, ...) {
# We can select here independently of the actual representations
# so we do not need any conversion
names.mean.pc[x, "name"]
}
#' @export
as.data.frame.names.selection <- function(x, ...) {
ratings <- rlang::ensyms( ... )
if( length( ratings ) == 0 ) {
return( data.frame( name = as.character(x), stringsAsFactors = F ) )
}
data.frame( name = as.character(x), ratings(subset=x, ...), row.names = seq_along(x), stringsAsFactors = F )
}
#' @export
print.names.selection <- function(x, ... ) {
print( as.data.frame( x, ...) )
}
#' @export
`&.names.selection` <- function(x,y) {
# To intersect two selections, we must ignore the ordering
# and multiples, so we always have to convert to logical
# first
rv <- as.logical( x ) & as.logical( y )
class(rv) <- c("names.selection",class(rv))
rv
}
#' @export
`[.names.selection` <- function(x, i, ...) {
# If we select, we may always have multiples
# so we must always return a numeric index
#
# Convert to numeric index first, so we know
# that we can just select correctly
x.numeric <- as.numeric( x )
# Select the correct elements
rv <- x.numeric[ i ]
class(rv) <- c("names.selection", class(rv))
rv
}
# Private helper method to use polymorphism on the internal representation
the.length <- function(x) {
UseMethod("the.length")
}
the.length.logical <- function(x) {
sum(x)
}
the.length.numeric <- function(x) {
length( as.numeric( x ) )
}
#' @export
length.names.selection <- function(x) {
the.length(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.