###################################################################################
### This file contains functions useful during the modelling process
###################################################################################
###################################################################################
### lspoints (lowering Stress by excluding points)
###################################################################################
#' Lowering Stress by excluding points (lspoints)
#'
#' \code{lspoints} calculates Stress values for all combinations with n - 1
#' categories.
#'
#' This function is applicable to co-occurrence data. It shows the resulting
#' Stress values when single categories are excluded. The output consists in a
#' table showing which categories have been excluded and the resulting Stress
#' values. The table is sorted such that the the lowest Stress level occurs
#' at the top. The MDS model is computed using the package '\code{smacof}'
#' (Mair, De Leeuw, Borg, & Groenen). The first column of the input matrix
#' \code{data} should contain the ID of the unit of comparison, and the
#' following columns the categories for which the similarity matrices and MDS
#' maps are calculated (see also the description of \code{simi}). Note that
#' the purpose of this function is to assist modeling by helping to identify
#' potential problems. It is not, however, meant to be used for excluding
#' categories based solely on measures of fit and without substantial
#' justification.
#'
#' @param data Dataset; the first column must be the ID of the unit of
#' comparison, the other columns must be categories; see dataset requirements
#' \code{simi}.
#' @param method Specifies the similarity index used to compute the similarity
#' matrix, choose between "\code{as}" (Association Strength Index),
#' "\code{jaccard}" (Jaccard Index), "\code{cosine}" (Cosine Index),
#' and "\code{inclusion}" (Inclusion Index). Default is \code{as}.
#' @param single If \code{TRUE}, single mentionings (i.e. one respondent
#' mentioning just one category) are included when calculating the similarity
#' matrix.
#' @param comments If \code{TRUE}, comments relating to exclusion or possible
#' exclusion of categories and respondents are displayed. Default is
#' \code{FALSE}.
#' @param type Specifies the type of MDS model used (for more details see
#' the package '\code{smacof}' by Mair, De Leeuw, Borg, & Groenen). Default is
#' \code{ordinal}.
#' @param weight If \code{TRUE}, the MDS model is calcualted using weights,
#' i.e., similarities of zero are excluded. Default is \code{TRUE}.
#' @return Matrix showing the categories excluded and the Stress values of the
#' respective MDS models.
#' @seealso \code{\link[smacof]{smacofSym}} for details on calculating MDS
#' representations, \code{\link[thectar]{simi}} for details on calculating
#' similarity matrices.
#' @export
#' @examples
#' ## Calculate Stress values for all combinations with n - 1 (i.e., 16) SDGs
#' data(SDG_coocurrence)
#' SDG_coocurrence <- SDG_coocurrence[,-2] # Drop second column
#' stress <- lspoints(SDG_coocurrence)
#' stress
lspoints <- function(data, method = "as", single = TRUE, comments = FALSE, type = "ordinal", weight = TRUE){
if(any(is.na(data))){
stop("The dataset contains missing values.")
}else{
if(!(method %in% c("as", "jaccard","cosine", "inclusion"))){
method <- "as"
warning("The parameter 'method' is not properly defined. Default will be used.")
} #if
# Prepare basic variables
i <- 2
output <- matrix(c(rep(0, (length(data) - 1) * 2)), ncol = 2)
# Exclude the points and save the Stress values
while(i < length(data) + 1){
configuration <- data
configuration <- configuration[, -i]
similarity <- simi(configuration, method=method, single = single, comments = comments)
if(weight == TRUE){
w <- as.numeric(similarity < 1e-8)
weight_mat = matrix(1 - w, nrow = nrow(similarity), ncol = ncol(similarity))
dissimilarities <- 1 - similarity
res<-smacof::smacofSym(dissimilarities, type = type, weightmat = weight_mat)
}else{
dissimilarities <- 1 - similarity
res<-smacof::smacofSym(dissimilarities, type = type)
}#ifelse
output[i -1, 1] <- (colnames(data[i]))
output[i - 1, 2] <- res$stress
i <- i + 1
} #while
# Order the resulting vector such that the lowest Stress value is at the top
r <- as.numeric(output[, 2])
output <- output[order(r), ]
# Add labels to the columns
labels <- c("Excluded object", "Resulting Stress value")
colnames(output) <- labels
}#ifelse
# Output
return(output)
}
###################################################################################
### lscomb (lowering Stress by finding the 'ideal' combination of points)
###################################################################################
#' Lowering Stress by comparing combinations (lscomb)
#'
#' \code{lscomb} calculates Stress levels for all combinations of p out of
#' n categories
#'
#' This function is applicable to co-occurrence data. It shows the resulting
#' Stress values for all combinations of p out of n categories. The output
#' consists in a table showing which categories have been included and the
#' resulting Stress values. The table is sorted such that the lowest Stress
#' level occurs at the top. The MDS model is computed using the package
#' '\code{smacof}' (Mair, De Leeuw, Borg, & Groenen). The first column of the
#' input matrix \code{data} should contain the ID of the unit of comparison
#' and the following columns the categories for which the similarity matrices
#' and MDS maps are calculated (see also the description of \code{simi}). Note
#' that the purpose of this function is to assist modeling by helping to
#' identify potential problems. It is not, however, meant to be used for
#' excluding categories based solely on measures of fit and without substantial
#' justification.
#'
#' @param data Dataset; the first column must be the ID of the unit of comparison,
#' the other columns must be categories; see dataset requirements \code{simi}.
#' @param points Number of categories that should be included in the model
#' (p < n, wherein n equals the number of categories in the dataset).
#' @param method Specifies the similarity index used to compute the similarity
#' matrix, choose between "\code{as}" (Association Strength Index),
#' "\code{jaccard}" (Jaccard Index), "\code{cosine}" (Cosine Index),
#' and "\code{inclusion}" (Inclusion Index). Default is \code{as}.
#' @param single If \code{TRUE}, single mentionings (i.e. one respondent
#' mentioning just one category) are included when calculating the similarity
#' matrix. Default is \code{TRUE}.
#' @param comments If \code{TRUE}, comments relating to exclusion or possible
#' exclusion of categories and respondents are displayed. Default is
#' \code{FALSE}.
#' @param type Specifies the type of MDS model used (for more details see
#' the package '\code{smacof}' of Mair, De Leeuw, Borg, & Groenen). Default is
#' \code{ordinal}.
#' @param weight If \code{TRUE}, the MDS model is calcualted using weights,
#' i.e., similarities of zero are excluded. Default is \code{TRUE}.
#' @return Matrix showing the categories included and the Stress values of the
#' respective MDS models.
#' @seealso \code{\link[smacof]{smacofSym}} for details on calculating MDS
#' representations, \code{\link[thectar]{simi}} for details on calculating
#' similarity matrices.
#' @export
#' @examples
#' ## Calculate Stress values for 5 out of 7 SDGs
#' data(SDG_coocurrence)
#' SDG_coocurrence <- SDG_coocurrence[,-2] # Drop second column
#' input <- SDG_coocurrence[,1:8] # For computational reasons, we will work
#' # with 7 SDGs.
#' stress <- lscomb(input, points = 5)
#' stress
lscomb <- function(data, points, method = "as", single = TRUE, comments = FALSE, type = "ordinal", weight = TRUE){
if(any(is.na(data))){
stop("The dataset contains missing values.")
}else{
if(!(c(points %% 1) == 0 && points > 0 && points < nrow(data))){
stop("The number of points chosen is either bigger than or equal to the number of objects in data or not a natural number.")
}else{
if(!(method %in% c("as", "jaccard", "cosine", "inclusion"))){
method <- "as"
warning("The parameter 'method' is not properly defined. Default will be used.")
} #if
# Prepare the basic variables
list <- c(1 : c(length(data) - 1)) ### list with all possible elements
comb <- utils::combn(list, points) ### all possible combinations
combination <- matrix(c(data[, 1]), ncol = 1)
colnames(combination) <- c(colnames(data[1]))
output <- matrix(c(rep(0,points + 1)), nrow = 1)
labels <- c(colnames(data[1]))
# Give a warning to the user that the process may take some time
ncomb <- ncol(comb) ### number of different combinations
message(paste0("For ", points, " out of ", c(length(data) - 1), " variables, there are ",
ncomb, " different combinations. It may take a while to calculate the Stress values for all of them."))
# Combute the Stress for all possible combinations and save the combination and the Stress values
i <- 1
j <- 1
while(i < ncol(comb) + 1){
### Compute the new matrix
while(j < nrow(comb) + 1){
combination <- cbind(combination, data[, comb[j,i] + 1])
labels <- cbind(labels, colnames(data[comb[j, i] + 1]))
j <- j + 1
} #while
### Calculate model and save combination and Stress
colnames(combination) <- labels
combination <- as.data.frame(combination)
similarity <- simi(combination, method = method, single = single, comments = comments)
if(weight == TRUE){
w <- as.numeric(similarity < 1e-8)
weight_mat = matrix(1 - w, nrow = nrow(similarity), ncol = ncol(similarity))
dissimilarities <- 1 - similarity
res <- smacof::smacofSym(dissimilarities, type = type, weightmat = weight_mat)
}else{
dissimilarities <- 1 - similarity
res <- smacof::smacofSym(dissimilarities, type = type)
} #ifelse
C1 <- matrix(c(colnames(combination[2:c(nrow(comb) + 1)])), nrow = 1) # what was combined
C2 <- res$stress #stress of the solution
C3 <- cbind(C1, C2) #what was combined and the stress of the solution
output <- rbind(output, C3) #add this to output
i <- i + 1
j <- 1
combination <- data[, 1]
labels <- c(colnames(data[1]))
} #while
# Order the matrix such that the lowest Stress level is at the top
output <- output[-1, ]
output <- as.data.frame(output)
output <- output[order(output[, points+1]), ]
# Add labels to output
i <- 2
labels <- c("Included object 1")
while(i < points + 1){
next_label <- paste("Included object", i)
labels <- cbind(labels, next_label)
i <- i + 1
} #while
labels <- cbind(labels, "Resulting Stress value")
colnames(output) <- labels
} #ifelse
} #ifelse
# Output
return(output)
} #function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.