R/functions_description.r

Defines functions breakdown.variance headings csa.measures supplementary_variable supplementary.categories average.coord variance tab.variable tab.dim individuals contribution balance print.soc.mca

Documented in average.coord balance breakdown.variance contribution csa.measures headings print.soc.mca supplementary.categories variance

#' Print soc.ca objects
#' 
#' Prints commonly used measures used in the analysis of multiple correspondence
#' analysis
#' @param x is a soc.ca class object
#' @param ... further arguments are ignored
#' @return Active dimensions is the number of dimensions remaining after the
#'   reduction of the dimensionality of the analysis.
#' @return Active modalities is the number of modalities that are not set as
#'   passive.
#' @return Share of passive mass is the percentage of the total mass that is
#'   represented by the passive modalities.
#' @return The values represented in the scree plot are the adjusted inertias,
#'   see \link{variance}
#' @return The active variables are represented with their number of active
#'   modalities and their share of the total variance/inertia.
#' @export print.soc.mca
#' @seealso \link{soc.mca}, \link{contribution}
#'   
#' @examples
#' example(soc.ca)
#' print(result)
#' @export

print.soc.mca  <- function(x, ...){
  
  # Help functions
  scree       <- function(x, dim = 6){
    set.dim   <- dim
    dim       <- ifelse((nrow(x$adj.inertia)<dim) == TRUE, nrow(x$adj.inertia), dim)
    adj       <- round(x$adj.inertia[1:dim, 4], digits = 1)
    stars     <- round(round(adj)/2)
    starscree <- vector("list", set.dim)
    for (i in 1:length(stars)){
      starscree[[i]] <- noquote(rep("*", stars[i]))
    }
    return(starscree)
    # x is a soc.ca class object
    # Dim is the number of dimensions included in the plot
  }
    
  Nmodal       <- x$n.mod
  Nsup         <- sum(x$freq.sup != 0)
  Nid          <- x$n.ind
  Share.of.var <- round((x$modal[, "Nb. active modalities"] - 1) / (length(x$names.passive) + Nmodal - nrow(x$modal)), 2) * 100
  Vnames       <- paste(rownames(x$modal), " [", x$modal[, "Nb. active modalities"], " - ", format(Share.of.var), "%]", sep = "")
  Vnames       <- Vnames[order(Share.of.var, decreasing = TRUE)]
  Submass 	   <- 1 - round(sum(x$mass.mod), digits = 2) 
  act.dim 	   <- nrow(x$adj.inertia)
  dim80 		   <- which.min(x$adj.inertia[, 5] < 80)
  scree.dim	   <- 7
  N.pas.mod    <- length(x$names.passive)
  stars 		   <- scree(x, scree.dim)
  adj.dim      <- 1:scree.dim
  
  dim.a        <- ifelse((scree.dim < nrow(x$adj.inertia)), scree.dim, nrow(x$adj.inertia))
  adj          <- vector(mode = "numeric", length = scree.dim)
  adj[1:dim.a] <- x$adj.inertia[1:dim.a, 4]
  adj	         <- paste(formatC(adj,format = "f",digits = 1), sep = "", collide = "%")
  
  ## Output
  # Soc.csa title
  if (inherits(x, "soc.csa") == TRUE) cat(format("Class Specific Multiple Correspondence Analysis:", 	width = 90, justify = "centre"),"\n", "\n")
  # Soc.mca title
  if (inherits(x, "soc.csa") == FALSE) cat(format("Specific Multiple Correspondence Analysis:",   width = 90, justify = "centre"),"\n", "\n")
  
  cat(format("Statistics",  width = 50, justify = "centre"), format("Scree plot", width = 40, justify = "centre"),"\n",
      
      format("	Active dimensions: ", 			width = 40,), format(act.dim, 	width = 10, justify = "right"),
      format("|  1.", width = 10, justify = "centre" ), format(adj[1], width = 10, justify = "centre"), format(paste(stars[[1]]), width = 1), "\n",

      format("	Dimensions explaining 80% of inertia: ",width = 40,), format(dim80, 	width = 10, justify = "right"), 
      format("|  2.", width = 10, justify = "centre" ), format(adj[2], width = 10, justify = "centre"), format(paste(stars[[2]]), width = 1), "\n",
      
      format("	Active modalities: ", 			width = 40,), format(Nmodal, 	width = 10, justify = "right"), 
      format("|  3.", width = 10, justify = "centre" ), format(adj[3], width = 10, justify = "centre"), format(paste(stars[[3]]), width = 1), "\n",
      
      format("	Supplementary modalities: ",		width = 40,), format(Nsup, 	width = 10, justify = "right"), 
      format("|  4.", width = 10, justify = "centre" ), format(adj[4], width = 10, justify = "centre"), format(paste(stars[[4]]), width = 1), "\n",
      
      format("	Individuals: ",		 		width = 40,), format(Nid, 	width = 10, justify = "right"), 
      format("|  5.", width = 10, justify = "centre" ), format(adj[5], width = 10, justify = "centre"), format(paste(stars[[5]]), width = 1), "\n",
      
      format("	Share of passive mass:",	 		width = 40,), format(Submass, 	width = 10, justify = "right"), 
      format("|  6.", width = 10, justify = "centre" ), format(adj[6], width = 10, justify = "centre"), format(paste(stars[[6]]), width = 1), "\n",
      
      format("\tNumber of passive modalities:",	 		width = 40, ), format(N.pas.mod, 	width = 10, justify = "right"), 
      format("|  7.", width = 10, justify = "centre" ), format(adj[7], width = 10, justify = "centre"), format(paste(stars[[7]]), width = 1), "\n",
      
      "\n",
      format(paste("The", length(Vnames),"active variables: [No. modalities - share of variance]"), 			width = 100, justify = "centre" ),
      "\n",
      "\n",
      sep = "")
  
  Vnames <- Vnames[order(Share.of.var, decreasing = TRUE)]
  
  if(length(Vnames) > 20) Vnames <- c(head(Vnames, 20), "[...]")
  
  cat(format(Vnames, width = 25, justify = "right"), fill = 100)
}

#' Contribution balance
#' 
#' Calculates the balance of the contribution of each dimension. This measure
#' indicates whether too much of a dimensions contribution is placed on either
#' the + or - side of the dimension.
#' @param object is a soc.ca class object
#' @param act.dim is the number of active dimensions to be measured
#' @return A matrix with the share of contribution on each side of 0 and their
#'   balance (+/-)
#' @seealso \link{soc.mca}, \link{contribution}
#' @examples
#' example(soc.ca)
#' balance(result)
#' balance(result, act.dim = 3)
#' @export

balance   <- function(object, act.dim = object$nd){
  coord   <- object$coord.mod[, 1:act.dim]
  contrib <- object$ctr.mod[, 1:act.dim]
  pm      <- matrix(, nrow = act.dim, ncol = 3)
  for (i in 1:act.dim){
    temp  <- cbind(coord[, i], contrib[, i])
    temp  <- temp[order(temp[, 1]), ]
    plus  <- temp[which(temp[, 1] >= 0), ]
    minus <- temp[which(temp[, 1] <= 0), ]
    pcontrib <- sum(plus[, 2])
    mcontrib <- sum(minus[, 2])
    pm[i, 1] <- pcontrib
    pm[i, 2] <- mcontrib
    pm[i, 3] <- pcontrib/mcontrib
  }
  colnames(pm) <- c("+ Contrib.", "- Contrib.", "Balance (+/-)")
  return(pm)
}

#' Summaries of contribution values
#' 
#' Different forms of contribution summaries for \link{soc.ca} objects. Results
#' are presented according to the specified \code{mode}
#' @param object a \link{soc.ca} object
#' @param dim the included dimensions
#' @param all If TRUE returns all modalities instead of just those that
#'   contribute above average
#' @param indices If TRUE; returns a vector with the row indices of the
#'   modalities or individuals
#' @param mode indicates which form of output. Possible values: \code{"sort"},
#'   \code{"mod"}, \code{"ind"}, \code{"variable"}. If the mode is
#'   \code{"variable"}, \code{dim} can be a sequence of dimensions: \code{1:5}
#' @param matrix.output if TRUE; returns output as a matrix instead of as printed output.
#' @return Each mode prints different results:
#' @return   \item{"mod"}{Ranks all modalities according to their contribution}
#'   \item{"sort"}{Ranks all modalities according to their contribution and then sorts them according to their coordinates}
#'   \item{"ind"}{Ranks all individuals according to their contribution}
#'   \item{"variable"}{Sorts all modalities according to their variable and sums the contributions per variable}
#' @return The values reported:
#' \item{Ctr}{Contribution values in percentage. Contribution values for individuals are reported in permille}
#' \item{Coord}{Principal coordinates}
#' \item{Cor}{The correlation with the dimension}
#' @seealso \link{map.ctr}
#' @examples
#' 
#' example(soc.ca)
#' contribution(result)
#' contribution(result, 2)
#' contribution(result, dim = 3, all = TRUE)
#' contribution(result, indices = TRUE)
#' contribution(result, 1:2, mode = "variable")
#' @export

contribution <- function(object, dim = 1, all = FALSE, indices = FALSE, mode = "sort", matrix.output = FALSE){
  
  if (indices == TRUE & mode == "mod"){
    ctr     <- object$ctr.mod[,dim]
    av.ctr  <- as.vector(apply(as.matrix(ctr), 2, function(x) which(x >= mean(x, na.rm = TRUE))))
    if (is.list(av.ctr) == TRUE) av.ctr  <- unlist(av.ctr[dim], use.names = FALSE)
    av.ctr  <- av.ctr[duplicated(av.ctr) == FALSE]    
    return(av.ctr)
  }
    
  # Modalities
  if (identical(mode, "mod")){
    if (length(dim) > 1 ) stop("This mode does not support more than 1 dimension")
    ctr     <- round(100 * object$ctr.mod[, dim], digits = 1)
    cor     <- round(100 * object$cor.mod[, dim], digits = 1)
    coord   <- round(object$coord.mod[, dim], digits = 2)
    names   <- object$names.mod
    if (identical(all, FALSE) == TRUE){
      av.ctr <- contribution(object, dim = dim, indices = TRUE, mode = mode)    
      header <- paste("The modalities contributing above average to dimension: ", dim, ".", sep = "")
    }
    if (identical(all, TRUE) == TRUE){
      av.ctr <- 1:length(ctr)
      header <- paste("The contribution of all modalities to dimension: ", dim, ".", sep = "")
    }
    
    out           <- data.frame(ctr[av.ctr], cor[av.ctr], coord[av.ctr])
    rownames(out) <- names[av.ctr]
    colnames(out) <- c("   Ctr.", "   Cor." , "   Coord")
    out           <- out[order(-out[, 1]), ]
    maxwidth      <- max(nchar(names)) + sum(nchar(colnames(out)))
    
    cat("\n", format(header, width = maxwidth, justify = "centre"), "\n", "\n")
    print(out)
  }
  
  # Individuals  
  if (identical(mode, "ind")){
    individuals(object, dim, indices = indices, all = FALSE)
  }
  # Side sorted modalities
  if (identical(mode, "sort")){
    if(length(dim) > 1 ) stop("Sort mode does not support more than 1 dimension")
    tab.dim(object, dim)
  }
  # Variables
  if (identical(mode, "variable")){
    tab.variable(object, dim)
  }
}

# ' The most contributing individuals
# ' 
# ' Returns the individuals with above average contribution to the selected dimension
# ' @param object is a soc.ca object
# ' @param dim is the included dimensions
# ' @param all: If TRUE returns all individuals instead of just those that contribute above average
# ' @param ind.indices: If TRUE returns a vector with the row indices of the individuals
# ' @return Ctr is the contribution in 1000
# ' @return Cor is the correlation with the dimension
# ' @return Coord is the principal coordinate
# ' @seealso \link{tab.dim}, \link{soc.mca}, \link{contribution}, \link{p.id}
# ' @export

individuals <- function(object, dim = 1, all = FALSE, indices = FALSE){
  
  if (identical(indices, TRUE) == TRUE){
    ctr        <- object$ctr.ind[,dim]
    av.ctr     <- as.vector(apply(as.matrix(ctr), 2, function(x) which(x >= mean(x, na.rm = TRUE))))
    if(is.list(av.ctr) == TRUE) av.ctr  <- unlist(av.ctr[dim], use.names = FALSE)
    av.ctr     <- unique(av.ctr)
    return(av.ctr)
  }else{
    ctr        <- object$ctr.ind[, dim]
    ctr.round  <- round(100 * object$ctr.ind[, dim], 2)
    coord      <- round(object$coord.ind[, dim], 2)
    names      <- object$names.ind
    if (identical(all, FALSE) == TRUE){
      av.ctr   <- individuals(object, dim = dim, indices = TRUE, all = FALSE)    
      header   <- paste("The individuals contributing above average to dimension: ",
                        paste(dim, collapse = ", "), ".", sep = "")
    }
    if (identical(all, TRUE) == TRUE){
      av.ctr   <- 1:length(ctr)
      header   <- paste("The contribution of all individuals to dimension: ",
                        paste(dim, collapse = ", "), ".", sep = "")
    }
    
    out        <- data.frame(ctr.round, coord)[av.ctr, ]
    rownames(out) <- names[av.ctr]
    colnames(out) <- c(paste("   Ctr.", dim), paste("   Coord.", dim))
    out        <- out[order(-out[, 1]), ]
    
    maxwidth   <- max(nchar(names)) + sum(nchar(colnames(out)))
    cat("\n", format(header, width = maxwidth, justify = "centre"), "\n", "\n")
    print(out)
  }

}

# ' The most contributing modalities according to direction on dimension
# ' 
# ' Gives the most contributing modalities sorted according to direction on dimension
# ' @param x is a soc.ca object
# ' @param dim is the dimension
# ' @param label.plus is the label of the dimensions plus side
# ' @param label.minus is the label of the dimensions minus side
# ' @param all defines whether all modalities are to be printed
# ' @seealso \link{contribution}, \link{soc.mca}, \link{p.ctr}
# ' @examples
# ' example(soc.ca)
# ' tab.dim(result, 2)
# ' tab.dim(result, 2, label.plus = "Technical capital", label.minus = "Organizational capital")
# ' @export


tab.dim <- function(x, dim = 1, label.plus = NULL, label.minus = NULL, all = FALSE){
  
  if (identical(label.plus, NULL) == TRUE){
    label.plus    <- paste("Dimension ", dim ,". (+)", sep = "")
  }
  
  if (identical(label.minus, NULL) == TRUE){
    label.minus   <- paste("Dimension ", dim ,". (-)", sep = "")
  }
  
  ctr             <- round(100 * x$ctr.mod[, dim], digits = 1)
  coord           <- round(x$coord.mod[, dim], digits = 2)
  names           <- x$names.mod
  
  if (identical(all, FALSE) == TRUE){
    av.ctr        <- contribution(x, dim = dim, indices = TRUE, mode = "mod")    
  }
  if (identical(all, TRUE) == TRUE){
    av.ctr        <- seq(x$n.mod)
  }
  
  out             <- data.frame(ctr[av.ctr], coord[av.ctr])
  names           <- names[av.ctr]
  maxwidth        <- max(nchar(names))
  
  for (i in seq(names)){
    width         <- maxwidth-nchar(names[i])
    fill          <- paste(rep(" ", width), sep = "", collapse = "")
    names[i]      <- paste(names[i], fill, sep = "", collapse = "")
  }
  rownames(out)   <- names
  ctr.lab         <- paste("Ctr")
  coord.lab       <- paste("Coord")
  colnames(out)   <- c(ctr.lab, coord.lab)
  out             <- out[order(-out[, 1]), ]
  out.label       <- c(ctr.lab, coord.lab)
  outminus        <- out[which(out[, 2] <= 0), ]
  outplus         <- out[which(out[, 2] >= 0), ]
  
  cat("\n", format(label.plus, width = maxwidth, justify = "centre"), "\n")
  print(format(outplus, justify = "centre", width = 8))
  cat("\n", format(label.minus, width = maxwidth, justify = "centre"), "\n")
  print(format(outminus, justify = "centre", width = 8))
  
  }

# ' Contribution per variabel
# ' 
# ' tab.variable returns the contribution values of all modalities ordered by variable
# ' 
# ' @param object is a soc.ca object
# ' @param dim is the included dimensions. The default is 1:3
# ' @param If sup = TRUE the coordinates of the supplementary variables are given instead
# ' @return If assigned using <- tab.variable returns a list of matrixes with the contribution values
# ' @return The returned list is a tab.variable class object and can be exported with the \link{export} function included in the soc.mca package.  
# ' @seealso \link{export}, \link{contribution}
# ' @examples
# ' example(soc.ca)
# ' tab.variable(result)
# ' tab.variable(result, dim = c(1, 3))
# ' tab.variable(result, sup = TRUE)
# ' @export

tab.variable    <- function(object, dim = 1:3, sup = FALSE){
    variable    <- as.factor(object$variable)
    ctr.mod     <- as.matrix(object$ctr.mod[, dim])
    lev.var     <- levels(variable)
    names.mod   <- object$names.mod
    freq.mod    <- object$freq.mod
    
    var.list    <- list()
    for (i in seq(length(lev.var))){
        var.ctr           <- round(ctr.mod[variable == lev.var[i], ] * 100, digits = 1)
        var.ctr           <- cbind(var.ctr, freq.mod[variable == lev.var[i]])
        var.ctr           <- rbind(var.ctr, colSums(var.ctr))
        rownames(var.ctr) <- c(names.mod[variable == lev.var[i]], "Total")
        colnames(var.ctr) <- c(paste(" Dim.", dim, sep = ""), "  Freq")
        
        var.list[[paste(lev.var[i])]] <- var.ctr
    }
    
    ### Supplementary modalities
    
    if (identical(sup, TRUE)){
      
      variable    <- as.factor(object$variable.sup)
      coord.sup   <- object$coord.sup[, dim]
      lev.var     <- levels(variable)
      names.mod   <- object$names.sup
      freq.mod    <- object$freq.sup
      
      var.list    <- list()
      for (i in seq(length(lev.var))){
        var.ctr           <- round(coord.sup[variable == lev.var[i], ], digits = 2)
        var.ctr           <- cbind(var.ctr, freq.mod[variable == lev.var[i]])
        rownames(var.ctr) <- c(names.mod[variable == lev.var[i]])
        colnames(var.ctr) <- c(paste(" Dim.", dim, sep = ""), "  Freq")
        
        var.list[[paste(lev.var[i])]] <- var.ctr
      }
          
      
    }
    
    # The printing
    
    av.ctr      <- round(100/object$n.mod, digits = 1)
    maxwidth    <- max(nchar(names.mod))
    l           <- ncol(var.ctr)
        
    if (identical(sup, FALSE)) cat("The contribution of the active variables")
    if (identical(sup, TRUE))  cat("The coordinates of the supplementary variables")
    
    # Beautiful printing!
    for (i in seq(length(lev.var))){
        var.ctr <- var.list[[i]]
        cat("\n", "\n", format(lev.var[i], width = maxwidth), colnames(var.ctr))
        
        for (q in seq(nrow(var.ctr))){
            cat("\n", format(rownames(var.ctr)[q], width = maxwidth),
                format(var.ctr[q, -l], width = 6), format(var.ctr[q, length(dim) + 1], width = 6, drop0trailing = TRUE))
        }
        
    }
    
    
    if (identical(sup, FALSE)) cat("\n", "Average contribution per modality: ", av.ctr, sep = "")
    cat("\n", "Total number of individuals: ", object$n.ind, sep = "")
    
    class(var.list) <- "tab.variable"
    invisible(var.list)
}


#' Variance table
#' 
#' variance returns a table of variance for the selected dimensions.
#' 
#' @param object is a soc.ca object
#' @param dim is the included dimensions, if set to NULL, then only the
#'   dimensions explaining approx. more than 0.90 of the adjusted variance are included
#' @return If assigned variance returns a matrix version of the table
#'   of variance.
#' @seealso \link{soc.mca}, \link{print.soc.mca}
#' @examples
#' example(soc.ca)
#' variance(result)
#' variance(result, dim = 1:4)
#' @export

variance    <- function(object, dim = NULL){
    
    variance <- object$adj.inertia
    if (identical(dim, NULL) == TRUE){
        dim  <- variance[, 5] <= 91
    }
    variance <- t(variance[dim, ])
    line.dim <- paste(1:ncol(variance), ".", sep = "")
    cat("\n", "Dim        ", format(line.dim, width = 6), sep = "")
    cat("\n", "Eigen    ", format(round(variance[2, ], 2), width = 6), sep = "")
    cat("\n", "Var     ", format(round(variance[3, ], 2), width = 6), sep = "")
    cat("\n", "Adj.Var ", format(round(variance[4, ], 1), width = 6), sep = "")
    cat("\n", "Cum %   ", format(round(variance[5, ], 1), width = 6), sep = "")
    
    invisible(variance)
}



#' Average coordinates
#' 
#' Find the average coordinates for each category in a variable on two dimensions.
#' 
#' @param object is soc.ca result object
#' @param x is a variable of the same length and order as the active variables used to construct the soc.ca object
#' @param dim is the two dimensions used
#' @return a matrix with the mean points and frequencies of the given variable
#' @export
#' @examples
#' example(soc.ca)
#' average.coord(result, sup$Income)

average.coord <- function(object, x, dim = c(1, 2)){
  
  coord             <- object$coord.ind[, dim]
  point.coord       <- aggregate(coord, list(x), mean)
  point.coord[, 2]  <- point.coord[, 2] / sqrt(object$eigen[dim[1]])
  point.coord[, 3]  <- point.coord[, 3] / sqrt(object$eigen[dim[2]])
  point.coord       <- cbind(point.coord, table(x)) 
  
  colnames(point.coord) <- c("label", "X", "Y", "group", "Freq")
  point.coord
}

#' Supplementary coordinates for a data.frame of factors
#'
#' Calculate the average coordinates in the category cloud of a soc.mca analysis.
#' 
#' @param object a soc.mca result object
#' @param sup a data.frame of factors or an indicator matrix
#' @param dim a numeric vector with the two dimensions calculated
#'
#' @return a data.frame with coordinates and labels
#' @export
#'
#' @examples
#' example(soc.mca)
#' supplementary.categories(result, sup)
supplementary.categories <- function(object, sup, dim = 1:2){
  
  sup.is <- soc.ca:::what.is.x(sup)
  object.is <- soc.ca:::what.is.x(object)
  
  
  # If sup is an indicator
  if(sup.is == "indicator"){
    
    coord <- object$coord.ind[, dim]
    ind.x <- sup * coord[, 1] 
    ind.y <- sup * coord[, 2]
    ind.x[ind.x == 0] <- NA
    ind.y[ind.y == 0] <- NA
    
    cmx   <- colMeans(ind.x, na.rm = TRUE) / sqrt(object$eigen[dim[1]])  
    cmy   <- colMeans(ind.y, na.rm = TRUE) / sqrt(object$eigen[dim[2]])
    
    # Variable names
    
    var.names <- attr(colnames(sup), "variable")
    if(length(var.names) != ncol(sup)) var.names <- NULL
    if(is.null(var.names)) var.names <- colnames(sup) %>% strsplit(": ") %>% map_chr(~head(.x, 1))
    
    labels <- names(colnames(sup))
    if(is.null(labels)) labels <- colnames(sup) %>% strsplit(": ") %>% map_chr(~tail(.x, 1))
    
    out <- tibble(Variable = var.names, label = labels, X = cmx, Y = cmy, Frequency = colSums(sup), Modality = colnames(sup))
    return(out)
  }
  
  sup <- sup %>% tibble()
  # Add tests
  out   <- map(sup, ~average.coord(object, .x, dim)) %>% bind_rows(.id = "Variable")
  out   <- out %>% select(Variable, label = label, X, Y, Frequency = Freq) %>% tibble() %>% mutate(Modality = paste0(Variable, ": ", label))
  out
}


supplementary_variable <- function(res, var) {
  
  eigen    <- res$eigen
  if(length(eigen) != ncol(res$coord.ind)){
  eigen    <- res$eigen[1:ncol(res$coord.ind)]
  warning("Your soc.ca object has been trimmed prior to this function. This may influence calculations")
  }
  
  weight   <- res$weight
  variable <- factor(var)
  n  <- sum(weight)
  
  FK <- colSums(weight * (indicator(as.data.frame(variable))))/n
  ind <- res$coord.ind
  coord <- aggregate(weight * ind, list(variable), sum)[, -1]/n/FK
  vrc   <- aggregate(weight * ind * ind, list(variable), sum)[, -1]/n/FK - coord * coord
  
  for (i in 1:ncol(coord)) coord[, i] <- coord[, i]/ sqrt(eigen[i])
  cos2 <- coord * coord/((1/FK) - 1)
  freq = n * FK
  
  s <- 1:length(eigen)
  
  names(freq) <- levels(variable)
  rownames(coord) <- levels(variable)
  rownames(cos2) <- levels(variable)
  wi <- apply(vrc, 2, weighted.mean, w = freq)
  be <- eigen - wi[s]
  eta2 <- be/eigen[s]
  
  vrc <- rbind(vrc[s], wi[s], be, eigen[1:length(be)], eta2)
  vrc <- round(vrc, 6)
  
  rownames(vrc) <- c(levels(variable), "within", "between", "total", 
                     "eta2")
  coord <- round(coord, 6)
  typic <- sqrt(cos2) * sqrt(length(variable) - 1)
  typic <- (((abs(coord) + coord)/coord) - 1) * typic
  pval <- 2 * (1 - pnorm(abs(as.matrix(typic))))
  list(freq = round(freq, 1), coord = coord, cos2 = round(cos2, 
                                                          6), var = round(vrc, 6), typic = round(typic, 6), pval = round(pval, 
                                                                                                                         6))
}



#' CSA measures
#' 
#' Several measures for the evaluation of the relations between the dimensions of the CSA and the dimensions the of original MCA
#' 
#' @param csa.object is a "soc.csa" class object created by the \link{soc.csa} function
#' @param correlations if TRUE correlations calculated by the \link{cor} function is returned
#' @param cosines if TRUE cosine similarities are returned
#' @param cosine.angles if TRUE angles are calculated in the basis of the cosine values
#' @param dim.csa the dimensions included from the csa
#' @param dim.mca the dimensions included from the original mca
#' @param format if TRUE results are formatted, rounded and printed for screen reading, if FALSE the raw numbers are returned
#' @param ... furhter arguments are send to the \link{cor} function
#' @return A list of measures in either formatted or raw form.
#' @export
#' @examples
#' example(soc.csa)
#' csa.measures(res.csa)
#' csa.measures(res.csa, correlations = FALSE, cosine.angles = FALSE, dim.mca = 1:5, format = FALSE)

csa.measures      <- function(csa.object, correlations = FALSE, cosines = TRUE, cosine.angles = TRUE,
                               dim.mca = 1:5, dim.csa= 1:5, format = TRUE, ...){
  
  csca.coord      <- csa.object$coord.ind
  object          <- csa.object$original.result
  class.indicator <- csa.object$original.class.indicator
  ca.coord        <- object$coord.ind[class.indicator, ]
  csca.coord      <- csca.coord[, dim.mca]
  ca.coord        <- ca.coord[, dim.csa]
  
  ##################################
  # Correlations
  cor.mat             <- cor(csca.coord, ca.coord, ...)
  rownames(cor.mat)   <- paste("CSA:", dim.mca)
  colnames(cor.mat)   <- paste("MCA:", dim.csa)
  
  ####################################
  # Cosines similarity
  cosine.similarity      <- function(x, y) x %*% y / sqrt(x %*% x * y %*% y)
  cosine.mat             <- matrix(ncol = ncol(ca.coord), nrow = ncol(csca.coord))
  rownames(cosine.mat)   <- paste("CSA:", dim.mca)
  colnames(cosine.mat)   <- paste("MCA:", dim.csa)
  
  for (i in 1:ncol(csca.coord)){
    cosine.mat[i,]       <- apply(ca.coord, 2, cosine.similarity, csca.coord[, i])
  }
  
  cosine.mat <- cbind(cosine.mat, 
                      sqrt(cosine.mat[,1]^2 + cosine.mat[,2]^2), 
                      sqrt(cosine.mat[,1]^2 + cosine.mat[,3]^2),
                      sqrt(cosine.mat[,1]^2 + cosine.mat[,4]^2),
                      sqrt(cosine.mat[,1]^2 + cosine.mat[,5]^2),
                      sqrt(cosine.mat[,2]^2 + cosine.mat[,3]^2),
                      sqrt(cosine.mat[,2]^2 + cosine.mat[,4]^2),
                      sqrt(cosine.mat[,2]^2 + cosine.mat[,5]^2),
                      sqrt(cosine.mat[,3]^2 + cosine.mat[,4]^2),
                      sqrt(cosine.mat[,3]^2 + cosine.mat[,5]^2),
                      sqrt(cosine.mat[,4]^2 + cosine.mat[,5]^2),
                      sqrt(cosine.mat[,1]^2 + cosine.mat[,2]^2 + cosine.mat[,3]^2),
                      sqrt(cosine.mat[,2]^2 + cosine.mat[,3]^2 + cosine.mat[,4]^2),
                      sqrt(cosine.mat[,3]^2 + cosine.mat[,4]^2 + cosine.mat[,5]^2))
  
  colnames(cosine.mat)[length(dim.csa)+1]  <- "MCA: 1&2"
  colnames(cosine.mat)[length(dim.csa)+2]  <- "MCA: 1&3"
  colnames(cosine.mat)[length(dim.csa)+3]  <- "MCA: 1&4"
  colnames(cosine.mat)[length(dim.csa)+4]  <- "MCA: 1&5"
  colnames(cosine.mat)[length(dim.csa)+5]  <- "MCA: 2&3"
  colnames(cosine.mat)[length(dim.csa)+6]  <- "MCA: 2&4"
  colnames(cosine.mat)[length(dim.csa)+7]  <- "MCA: 2&5"
  colnames(cosine.mat)[length(dim.csa)+8]  <- "MCA: 3&4"
  colnames(cosine.mat)[length(dim.csa)+9]  <- "MCA: 3&5"
  colnames(cosine.mat)[length(dim.csa)+10] <- "MCA: 4&5"
  colnames(cosine.mat)[length(dim.csa)+11] <- "MCA: 1&2&3"
  colnames(cosine.mat)[length(dim.csa)+12] <- "MCA: 2&3&4"
  colnames(cosine.mat)[length(dim.csa)+13] <- "MCA: 3&4&5"
  #View(cosine.mat)
  #####################################
  # Angles
  cosine.to.angle       <- function(x) acos(abs(x))/pi * 180 
  angles                <- suppressWarnings(cosine.to.angle(cosine.mat))
  #View(angles)
  ####################################
  # Out.list
  out.list              <- list() 
  if (cosines == TRUE)        out.list$cosines      <- cosine.mat
  if (cosine.angles == TRUE)  out.list$angles       <- angles
  if (correlations == TRUE)   out.list$cor          <- cor.mat
  
  if (identical(format, FALSE)) return(out.list)
  
  ###################################
  # Formatted output
  if (identical(format, TRUE)){
    
    cat("\n", format("Measures for Class Specific Multiple Correspondence Analysis:",
                     width = 90, justify = "centre"), "\n", "\n")
    
    #############
    # Cosines
    if (cosines == TRUE){
      
      cat("\n", format("Cosine similarity:",   width = 10, justify = "right"), "\n", "\n")
      cosine.fat           <- cosine.mat
      rownames(cosine.fat) <- format(rownames(cosine.fat), width = 10, justify = "centre")
      colnames(cosine.fat) <- format(colnames(cosine.fat), width = 8, justify = "right")
      cosine.fat           <- format(cosine.fat, width = 8, justify = "left")
      print(noquote(cosine.fat))
      cat("\n", "\n")
    }
    
    #############
    # Angles
    if (cosine.angles == TRUE){
      cat("\n", format("Cosine angles:",   width = 10, justify = "right"), "\n", "\n")
      
      angles.fat           <- round(angles, 1) 
      rownames(angles.fat) <- format(rownames(angles.fat), width = 10, justify = "centre")
      colnames(angles.fat) <- format(colnames(angles.fat), width = 8, justify = "right")
      angles.fat           <- format(angles.fat, width = 8, justify = "left")
      print(noquote(angles.fat))
      cat("\n", "\n") 
    }
    
    ##############
    # Correlations
    if(correlations == TRUE){
      cat("\n", format("Correlations:",   width = 10, justify = "right"), "\n", "\n")
      cor.fat           <- round(cor.mat, 2) 
      rownames(cor.fat) <- format(rownames(cor.mat), width = 10, justify = "centre")
      colnames(cor.fat) <- format(colnames(cor.mat), width = 8, justify = "right")
      cor.fat           <- format(cor.fat, width = 8, justify = "left")
      print(noquote(cor.fat))
      cat("\n", "\n")
    }
  }
}


#' Calculate contributions per heading
#'
#' @param object a soc.ca object with headings
#' @param dim a numeric vector with the dimensions
#'
#' @return a matrix
#' @export
#'
#' @examples
#' data(taste)
#' active.headings <- list()
#' active.headings$Consumption <- na.omit(taste)[, c("TV", "Film", "Art", "Eat")]
#' active.headings$Background  <- na.omit(taste)[, c("Gender", "Age", "Income")]
#' result.headings <- soc.mca(active.headings)
#' headings(result.headings)

headings      <- function(object, dim = 1:3) {
  if(identical(object$headings, NULL) == TRUE) stop("You have defined no headings")
  
  headings    <- object$headings
  lev.head    <- unique(headings)
  variable    <- object$variable
#  lev.var     <- unique(variable)
  head.var    <- cbind(object$headings, object$variable)
  head.var    <- head.var[!duplicated(head.var[,2]),]
  
  head.ctr.total <- rep(1, (max(dim) + 3))
  
  tot     <- tibble(ctr = rowSums(object$ctr.mod.raw), heading = object$headings %>% fct() %>% fct_relevel(lev.head)) %>% group_by(heading) %>% summarise(x = sum(ctr))
  tot$x  <- round(tot$x / sum(tot$x) * 100,1)
  # tot    <- aggregate(rowSums(object$ctr.mod.raw), by = list(object$headings), sum)
  # tot$x  <- round(tot$x / sum(tot$x) * 100,1)
  
  for (i in seq(length(lev.head))) {
    var.under.head     <- head.var[which(head.var[,1] == lev.head[i]),2]
    head.ctr           <- object$ctr.mod[which(variable %in% var.under.head),dim]
    head.ctr.total2    <- colSums(head.ctr)
    head.ctr.total2    <- c(length(var.under.head), nrow(head.ctr), tot$x[i], round(head.ctr.total2*100,1))
    head.ctr.total     <- rbind(head.ctr.total, head.ctr.total2)
  }
  
  head.ctr.total <- head.ctr.total[-1,]
  rownames(head.ctr.total) <- lev.head
  colnames(head.ctr.total) <- c("Variables", "Active Modalities", "Ctr. total",  paste("Ctr. dim: ", dim, sep = "")) 
  head.ctr.total
}


#' Breakdown of variance by group
#' 
#' Defining a partition of the cloud of individuals into groups, one can calculate the midpoints of the various groups. 
#' The total variance of the cloud of individuals can then be broken down to between–within variances, i.e. variance between the groups partitioning the cloud, and variance within the groups
#' The ratio of the between-variance to the total variance is denoted by n2 (eta-square), and accounts for the percentage of variance 'explained' by the group-variable.
#' (see Le Roux & Rouanet 2010, p. 20ff, 69, 114)
#' 
#' @param object is a soc.ca class object
#' @param dim the dimensions
#' @param variable a factor in the same length and order as the active variables
#'
#' @return a matrix
#' @references Le Roux, Brigitte, and Henry Rouanet. 2010. Multiple Correspondence Analysis. Thousand Oaks, Calif.: Sage Publications.
#' @export
#' @examples
#' example(soc.ca)
#' breakdown.variance(result, dim = 1:3, variable = sup$Gender)

breakdown.variance          <- function(object, dim = 1:3, variable) {

  if (anyNA(variable)) stop(substitute(variable), " contains NA - convert to missing")
  
  s          <- levels(variable)
  coords     <- matrix(NA, nrow = length(s), ncol = length(dim))
  var        <- matrix(NA, nrow = length(s), ncol = length(dim))
  weights    <- matrix(NA, nrow = length(s), ncol = 2)
  
  for (i in 1:length(s)) {
    weights[i,1] <- nrow(object$coord.ind[variable == s[i], dim]) 
    weights[i,2] <- nrow(object$coord.ind[variable == s[i], dim]) / length(variable)
  }
  
  for (i in 1:length(s)) {
    coords[i, 1:(length(dim))] <- colSums(object$coord.ind[variable == s[i], dim] / sum(variable == s[i]))
    
  }
  rownames(coords) <- s
  
  var        <- matrix(NA, nrow = length(s), ncol = length(dim))
  
  for (j in 1:length(dim)) {
    for (i in 1:length(s)) {
      var[i,j] <- sum((object$coord.ind[variable == s[i],j] - coords[i,j])^2)/sum(variable == s[i])
    }}
  
  within      <- colSums(weights[,2]*var)
  bet         <- colSums(coords[,dim]^2 * weights[,2])
  total       <- within + bet
  n2          <- bet / total
  variance    <- rbind(var, within, bet, total, n2)
  weights[,2] <- round(100*weights[,2], 1)
  meanpoints  <- rbind(cbind(weights, round(coords,3)), matrix(NA, ncol = ncol(weights) + ncol(coords), nrow= 4))
  
  out <- cbind(meanpoints, round(variance,4))
  
  rownames(out) <- c(s, "Within", "Between", "Total", "Correlation ratio")
  colnames(out) <- c("freq", "rel.freq", paste("Meanpoint: Axis ", dim), paste("Variance: Axis ", dim))
  out
  
}
Rsoc/soc.ca documentation built on March 7, 2024, 8:59 p.m.