R/k-circles.R

Defines functions print.k.circles level.summary level.membership merge.perfect.overlap k.circles minimal.members.decomposition

Documented in k.circles level.membership minimal.members.decomposition

#' Minimal members decomposition
#'
#' @param incidence a sparse incidence matrix
#' @param minimum.memberships the minimum number of memberships for the individuals (rows)
#'
#' @return a list of incidence matrices
#' @export
#'
#' @examples
#' data(den)
#' den         <- den[den$SOURCE != "Events",]
#' incidence   <- xtabs(~NAME + AFFILIATION, droplevels(den), sparse = TRUE)
#' l.inc       <- minimal.members.decomposition(incidence, 3)
#' level.membership(l.inc)
#' l.inc[[5]] %>% colSums() %>% sort() %>% as.matrix()
#' l.inc[[5]] %>% rowSums() %>% sort() %>% as.matrix()

minimal.members.decomposition <- function(incidence, minimum.memberships = 3, check.for.nested = TRUE){
  
  
  ##############
  # Tests
  
  
  # Is it a sparse matrix?
  if(!inherits(incidence, "dgCMatrix")) stop("incidence has to be a sparse matrix of class dgCMatrix. With xtabs you can set sparse to TRUE and get a valid matrix.")
  
  # Check for multiple memberships and odd values
  if(any(incidence@x != 1)) warning("incidence has values other than 1 and . (the sparse version of 0). Try table(incidence@x) to see them.")
  
  # Backup of the incidence matrix
  inc         <- incidence
  
  # k is the minimum number of members
  k  <- 1
  
  # l.inc is a list of incidence matrices
  l.inc       <- list()
  
  # j is the minimum number of memberships for any individual
  level.up <- function(inc, k, j = 3, check.for.nested = TRUE){
    
    test.mat <- function(inc, j, k){
      
      cond                       <- !is.null(dim(inc))
      if(cond){
        cond              <- any(
          c(
            any(Matrix::rowSums(inc) < j), # Is there any individuals with less than j positions
            any(Matrix::colSums(inc) < k)  # Is there any affiliations with less than k members
          ))
      }
      cond
    } 
    
    # Levelling up
    while(test.mat(inc, j, k)){
      # Removing rows
      inc.t       <- inc[Matrix::rowSums(inc) >= j, ]
      if(is.null(dim(inc.t))) break
      inc         <- inc.t  # Keep only those members with j or more positions
      
      # Removing columns
      inc.t       <- inc[, Matrix::colSums(inc) >= k]  # Keep only those affiliations with more than k members
      if(is.null(dim(inc.t))) break
      inc         <- inc.t
      
      # Merging overlapping affiliations
      if(identical(check.for.nested, TRUE)){
        inc         <- merge.perfect.overlap(inc, combine.labels = "&")
      }
      
    }
    inc
  }
  
  while(
    k <= suppressWarnings(min(Matrix::colSums(inc))) & ncol(inc) > minimum.memberships # While k is smaller than the lowest number of members and the number of affiliations is larger than the minimum number of memberships
  ){
    k           <- k + 1
    tmp         <- level.up(inc, k, j = minimum.memberships, check.for.nested = check.for.nested)
    inc         <- tmp
    #if(identical(duplicate.check, TRUE)) inc <- unique.matrix(inc, MARGIN = 2)
    l.inc[[k]]  <- inc
    
  }
  
  # It gives us an annoying warning because level.up doesn't use a proper test of whether the inc is valid for further operation
  
  l.inc  <- c(incidence, l.inc)
  
  compact(l.inc)
  
}


#' K-circles decomposition
#'
#' @param incidence a sparse incidence matrix
#' @param minimum.memberships the minimum number of memberships for the individuals (rows)
#'
#' @return an object of class "k.circles"
#' @export
#'
#' @examples
#' data(den)
#' den         <- den[den$SOURCE != "Events",]
#' incidence   <- xtabs(~NAME + AFFILIATION, droplevels(den), sparse = TRUE)
#' l.inc       <- k.circles(incidence, 3, check.for.nested = TRUE)
#' level.membership(l.inc)
#' l.inc[[5]] %>% colSums() %>% sort() %>% as.matrix()
#' l.inc[[5]] %>% rowSums() %>% sort() %>% as.matrix()


k.circles <- function(incidence, minimum.memberships = 3, check.for.nested = TRUE){
  
  
  ##############
  # Tests
  
  
  # Is it a sparse matrix?
  if(!inherits(incidence, "dgCMatrix")) stop("incidence has to be a sparse matrix of class dgCMatrix. With xtabs you can set sparse to TRUE and get a valid matrix.")
  
  # Check for multiple memberships and odd values
  if(any(incidence@x != 1)) warning("incidence has values other than 1 and . (the sparse version of 0). Try table(incidence@x) to see them.")
  
  # Backup of the incidence matrix
  original.colnames           <- colnames(incidence)
  colnames(incidence)         <- 1:ncol(incidence)
  
  inc         <- incidence
  
  
  
  # k is the minimum number of members
  k  <- 1
  
  # l.inc is a list of incidence matrices
  l.inc       <- list()
  
  # j is the minimum number of memberships for any individual
  level.up <- function(inc, k, j = 3, check.for.nested = TRUE){
    
    test.mat <- function(inc, j, k){
      
      cond                       <- !is.null(dim(inc))
      if(cond){
        cond              <- any(
          c(
            any(Matrix::rowSums(inc) < j), # Is there any individuals with less than j positions
            any(Matrix::colSums(inc) < k)  # Is there any affiliations with less than k members
          ))
      }
      cond
    } 
    
    # Levelling up
    while(test.mat(inc, j, k)){
      # Removing rows
      inc.t       <- inc[Matrix::rowSums(inc) >= j, ]
      if(is.null(dim(inc.t))) break
      inc         <- inc.t  # Keep only those members with j or more positions
      
      # Removing columns
      inc.t       <- inc[, Matrix::colSums(inc) >= k]  # Keep only those affiliations with more than k members
      if(is.null(dim(inc.t))) break
      inc         <- inc.t
      
      # Merging overlapping affiliations
      if(identical(check.for.nested, TRUE)){
        inc         <- eliter:::merge.perfect.overlap(inc, combine.labels = "&")
      }
      
    }
    inc
  }
  
  while(
    k <= suppressWarnings(min(Matrix::colSums(inc))) & ncol(inc) > minimum.memberships # While k is smaller than the lowest number of members and the number of affiliations is larger than the minimum number of memberships
  ){
    k           <- k + 1
    tmp         <- level.up(inc, k, j = minimum.memberships, check.for.nested = check.for.nested)
    inc         <- tmp
    #if(identical(duplicate.check, TRUE)) inc <- unique.matrix(inc, MARGIN = 2)
    l.inc[[k]]  <- inc
    
  }
  
  # It gives us an annoying warning because level.up doesn't use a proper test of whether the inc is valid for further operation
  
  
  
  # Clean up and class
  l.inc        <- c(incidence, l.inc)
  l.inc        <- compact(l.inc)
  class(l.inc) <- append("k.circle", class(l.inc))
  
  # Here we reconstruct the original names and we add the merge.clusters as an attribute.
  
  give.the.old.names.back <- function(i){
    n            <- str_split(colnames(i), pattern = "&") %>% map(str_trim) %>% map(as.numeric)
    colnames(i)  <- map_chr(n, ~paste(original.colnames[.x], collapse = " & ")) 
    names(n)     <- colnames(i)
    n            <- map(n, ~ enframe(original.colnames[.x])) %>% bind_rows(.id = "name") %>% transmute(AFFILIATION = value, overlap.cluster = name)
    attr(i, "col.circle.merge.cluster") <- n
    i
  }
  
  l.inc <- map(l.inc, give.the.old.names.back)
  l.inc
  
}




merge.perfect.overlap <- function(incidence, combine.labels = "&", cutoff = 1){
  # This functions throws an error if any of the affiliations are empty
  
  # Goal: Merge perfectly overlapping affiliations
  # Combine their labels and remove one of the columns.
  # They merge into either to largest affiliation or to the first in the order
  # It is run when the incidence has been pruned. So affiliations with just a single member will have disappeared
  # The merged affiliation will have all its values set to 0
  
  adj                    <- Matrix::crossprod(incidence)
  affil.members          <- Matrix::diag(adj)
  names(affil.members)   <- rownames(adj)
  adj.s                  <- adj / affil.members
  diag(adj.s)            <- 0
  merge.ind              <- Matrix::which(adj.s >= cutoff, arr.ind = TRUE)
  merge.ind              <- tibble(row = merge.ind[, 1], col = merge.ind[, 2]) # Row and column indices
  
  s                      <- merge.ind %>% apply(1, sort)  %>% t()         
  if(nrow(s) > 1) merge.ind <- merge.ind %>% filter(!duplicated(s)) # Check if two of equal size are there.
  
  merge.ind$row.name <- colnames(adj)[merge.ind$row]
  merge.ind$col.name <- colnames(adj)[merge.ind$col]
  
  
  
  s                      <- merge.ind$col %in% merge.ind$row  # col må ikke være i row - fordi vi må ikke slette noget der er blevet merget ind i.
  merge.ind              <- merge.ind %>% filter(!s)
  
  
  
  # If one of the nodes are there twice - it means that it will be merged two times. That is not a great problem as it does not influence the scores - except for merged clusters.
  # graph_from_edgelist(merge.ind[, 3:4] %>% as.matrix()) %>% plot() # Here we check if any node has an outdegree of 2 - that is not allowed!
  merge.ind             <- merge.ind[!duplicated(merge.ind$row), ]
  
  if(nrow(merge.ind) == 0) return(incidence)
  
  if(identical(combine.labels, FALSE) == FALSE){
    for(i in 1:nrow(merge.ind)){
      cr             <- merge.ind$row[i]
      cc             <- merge.ind$col[i]
      label          <- paste(colnames(incidence)[cc], combine.labels,  colnames(incidence)[cr])
      colnames(incidence)[cc] <- label
    }
  }
  
  add_together <- function(x,i){
    row <- merge.ind[i, ]$row
    col <-  merge.ind[i, ]$col
    
    change <- x[, row] > 0 & x[, col] == 0   
    x[change, col] <- x[change, row] 
    x
  }
  
  for(i in 1:nrow(merge.ind)){
    incidence <- add_together(incidence, i)
    
  }
  
  incidence[, merge.ind$row] <- 0 
  drop0(incidence)
}

#' Level membership from minimal membership decomposition
#'
#' @param l.inc a list of nested incidence matrices
#'
#' @return a tibble with rownames and level membership
#' @export
#'
#' @examples
level.membership <- function(l.inc, mode = c("ind", "affil", "two-mode"), levels = seq_along(l.inc)){
  
  
  
  # When we merge affilations the naming of the affil and two-mode will be more complicated
  mode           <- match.arg(mode)
  l.inc          <- l.inc[levels]
  l              <- length(l.inc)
  
  # Membership for individuals
  membership  <- map(l.inc, rownames) %>% imap(~ tibble(Name = .x, Level = .y)) %>%
    bind_rows() %>% arrange(Name)
  
  mem         <- membership %>% group_by(Name) %>% summarise(Level = max(Level))
  mem.ind     <- tibble(Name = rownames(l.inc[[1]])) %>% left_join(., mem, by = "Name")
  
  if(mode == "ind") return(mem.ind)
  
  # Membership for affiliations  
  
  inc        <- l.inc[[1]]
  f          <- function(x, inc) Matrix::colSums(inc[x,]) %>% as_tibble(rownames = "Name")
  level.mem  <- map(l.inc, rownames) %>% map(., f, inc = inc) %>% set_names(1:l) %>% bind_rows(.id = "level")
  level.mem  <- level.mem %>% mutate(level = as.numeric(level)) %>% filter(value >= level)
  level.mem  <- level.mem %>% group_by(Name) %>% summarise(Level = max(level))
  
  mem.affil   <- tibble(Name = colnames(inc)) %>% left_join(., level.mem, by = "Name")
  
  if(mode == "affil") return(mem.ind)
  
  # Membership for two-mode
  # We assume that Igraph or other will sort first by rows and then columns
  
  mem.two         <- bind_rows(mem.ind, mem.affil, .id = "type")
  
  if(mode == "two-mode")  return(mem.two)
}


level.summary <- function(l.inc){
  l.inc  <- compact(l.inc)
  l.g    <- map(l.inc, ~graph_from_incidence_matrix(incidence = .x))
  l.cl   <- map(l.g, clusters)
  
  map_dbl(l.cl, "no")  
  l.g %>% map(~bipartite.projection(.x)[[1]]) %>% map(degree) %>% map_dbl(mean)
  
}

print.k.circles <- function(x){
  x
}
antongrau/eliter documentation built on March 2, 2024, 8:05 p.m.