R/math.R

Defines functions switch.edges check.dpl.edges

Documented in check.dpl.edges switch.edges

################################################################################
# COLLECTION OF FUNCTIONS FOR EDGES ANALYSIS
################################################################################



#' Check (and count) duplicated edges
#' 
#' Looks for duplicated edges and reports the number of instances of them.
#' 
#' `check.dpl.edges` looks for duplicated edges reporting duplicates and
#' counting how many times each edge is duplicated.
#' 
#' For every group of duplicated edges only one will be accounted to report
#' number of instances (which will be recognized with a value higher than 2 in
#' the `reps` column), the other ones will be assigned an `NA` at the
#' `reps` value.
#' 
#' @param edges A matrix or data frame structured as a list of edges
#' @param undirected Declares if the net is directed or not (does de difference)
#' @param order.edgelist Whether to sort the resulting matrix or not
#' @return A three column `data.frame` with colnames \dQuote{source},
#' \dQuote{target} \dQuote{reps}.
#' @author George Vega Yon 
#' @keywords manip
#' @examples
#' 
#'   # An edgelist with duplicated dyads
#'   relations <- cbind(c(1,1,3,3,4,2,5,6), c(2,3,1,1,2,4,1,1))
#'   
#'   # Checking duplicated edges (undirected graph)
#'   check.dpl.edges(edges=relations, undirected=TRUE)
#' @family manipulation
#' @export
check.dpl.edges <- function(edges, undirected=FALSE, order.edgelist=TRUE) {
################################################################################
# Checks for duplicated edges, and switchs between source and target
# (optionally).
################################################################################  
  
  if (any(!is.finite(edges) | is.null(edges)))
    stop("No NA, NULL or NaN elements can be passed to this function.")
 
  edges0 <- cbind(edges, id = 1:nrow(edges))
  if (undirected)
    edges <- switch.edges(edges)
  
  # Unique dataset
  edges_unique <- unique(edges)
  
  # Counting
  edges <- cbind(edges, id = 1:nrow(edges))
  edges <- edges[order(edges[,1], edges[,2]), ]
  
  # Binary of counts
  counts <- c(
    TRUE,
    (edges[-1, 1] != edges[-nrow(edges), 1]) |
      (edges[-1, 2] != edges[-nrow(edges), 2])
  )
  
  counts <- cumsum(counts)
  
  # Now we say which to keep, we keep the thing if the count is either equal or smaller
  to_keep <- which( c(counts[-1] > counts[-length(counts)], TRUE))
  
  dupls <- cumsum(
    c(TRUE, (edges[-1, 1] == edges[-nrow(edges), 1]) &
        (edges[-1, 2] == edges[-nrow(edges), 2])
      ))
  
  edges <- cbind(edges, counts = dupls)[to_keep, , drop = FALSE]
  edges[, "counts"] <- edges[, "counts"] - c(1, edges[-nrow(edges), "counts"]) + 1
  edges <- edges[order(edges[, "id"]), , drop = FALSE]
  
  
  result <- data.frame(id = edges[,"id"], reps=edges[,"counts"], check.names=FALSE)
  
  # Merging 
  result <- merge(
    data.frame(source = edges0[,1], target = edges0[,2], id = edges0[,3]),
    subset(result), by = "id", all.x = TRUE
  )
  result <- result[order(result$id),]

  if (order.edgelist) 
    result <- result[order(result[,1], result[,2]),]
  
  return(result[, c("source", "target", "reps")])
}



#' Switches between source and target
#' 
#' Puts the lowest id node among every dyad as source (and the other as target)
#' 
#' `edge.list` transforms the input into a two-elements list containing a
#' dataframe of nodes (with columns \dQuote{id} and \dQuote{label}) and a
#' dataframe of edges. The last one is numeric (with columns \dQuote{source}
#' and \dQuote{target}) and based on auto-generated nodes' ids.
#' 
#' @param edges A matrix or data frame structured as a list of edges
#' @return A list containing two data frames.
#' @author George Vega Yon 
#' @keywords manip
#' @examples
#' 
#'   relations <- cbind(c(1,1,3,4,2,5,6), c(2,3,1,2,4,1,1))
#'   relations
#'   
#'   switch.edges(relations)
#' @export
#' @family manipulation
switch.edges <- function(edges) {
################################################################################
# Orders pairs of edges by putting the lowest id first as source
################################################################################
  if (any(is.na(edges) | is.null(edges) | is.nan(edges))) 
    stop("No NA, NULL or NaN elements can be passed to this function.")

  which_min <- which(max.col(edges) == 1)
  edges[which_min, ] <- edges[which_min, 2:1]
  
  return(
    data.frame(
      source=edges[,1], 
      target=edges[,2],
      check.names=FALSE)
  )
}

#try(dyn.unload("src/RCheckDplEdges.so"))
#dyn.load("src/RCheckDplEdges.so")

#relations <- cbind(c(1,1,3,4,2,5,6), c(2,3,1,2,4,1,1))


#check.dpl.edges(relations)
#ordAndCheckDplEdges(relations, undirected=FALSE)
#switch.edges(relations)

Try the rgexf package in your browser

Any scripts or data that you put into this service are public.

rgexf documentation built on Aug. 13, 2021, 1:06 a.m.