# IGraph R package
# Copyright (C) 2011-2012 Gabor Csardi <csardi.gabor@gmail.com>
# 334 Harvard street, Cambridge, MA 02139 USA
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
#
###################################################################
makeNexusDatasetInfo <- function(entries) {
dsi <- lapply(entries, "[", 2)
nam <- sapply(entries, "[", 1)
attr <- nam=="attribute"
myattr <- unlist(dsi[attr])
dsi <- dsi[!attr]
nam <- nam[!attr]
names(dsi) <- nam
class(dsi) <- "nexusDatasetInfo"
if (length(myattr) != 0) {
myattr <- strsplit(myattr, "\n", fixed=TRUE)
attrdat <- lapply(myattr, function(x) strsplit(x[1], " ")[[1]])
myattr <- sapply(myattr, "[", 2)
dsi$attributes <- mapply(attrdat, myattr, SIMPLIFY=FALSE,
FUN=function(dat, desc) {
list(type=dat[1], datatype=dat[2], name=dat[3],
description=desc)
})
}
dsi$id <- as.numeric(dsi$id)
dsi$tags <- strsplit(dsi$tags, ";", fixed=TRUE)[[1]]
dsi
}
#' @method print nexusDatasetInfo
#' @rdname nexus
print.nexusDatasetInfo <- function(x, ...) {
ve <- strsplit(parseVE(x$`vertices/edges`), "/")[[1]]
nc <- c("U", "-", "-", "-")
if ("directed" %in% x$tags && "undirected" %in% x$tags) {
nc[1] <- "B"
} else if ("directed" %in% x$tags) {
nc[1] <- "D"
}
if (is.null(x$attributes)) {
nc[2] <- "?"
} else if (any(sapply(x$attributes,
function(X) X$name=="name" && X$type=="vertex"))) {
nc[2] <- "N"
}
if ("weighted" %in% x$tags) {
nc[3] <- "W"
}
if ("bipartite" %in% x$tags) {
nc[4] <- "B"
}
nc <- paste(nc, collapse="")
head <- paste(sep="", "NEXUS ", nc, " ", ve[1], " ", ve[2], " #",
x$id, " ", x$sid, " -- ", x$name)
if (nchar(head) > getOption("width")) {
head <- paste(sep="", substr(head, 1, getOption("width")-1), "+")
}
cat(head, sep="", "\n")
if (length(x$tags) != 0) {
tt <- strwrap(paste(sep="", "+ tags: ", paste(x$tags, collapse="; ")),
initial="", prefix=" ")
cat(tt, sep="\n")
}
if ("networks" %in% names(x)) {
nets <- strsplit(x$networks, " ")[[1]]
nn <- strwrap(paste(sep="", "+ nets: ", paste(nets, collapse="; ")),
initial="", prefix=" ")
cat(nn, sep="\n")
}
attr <- x[["attributes"]]
printed <- c("id", "sid", "vertices/edges", "name", "tags", "networks",
"attributes")
x <- x[ setdiff(names(x), printed) ]
if (length(attr)>0) {
dcode <- function(d) {
if (d=="numeric") return("n")
if (d=="string") return("c")
"x"
}
cat("+ attr: ")
astr <- sapply(attr, function(a) {
paste(sep="", a$name, " (", substr(a$type, 1, 1), "/",
dcode(a$datatype), ")")
})
cat(strwrap(paste(astr, collapse=", "), exdent=2), "\n")
}
for (i in names(x)) {
xx <- strsplit(x[[i]], "\n")[[1]]
ff <- strwrap(paste(sep="", "+ ", i, ": ", xx[1]), initial="",
prefix=" ")
xx <- unlist(sapply(xx[-1], strwrap, prefix=" "))
cat(ff, sep="\n")
if (length(xx)>0) {
cat(xx, sep="\n")
}
}
invisible(x)
}
#' @method summary nexusDatasetInfoList
#' @rdname nexus
summary.nexusDatasetInfoList <- function(object, ...) {
o <- as.numeric(attr(object, "offset"))
s <- as.numeric(attr(object, "size"))
t <- as.numeric(attr(object, "totalsize"))
n <- attr(object, "name")
cat(sep="", "NEXUS ", o+1, "-", o+s, "/", t, " -- ", n, "\n")
invisible(object)
}
parseVE <- function(ve) {
if (length(ve)==0) { return(character(0)) }
ve <- strsplit(unname(ve), " ")
ve <- lapply(ve, strsplit, "/")
v <- lapply(ve, function(x) sapply(x, "[", 1))
e <- lapply(ve, function(x) sapply(x, "[", 2))
int <- function(x) {
if (length(unique(x))==1) {
as.character(x[1])
} else {
paste(sep="", min(x), "-", max(x))
}
}
v <- sapply(v, int)
e <- sapply(e, int)
paste(v, sep="/", e)
}
#' @method print nexusDatasetInfoList
#' @rdname nexus
print.nexusDatasetInfoList <- function(x, ...) {
summary(x)
if (length(x)==0) { return(invisible(x)) }
ve <- parseVE(unname(sapply(x, "[[", "vertices/edges")))
nets <- sapply(x, function(y) length(strsplit(y$networks, " ")[[1]]))
sid <- sapply(x, "[[", "sid")
if (any(nets>1)) {
sid[nets > 1] <- paste(sep="", sid[nets>1], ".", nets[nets>1])
}
df <- data.frame(no=paste(sep="", "[", format(seq_along(x)), "] "),
sid=format(sid),
size=paste(sep="", " ", format(ve)),
id=paste(sep="", " #", format(sapply(x, "[[", "id")), " "),
name=sapply(x, "[[", "name"))
out <- do.call(paste, c(as.list(df), sep=""))
long <- nchar(out) > getOption("width")
out <- paste(sep="", substr(out, 1, getOption("width")-1),
ifelse(long, "+", ""))
cat(out, sep="\n")
invisible(x)
}
nexus.format.result <- function(l, name="") {
if (length(l)==0) {
res <- list()
class(res) <- "nexusDatasetInfoList"
return(res)
}
l <- lapply(l, function(x) c(sub("[ ]*:[^:]*$", "", x),
sub("^[^:]*:[ ]*", "", x)))
spos <- which(sapply(l, function(x) x[1]=="id"))
epos <- c((spos-1), length(l))
ehead <- epos[1]
epos <- epos[-1]
res <- mapply(spos, epos, SIMPLIFY=FALSE, FUN=function(s, e)
makeNexusDatasetInfo(l[s:e]))
class(res) <- "nexusDatasetInfoList"
for (h in 1:ehead) {
attr(res, l[[h]][1]) <- l[[h]][2]
attr(res, "name") <- name
}
res
}
#' Query and download from the Nexus network repository
#'
#' The Nexus network repository is an online collection of network data sets.
#' These functions can be used to query it and download data from it, directly
#' as an igraph graph.
#'
#' Nexus is an online repository of networks, with an API that allow
#' programatic queries against it, and programatic data download as well.
#'
#' The \code{nexus_list} and \code{nexus_info} functions query the online
#' database. They both return \code{nexusDatasetInfo} objects.
#' \code{nexus_info} returns more information than \code{nexus_list}.
#'
#' \code{nexus_search} searches Nexus, and returns a list of data sets, as
#' \code{nexusDatasetInfo} objects. See below for some search examples.
#'
#' \code{nexus_get} downloads a data set from Nexus, based on its numeric id,
#' or based on a Nexus search string. For search strings, only the first search
#' hit is downloaded, but see also the \code{offset} argument. (If there are
#' not data sets found, then the function returns an error.)
#'
#' The \code{nexusDatasetInfo} objects returned by \code{nexus_list} have the
#' following fields: \describe{
#' \item{id}{The numeric id of the dataset.}
#' \item{sid}{The character id of the dataset.}
#' \item{name}{Character scalar, the name of the dataset.}
#' \item{vertices/edges}{Character, the number of vertices and edges in
#' the graph(s). Vertices and edges are separated by a slash, and if
#' the data set consists of multiple networks, then they are separated
#' by spaces.}
#' \item{tags}{Character vector, the tags of the dataset. Directed graph
#' have the tags \sQuote{directed}. Undirected graphs are tagged
#' as \sQuote{undirected}. Other common tags are: \sQuote{weighted},
#' \sQuote{bipartite}, \sQuote{social network}, etc.}
#' \item{networks}{The ids and names of the networks in the data set. The
#' numeric and character id are separated by a slash, and multiple networks
#' are separated by spaces.}
#' }
#'
#' \code{nexusDatasetInfo} objects returned by \code{nexus_info} have the
#' following additional fields: \describe{
#' \item{date}{Character scalar, e.g. \sQuote{2011-01-09}, the date when
#' the dataset was added to the database.}
#' \item{formats}{Character vector, the data formats in which the data set is
#' available. The various formats are separated by semicolons.}
#' \item{licence}{Character scalar, the licence of the dataset.}
#' \item{licence url}{Character scalar, the URL of the licence of the
#' dataset. Pleaase make sure you consult this before using a dataset.}
#' \item{summary}{Character scalar, the short description of the dataset,
#' this is usually a single sentence.}
#' \item{description}{Character scalar, the full description of the
#' dataset.}
#' \item{citation}{Character scalar, the paper(s) describing the
#' dataset. Please cite these papers if you are using the dataset in your
#' research, the licence of most datasets requires this.}
#' \item{attributes}{A list of lists, each list entry is a graph, vertex
#' or edge attribute and has the following entries: \describe{
#' \item{type}{Type of the attribute, either \sQuote{graph},
#' \sQuote{vertex} or \sQuote{edge}.}
#' \item{datatype}{Data type of the attribute, currently it can be
#' \sQuote{numeric} and \sQuote{string}.}
#' \item{name}{Character scalar, the name of the attribute.}
#' \item{description}{Character scalar, the description of the
#' attribute.}
#' }
#' }
#' }
#'
#' The results of the Nexus queries are printed to the screen in a consise
#' format, similar to the format of igraph graphs. A data set list (typically
#' the result of \code{nexus_list} and \code{nexus_search}) looks like this:
#' \preformatted{NEXUS 1-5/18 -- data set list
#' [1] kaptail.4 39/109-223 #18 Kapferer tailor shop
#' [2] condmatcollab2003 31163/120029 #17 Condensed matter collaborations+
#' [3] condmatcollab 16726/47594 #16 Condensed matter collaborations+
#' [4] powergrid 4941/6594 #15 Western US power grid
#' [5] celegansneural 297/2359 #14 C. Elegans neural network }
#' Each line here represents a data set, and the following information is
#' given about them: the character id of the data set (e.g. \code{kaptail}
#' or \code{powergrid}), the number of vertices and number of edges in the
#' graph of the data sets. For data sets with multiple graphs, intervals
#' are given here. Then the numeric id of the data set and the reamining
#' space is filled with the name of the data set.
#'
#' Summary information about an individual Nexus data set is printed as
#' \preformatted{NEXUS B--- 39 109-223 #18 kaptail -- Kapferer tailor shop
#' + tags: directed; social network; undirected
#' + nets: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1}
#' This is very similar to the header that is used for printing igraph
#' graphs, but there are some differences as well. The four characters
#' after the \code{NEXUS} word give the most important properties of the
#' graph(s): the first is \sQuote{\code{U}} for undirected and
#' \sQuote{\code{D}} for directed graphs, and \sQuote{\code{B}} if the data
#' set contains both directed and undirected graphs. The second is
#' \sQuote{\code{N}} named graphs. The third character is \sQuote{\code{W}}
#' for weighted graphs, the fourth is \sQuote{\code{B}} if the data set
#' contains bipartite graphs. Then the number of vertices and number of
#' edges are printed, for data sets with multiple graphs, the smallest and
#' the largest values are given. Then comes the numeric id, and the string
#' id of the data set. The end of the first line contains the name of the
#' data set. The second row lists the data set tags, and the third row the
#' networks that are included in the data set.
#'
#' Detailed data set information is printed similarly, but it contains more
#' fields.
#'
#' @rdname nexus
#' @aliases nexus nexus.list nexus.info nexus.get nexus.search nexus_list
#' nexus_info nexus_get nexus_search nexusDatasetInfo print.nexusDatasetInfo
#' print.nexusDatasetInfoList summary.nexusDatasetInfoList
#' @param tags A character vector, the tags that are searched. If not given (or
#' \code{NULL}), then all datasets are listed.
#' @param offset An offset to select part of the results. Results are listed
#' from \code{offset}+1.
#' @param limit The maximum number of results to return.
#' @param operator A character scalar. If \sQuote{or} (the default), then all
#' datasets that have at least one of the given tags, are returned. If it if
#' \sQuote{and}, then only datasets that have all the given tags, are returned.
#' @param order The ordering of the results, possible values are:
#' \sQuote{date}, \sQuote{name}, \sQuote{popularity}.
#' @param id The numeric or character id of the data set to query or download.
#' Instead of the data set ids, it is possible to supply a
#' \code{nexusDatasetInfo} or \code{nexusDatasetInfoList} object here directly
#' and then the query is done on the corresponding data set(s).
#' @param q Nexus search string. See examples below.
#' @param nexus.url The URL of the Nexus server. Don't change this from the
#' default, unless you set up your own Nexus server.
#' @param x,object The \code{nexusDatasetInfo} object to print.
#' @param \dots Currently ignored.
#' @return \code{nexus_list} and \code{nexus_search} return a list of
#' \code{nexusDatasetInfo} objects. The list also has these attributes:
#' \describe{ \item{size}{The number of data sets returned by the query.}
#' \item{totalsize}{The total number of data sets found for the query.}
#' \item{offset}{The offset parameter of the query.} \item{limit}{The limit
#' parameter of the query.} }
#'
#' \code{nexus_info} returns a single \code{nexusDatasetInfo} object.
#'
#' \code{nexus_get} returns an igraph graph object, or a list of graph objects,
#' if the data set consists of multiple networks.
#' @examples
#' \dontrun{nexus_list(tag="weighted")
#' nexus_list(limit=3, order="name")
#' nexus_list(limit=3, order="name")[[1]]
#' nexus_info(2)
#' g <- nexus_get(2)
#' summary(g)
#'
#' ## Data sets related to 'US':
#' nexus_search("US")
#'
#' ## Search for data sets that have 'network' in their name:
#' nexus_search("name:network")
#'
#' ## Any word can match
#' nexus_search("blog or US or karate")
#' }
#' @export
#' @importFrom utils URLencode
nexus_list <- function(tags=NULL, offset=0, limit=10,
operator=c("or", "and"),
order=c("date", "name", "popularity"),
nexus.url=igraph_opt("nexus.url")) {
operator=igraph.match.arg(operator)
order=igraph.match.arg(order)
if (is.null(tags)) {
u <- paste(sep="", nexus.url, "/api/dataset_info?format=text",
"&offset=", offset, "&limit=", limit, "&order=", order)
name <- "data set list"
} else {
tags <- paste(tags, collapse="|")
u <- paste(sep="", nexus.url, "/api/dataset_info?tag=", tags,
"&operator=", operator, "&format=text",
"&offset=", offset, "&limit=", limit, "&order=", order)
name <- paste("tags:", gsub("|", "; ", tags, fixed=TRUE))
}
f <- url(URLencode(u))
l <- readLines(f)
close(f)
nexus.format.result(l, name)
}
#' @export
#' @rdname nexus
#' @importFrom utils URLencode
nexus_info <- function(id, nexus.url=igraph_opt("nexus.url")) {
if (inherits(id, "nexusDatasetInfo")) {
id <- id$id
} else if (inherits(id, "nexusDatasetInfoList")) {
rid <- sapply(id, "[[", "id")
res <- lapply(rid, nexus_info, nexus.url=nexus.url)
class(res) <- class(id)
attributes(res) <- attributes(id)
return(res)
}
u <- paste(sep="", nexus.url, "/api/dataset_info?format=text&id=", id)
f <- url(URLencode(u))
l <- readLines(f)
close(f)
l2 <- character()
for (i in seq_along(l)) {
if (!grepl("^ ", l[i])) {
l2 <- c(l2, l[i])
} else {
l2[length(l2)] <- paste(sep="\n", l2[length(l2)],
sub(" ", "", l[i], fixed=TRUE))
}
}
l2 <- lapply(l2, function(x)
c(sub("[ ]*:.*$", "", x), sub("^[^:]*:[ ]*", "", x)))
res <- makeNexusDatasetInfo(l2)
if (! "attributes" %in% names(res)) { res$attributes <- list() }
return(res)
}
#' @export
#' @rdname nexus
#' @importFrom utils URLencode
nexus_get <- function(id, offset=0,
order=c("date", "name", "popularity"),
nexus.url=igraph_opt("nexus.url")) {
order=igraph.match.arg(order)
if (inherits(id, "nexusDatasetInfo")) {
id <- id$id
} else if (inherits(id, "nexusDatasetInfoList")) {
id <- sapply(id, "[[", "id")
return(lapply(id, nexus_get, nexus.url=nexus.url))
}
u <- paste(sep="", nexus.url, "/api/dataset?id=", id, "&format=R-igraph")
env <- new.env()
rdata <- url(URLencode(u))
load(rdata, envir=env)
close(rdata)
res <- get(ls(env)[1], env)
upgrade_if_igraph <- function(x) if (is_igraph(x)) upgrade_graph(x) else x
if (is_igraph(res)) {
upgrade_if_igraph(res)
} else if (is.list(res)) {
res2 <- lapply(res, upgrade_if_igraph)
attributes(res2) <- attributes(res)
res2
}
}
#' @export
#' @rdname nexus
#' @importFrom utils URLencode
nexus_search <- function(q, offset=0, limit=10,
order=c("date", "name", "popularity"),
nexus.url=igraph_opt("nexus.url")) {
order=igraph.match.arg(order)
u <- paste(sep="", nexus.url, "/api/search?q=", q,
"&format=text","&offset=", offset, "&limit=", limit,
"&order=", order)
f <- url(URLencode(u))
l <- readLines(f)
close(f)
if (length(l)==0) {
res <- list()
class(res) <- "nexusDatasetInfoList"
return(res)
}
nexus.format.result(l, name=paste("q:", q))
}
#' @param i Index.
#' @method [ nexusDatasetInfoList
#' @rdname nexus
`[.nexusDatasetInfoList` <- function(x, i) {
res <- unclass(x)[i]
class(res) <- class(x)
attributes(res) <- attributes(x)
res
}
'
DATA SET LIST:
--------------
NEXUS 1-10/18 -- data set list
[ 1] kaptail.4 #18 39/109-223 Kapferer tailor shop
[ 2] condmatcollab2003 #17 31163/120029 Condensed matter collaborations, 2003
[ 3] condmatcollab #16 16726/47594 Condensed matter collaborations, 1999
[ 4] powergrid #15 4941/6594 Western US power grid
[ 5] celegansneural #14 297/2359 C. Elegans neural network
[ 6] polblogs #13 1490/19090 US political blog network
[ 7] dolphins #12 62/159 Dolphin social network
[ 8] football #11 115/616 Network of American college ...
[ 9] adjnoun #10 112/425 Word adjacencies from David ...
[10] huckleberry # 9 74/301 Coappearance network from ...
TAG SEARCH:
-----------
NEXUS 1-4/4 -- tags: directed
[1] kaptail.4 #18 39/109-223 Kapferer tailor shop
[2] polblogs #13 1490/19090 US political blog network
[3] macaque # 4 45/463 Macaque visuotactile brain areas
[4] UKfaculty # 2 81/817 UK faculty social network
FULL TEXT SEARCH:
-----------------
NEXUS 1-2/2 -- q: US
[1] powergrid #15 4941/6594 Western US power grid
[2] polblogs #13 1490/19090 US political blog network
DATA SET SUMMARY:
-----------------
NEXUS B--- 39 109-223 -- #18 Kapferer tailor shop
+ tags: directed; social network; undirected
+ networks: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1
NEXUS U--- 4941 6594 -- #15 Western US power grid
+ tags: technology
DATA SET INFO:
--------------
NEXUS B--- 39 109-223 -- #18 Kapferer tailor shop
+ tags: directed; social network; undirected
+ attr: name (v/c) [Actor names]
+ networks: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1
+ nets: #1 KAPFTI2; #2 KAPFTS2; #3 KAPFTI1; #4 KAPFTS1
+ date: 2011-01-23
+ licence: Creative Commons by-sa 3.0
+ licence url: http://creativecommons.org/licenses/by-sa/3.0/
+ summary: Interactions in a tailor shop in Zambia (then
Northern Rhodesia) over a period of ten months.
+ details: Bruce Kapferer (1972) observed interactions in a tailor
shop in Zambia (then Northern Rhodesia) over a period of ten months.
His focus was the changing patterns of alliance among workers during
extended negotiations for higher wages. . The matrices represent two
different types of interaction, recorded at two different times
(seven months apart) over a period of one month. TI1 and TI2 record
the "instrumental" (work- and assistance-related) interactions at the
two times; TS1 and TS2 the "sociational" (friendship, socioemotional)
interactions. . The data are particularly interesting since an
abortive strike occurred after the first set of observations, and a
successful strike took place after the second.
+ formats: Pajek; R-igraph
+ citation: Kapferer B. (1972). Strategy and transaction in an African
factory. Manchester: Manchester University Press.
'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.