R/computeGraph.R

#' Define an object corresponding to a graph in Aster database.
#' 
#' In Aster Database, to process graphs using SQL-GR, it is recommended to represent
#' a graph using two tables:
#' \enumerate{
#'   \item Vertices table
#'   \item edges table
#' }
#' Vertices table must contain a unique key so that each row represents a vertex.
#' Edges table must contain a pair of source and target keys (from vertices table)
#' so that each row represents an edge.
#' Both vertices and edges tables may contain additional columns representing 
#' optional attributes. For example if edges table has column 'weight' it can
#' correspond a graph with edge weights.
#' 
#' Graph object can be defined using single table with edges. In that case vertices
#' are derived from the edges and arguments \code{vertexWhere} and \code{vertexAttrnames}
#' will be disallowed or ignored.
#' 
#' In place of both or any of the tables could be views or even queries. The only requirement
#' is that they correspond to a graph in Aster database by containing non-null set of vertices,
#' 0 or more edges and do not break referential integrity (see \code{\link{validateGraph}}).
#' No speical syntactic designation is necessary when using queries, thus \code{"public.edges"}
#' and \code{"SELECT * FROM public.edges"} are valid and equivalent values for the argument
#' \code{edges}.
#' 
#' @param vertices A table, view, or query of a collection of vertices in the graph. The 
#'   collection must contain at least one column representing the vertex key.
#' @param edges A table, view, or query of a collection of edges of the graph. 
#'   The collection must contain at least two columns, one list that represents 
#'   the source vertex key and another list that represents the target vertex key.
#' @param directed logical: should edges be interpreted as directed?
#' @param key name of the column with vertex unique id (in the table \code{vertices}).
#' @param source name of the column with the from vertex (in the table \code{edges}).
#' @param target name of the column with the to vertex (in the tbale \code{edges}).
#' @param vertexAttrnames optionally, a list of columns containing vertex 
#'   attribute names.
#' @param edgeAttrnames optionally, a list of columns containing edge 
#'   attribute names.
#' @param vertexWhere optionally, a \code{SQL WHERE} clause to subset vertex table (use SQL 
#'   as if in \code{WHERE} clause but omit the keyword \code{WHERE}).
#' @param edgeWhere optionally, a \code{SQL WHERE} clause to subset edge table (use SQL 
#'   as if in \code{WHERE} clause but omit the keyword \code{WHERE}).
#' 
#' @export
#' @examples 
#' # undirected graph
#' policeGraphUn = toaGraph("dallaspolice_officer_vertices", "dallaspolice_officer_edges_un", 
#'      directed = FALSE, key = "officer", source = "officer1", target = "officer2", 
#'      vertexAttrnames = c("offense_count"), edgeAttrnames = c("weight"))
#'                          
#' # directed graph with the vertex filter
#' policeGraphDi = toaGraph("dallaspolice_officer_vertices", "dallaspolice_officer_edges_di", 
#'      directed = TRUE, key = "officer", source = "officer1", target = "officer2", 
#'      vertexAttrnames = c("offense_count"), edgeAttrnames = c("weight"),
#'      vertexWhere = "officer ~ '[A-Z].*'")
#'      
toaGraph <- function(vertices, edges, directed=FALSE, 
                     key='id', source='source', target='target', 
                     vertexAttrnames=NULL, edgeAttrnames=NULL, 
                     vertexWhere = NULL, edgeWhere = NULL) {
  
  if(missing(edges) || is.null(edges))
    stop("An edge table must be defined.")
  
  if(missing(vertices) || is.null(vertices)) {
    if (!is.null(vertexWhere))
      stop("Graph with derived vertices can not have where clause.")
    vertices = NULL
  }
  
  z <- structure(list(vertices = vertices,
                      edges = edges,
                      directed = directed,
                      key = key,
                      source = source,
                      target = target,
                      vertexAttrnames = vertexAttrnames,
                      edgeAttrnames = edgeAttrnames,
                      vertexWhere = vertexWhere,
                      edgeWhere = edgeWhere
  ),
  class = "toagraph")
 
  z 
}


#' Materialize Aster graph as network object in R.
#'
#' Results in \code{\link{network}} object representation of the graph 
#' stored in Aster tables. Usually in Aster database a graph is represented
#' using a pair of vertice and edge tables (see \code{\link{toaGraph}}).
#' 
#' Use caution when computing network objects stored in Aster with this function 
#' as data may include considerable amount of vertices and edges which are too large to
#' load into a memory.
#' 
#' \code{copmuteGraph} will use graph defined by \code{graph} if neither of the arguments
#' \code{vertexWhere}, or \code{edgeWhere} provided. When one or more of them 
#' defined then they override corresponding attributes in the \code{graph}. On top of it,
#' argument \code{v} specifies concrete vertices to include in the resulting graph.
#' In case when only edges table specified argument \code{vertexWhere} will trigger error
#' while argument \code{v} is still valid.
#'
#' @param channel connection object as returned by \code{\link{odbcConnect}}
#' @param graph an object of class \code{'toagraph'} referencing graph 
#'   tables in Aster database.
#' @param v a SQL \code{SELECT} that returns key values or a list of key values (corresponding 
#'   to the \code{vertex.names} attribute) of the vertices to include in the graph. 
#'   When not \code{NULL} this guarentees that no other vertices or edges between other vertices 
#'   are included in the resulting network.
#' @param vertexWhere optionally, a \code{SQL WHERE} clause to subset vertex table. When not \code{NULL}
#'   it overrides \code{vertexWhere} condition from the \code{graph}.
#' @param edgeWhere optionally, a \code{SQL WHERE} clause to subset edge table. When not \code{NULL}
#'   it overrides \code{edgeWhere} condition from the \code{graph}.
#' @param allTables pre-built information about existing tables.
#' @param test logical: if TRUE show what would be done, only (similar to parameter \code{test} in \pkg{RODBC} 
#'   functions: \link{sqlQuery} and \link{sqlSave}).
#'   
#' @return \code{\link{network}} class object materializing an Aster graph represented by \code{\link{toaGraph}}.
#'   
#' @export
#' @examples 
#' if(interactive()) {
#' library(GGally)
#'
#' policeGraphUn = toaGraph("dallaspolice_officer_vertices", "dallaspolice_officer_edges_un", 
#'                          directed = FALSE, key = "officer", 
#'                          source = "officer1", target = "officer2", 
#'                          vertexAttrnames = c("offense_count"), edgeAttrnames = c("weight"))
#'                
#' # initialize connection to Lahman baseball database in Aster 
#' conn = odbcDriverConnect(connection="driver={Aster ODBC Driver};
#'                          server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>")
#'                              
#' # create network object and visualize with ggplot2
#' net1 = computeGraph(conn, policeGraphUn)
#' ggnet2(net1, node.label="vertex.names", node.size="offense_count", 
#'        legend.position="none")
#'        
#' # network object with filters and color attribute
#' net2 = computeGraph(conn, policeGraphUn, vertexWhere = "officer ~ '[A-Z].*'", 
#'                     edgeWhere = "weight > 0.36")
#' net2 %v% "color" = substr(get.vertex.attribute(net2, "vertex.names"), 1, 1)
#' ggnet2(net2, node.label="vertex.names", node.size="offense_count", 
#'        size.cut=TRUE, node.color="color", legend.position="none", 
#'        palette = "Set2")
#' 
#' # networ object for subgraph of top degree vertices
#' topDegree = computeGraphMetric(conn, policeGraphUn, type="degree", top=50)
#' net3 = computeGraph(conn, policeGraphUn, v=as.list(as.character(topDegree$key)))
#' net3 %v% "degree" = topDegree[match(get.vertex.attribute(net3, "vertex.names"), 
#'                                             topDegree$key), "degree"]
#' ggnet2(net3, node.label="vertex.names", node.size="degree", 
#'        legend.position="none")
#'                          
#' }
computeGraph <- function(channel, graph, v=NULL,
                         vertexWhere=graph$vertexWhere,
                         edgeWhere=graph$edgeWhere,
                         allTables=NULL, test=FALSE) {
  
  if (missing(graph) || !is.object(graph) || !inherits(graph, "toagraph"))
    stop("Graph object must be specified.")
  
  if (test && is.null(allTables))
    stop("Must provide allTables when test==TRUE.")
  
  isValidConnection(channel, test)
  
  isTableFlag = isTable(channel, list(vertices=graph$vertices, edges=graph$edges), allTables=allTables)

  return(computeGraphInternal(channel, graph, v, vertexWhere, edgeWhere,
                              isTableFlag, test, closeOnError=FALSE))
    
} 

computeGraphInternal <- function(channel, graph, v=NULL,
                         vertexWhere=graph$vertexWhere, 
                         edgeWhere=graph$edgeWhere, 
                         isTableFlag, test=FALSE, closeOnError=FALSE) {
  
  if(!all(isTableFlag | is.na(isTableFlag)))
    stop("Both vertices and edges must exist as tables or views.")
  
  if(is.null(graph$vertices)) {
    if(!is.null(vertexWhere))
      stop("Derived vertices can not have where clause.")
    
    graph$vertices = makeDerivedVerticesTable(graph, isTableFlag, edgeWhere)
  }
    
  
  # Handle vertex list if defined
  vertexWhere = addVerticesInVertexWhere(graph, v, vertexWhere)
  
  if(test) {
    emptyLine = "--"
    sqlText = ""
  }
  
  # Edges select
  sqlComment = "-- Edges Select"
  edgesSql = makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere)

  if(test)
    sqlText = paste(sqlComment, edgesSql, sep='\n')
  else
    e = toaSqlQuery(channel, edgesSql, stringsAsFactors=FALSE, closeOnError=closeOnError)
  
  # Vertices select
  sqlComment = "-- Vertices Select"
  verticesSql = makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE, TRUE)
      
  if(test)
    sqlText = paste(sqlText, paste(emptyLine, sqlComment, verticesSql, sep='\n'), sep=';\n')
  else
    vx = toaSqlQuery(channel, verticesSql, stringsAsFactors=FALSE, closeOnError=closeOnError)

  # result
  if (test) {
    return(sqlText)
  }else {
    net = makeNetworkResult(graph, vx, e)

    return(net)
  }
  
}


#' Find the vertices not farther than a given limit from another fixed vertex, 
#' and create egographs (subgraphs) with the given order parameter.
#'
#' @param channel connection object as returned by \code{\link{odbcConnect}}
#' @param graph an object of class \code{'toagraph'} referencing graph 
#'   tables in Aster database.
#' @param ego list of vertices for which the calculation of corresponding ego graphs is performed.
#' @param order	integer giving the order of the ego graph neighborhood.
#' @param mode character constant, it specifies how to use the direction of the edges if a directed graph is analyzed. 
#'   For \code{'out'} only the outgoing edges are followed, so all vertices reachable from the source vertex in at 
#'   most order steps are counted. For \code{'in'} all vertices from which the source vertex is reachable in at most 
#'   \code{order} steps are counted. \code{'all'} ignores the direction of the edges. This argument is ignored 
#'   for undirected graphs.
#' @param createDistanceAttr logical: indicates if vertices should receive attribute with the distance
#'   to ego graph centeral vertex.
#' @param distanceAttrname name of the vertex distance attribute.
#' @param vertexWhere SQL WHERE clause limiting data from the vertex table. This value when not null
#'   overrides corresponding value \code{vertexWhere} from \code{graph} (use SQL as if in WHERE clause but 
#'   omit keyword WHERE).
#' @param edgeWhere SQL WHERE clause limiting data from the edge table. This value when not null
#'   overrides corresponding value \code{edgeWhere} from \code{graph} (use SQL as if in WHERE clause but 
#'   omit keyword WHERE).
#' @param allTables pre-built information about existing tables.
#' @param test logical: if TRUE show what would be done, only (similar to parameter \code{test} in \pkg{RODBC} 
#'   functions: \link{sqlQuery} and \link{sqlSave}).
#' @export
#' @examples 
#' if(interactive()) {
#' library(GGally)
#' 
#' policeGraphDi = toaGraph(vertices = "dallaspolice_officer_vertices", 
#'                          edges = "dallaspolice_officer_edges_di", 
#'                          directed = TRUE,
#'                          key = "officer", source = "officer1", target = "officer2", 
#'                          vertexAttrnames = c("offense_count"),
#'                          edgeAttrnames = c("weight"))
#'                
#' # initialize connection to Lahman baseball database in Aster 
#' conn = odbcDriverConnect(connection="driver={Aster ODBC Driver};
#'                          server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>")
#'                          
#' setVertexColor <- function(graph, vertex, color="red", default="grey") {
#'   graph %v% "color" = 
#'   ifelse(get.vertex.attribute(graph, "vertex.names") == as.character(vertex),
#'          color, default)
#'   
#'   return(graph)
#' }
#' 
#' topPagerankPolice = computeGraphMetric(conn, policeGraphDi, type='pagerank', top=3)
#' egoCenters = as.list(as.character(topPagerankPolice$key))
#' 
#' egoGraphsTopPagerank = computeEgoGraph(conn, policeGraphDi, order = 1, ego = egoCenters)
#' 
#' egoGraph = setVertexColor(egoGraphsTopPagerank[[1]], egoCenters[[1]])
#' ggnet2(egoGraph, node.label="vertex.names",  node.size="offense_count",
#'        legend.position="none", color="color")
#'
#' egoGraph = setVertexColor(egoGraphsTopPagerank[[2]], egoCenters[[2]])
#' ggnet2(egoGraph, node.label="vertex.names",  node.size="offense_count",
#'        legend.position="none", color="color")
#'
#' egoGraph = setVertexColor(egoGraphsTopPagerank[[3]], egoCenters[[3]])
#' ggnet2(egoGraph, node.label="vertex.names",  node.size="offense_count",
#'        legend.position="none", color="color")

#' }
computeEgoGraph <- function(channel, graph, ego, order=1, mode="all",
                            createDistanceAttr=TRUE, distanceAttrname="ego.distance",
                            vertexWhere=graph$vertexWhere, edgeWhere=graph$edgeWhere,
                            allTables=NULL, test=FALSE) {
  
  # match argument values
  mode = match.arg(mode, c('all','both','in','out'))
  
  if (missing(graph) || !is.object(graph) || !inherits(graph, "toagraph"))
    stop("Graph object must be specified.")
  
  if (test && is.null(allTables))
    stop("Must provide allTables when test==TRUE.")
  
  if (is.null(ego) || length(ego) == 0)
    stop("Must have at least one ego vertex defined.")
  
  if (!graph$directed && mode %in% c('in','out'))
    stop("Must be a directed graph when mode is 'in' or 'out'.")
  
  isValidConnection(channel, test)
  
  isTableFlag = isTable(channel, list(vertices=graph$vertices, edges=graph$edges), allTables=allTables)
  
  if(!all(isTableFlag | is.na(isTableFlag)))
    stop("Both vertices and edges must exist as tables or views.")
  
  if(test) {
    emptyLine = "--"
    sqlText = ""
  }
  
  egoVertexWhere = addVerticesInVertexWhere(graph, ego, vertexWhere)
  selfTargetOrSource = ifelse(mode == 'in', 'target', 'source')
  otherTargetOrSource = ifelse(mode == 'in', 'source', 'target')
  sqlComment = "-- Create temp table of the shortest paths from ego vertices"
  sqlBeginTran = "BEGIN"
  shortestPathSql = paste0(
      "CREATE TEMP FACT TABLE egographtemp 
       DISTRIBUTE BY HASH(source) 
       AS
       SELECT source, target, distance FROM AllPairsShortestPath(
         ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key , "
         ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
         ON (", makeVerticesSql(graph, isTableFlag, egoVertexWhere, FALSE), ") AS ", selfTargetOrSource,"s PARTITION BY ", graph$key, "
         TARGETKEY('",graph$target,"')
         DIRECTED('", ifelse(graph$directed, 'true', 'false'), "')
         MAXDISTANCE('",order,"')
       )"
  )
  
  if(test) {
    sqlText = paste(sqlBeginTran, paste(emptyLine, sqlComment, shortestPathSql, sep='\n'), sep=';\n')
  }else {
    odbcSetAutoCommit(channel, autoCommit = FALSE)
    toaSqlQuery(channel, sqlBeginTran)
    toaSqlQuery(channel, shortestPathSql)
  }
  
  if (createDistanceAttr) {
    distanceColumnSql = paste0(", eg.distance ", "__distance_attr__")
    distance0ColumnSql = paste0(", 0 ", "__distance_attr__")
  }else {
    distanceColumnSql = ""
    distance0ColumnSql = ""
  }

  egoGraphs = list()
  for(i in 1:length(ego)) {
    
    key = ego[[i]]
    ego.graph = graph
    
    sqlComment = "-- Edges Select"
    edgesSql = paste0(
      "SELECT e.*
         FROM (", makeEdgesSql(ego.graph, isTableFlag, vertexWhere, edgeWhere), ") e 
        WHERE ", graph$source, " IN (SELECT ", otherTargetOrSource, " FROM egographtemp WHERE ", selfTargetOrSource, " = '",key,"')
          AND ", graph$target, " IN (SELECT ", otherTargetOrSource, " FROM egographtemp WHERE ", selfTargetOrSource, " = '",key,"')
       UNION
       SELECT e.*
         FROM (", makeEdgesSql(ego.graph, isTableFlag, vertexWhere, edgeWhere), ") e
        WHERE ", makeEgoSelfEdgeWhereSql(graph, key, mode)
    )
    
    if(test)
      sqlText = paste(sqlText, paste(emptyLine, sqlComment, edgesSql, sep='\n'), sep=';\n')
    else
      e = toaSqlQuery(channel, edgesSql, stringsAsFactors=FALSE)
    
    if ((!is.null(ego.graph$vertexAttrnames) && length(ego.graph$vertexAttrnames) > 0) ||
        createDistanceAttr) {
      sqlComment = "-- Vertices Select"
      verticesSql = paste0(
        "SELECT v.*", distanceColumnSql, " 
           FROM egographtemp eg JOIN
                (", makeVerticesSql(ego.graph, isTableFlag, vertexWhere, FALSE), ") v ON (eg.",otherTargetOrSource," = v.",graph$key,")
          WHERE eg.",selfTargetOrSource," = '",key,"'
         UNION
         SELECT v.*", distance0ColumnSql, " 
           FROM egographtemp eg JOIN
                (", makeVerticesSql(ego.graph, isTableFlag, vertexWhere, FALSE), ") v ON (eg.",selfTargetOrSource," = v.",graph$key,")
          WHERE eg.",selfTargetOrSource," = '",key,"'"
      )
      
      if(test)
        sqlText = paste(sqlText, paste(emptyLine, sqlComment, verticesSql, sep='\n'), sep=';\n')
      else {
        v = toaSqlQuery(channel, verticesSql, stringsAsFactors=FALSE)
        if (createDistanceAttr) {
          ego.graph$vertexAttrnames = c(ego.graph$vertexAttrnames, distanceAttrname)
          names(v)[[length(v)]] = distanceAttrname
        }
      }
    }else
      v = NULL
    
    if(!test)
      egoGraphs[[i]] = makeNetworkResult(ego.graph, v, e)

  }
  
  sqlEndTran = "END"
  if(test) {
    sqlText = paste(sqlText, paste(emptyLine, sqlEndTran, sep='\n'), sep=';\n')
    
    return(sqlText)
  }else {
    toaSqlQuery(channel, sqlEndTran)
    odbcSetAutoCommit(channel, autoCommit = TRUE)
  
    return(egoGraphs)
  }
  
}


#' Compute various statistic distributions on graph edges and vertices.
#' 
#' @param channel connection object as returned by \code{\link{odbcConnect}}
#' @param graph an object of class \code{'toagraph'} referencing graph 
#'   tables in Aster database.
#' @param type choose between graph measures to compute histogram distribution for: 
#'   \code{'degree', 'clustering', 'shortestpath', 'pagerank', 'betweenness', 'eigenvector'}.
#' @param weight logical or character: if logical then \code{TRUE} indicates using \code{'weight'} edge
#'   attribute, otherwise no weight used. If character then use as a name for the edge weight attribute. 
#'   The edge weight may apply with types \code{'clustering', 'shortestpath'} and centrality measures.
#' @param binMethod one of several methods to determine number and size of bins: \code{'manual'} indicates to use 
#'   paramters below, both \code{'Sturges'} or \code{'Scott'} will use corresponding methods of computing number
#'   of bins and width (see \url{http://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width}).
#' @param binsize size (width) of discrete intervals defining histogram (all bins are equal).
#' @param startvalue lower end (bound) of values to include in histogram.
#' @param endvalue upper end (bound) of values to include in histogram.
#' @param numbins number of bins to use in histogram.
#' @param vertexWhere SQL WHERE clause limiting data from the vertex table. This value when not null
#'   overrides corresponding value \code{vertexWhere} from \code{graph} (use SQL as if in WHERE clause but 
#'   omit keyword WHERE).
#' @param edgeWhere SQL WHERE clause limiting data from the edge table. This value when not null
#'   overrides corresponding value \code{edgeWhere} from \code{graph} (use SQL as if in WHERE clause but 
#'   omit keyword WHERE).
#' @param allTables pre-built information about existing tables.
#' @param test logical: if TRUE show what would be done, only (similar to parameter \code{test} in \pkg{RODBC} 
#'   functions: \link{sqlQuery} and \link{sqlSave}).
#' @param ... other arguments passed on to Aster graph functions except for \code{EDGEWEIGHT} argument -
#'   use argument \code{weight} instead. Aster function areguments are not casesensetive 
#' 
#' @export
#' @examples 
#' if(interactive()) {
#' 
#' policeGraphUn = toaGraph("dallaspolice_officer_vertices", "dallaspolice_officer_edges_un", 
#'                          directed = FALSE, key = "officer", 
#'                          source = "officer1", target = "officer2", 
#'                          vertexAttrnames = c("offense_count"), edgeAttrnames = c("weight"))
#'                
#' # initialize connection to Lahman baseball database in Aster 
#' conn = odbcDriverConnect(connection="driver={Aster ODBC Driver};
#'                          server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>")
#'                          
#' hdegreePolice = computeGraphHistogram(conn, policeGraphUn, type='degree', numbins=36) 
#' createHistogram(hdegreePolice, 
#'                 title = "Dallas Police Graph Degree Distribution", 
#'                 xlab='Degree', ylab='Count')
#'
#' hshortestpathPolice = computeGraphHistogram(conn, policeGraphUn, type='shortestpath',
#'                               numbins = 10)
#' createHistogram(hshortestpathPolice, 
#'                 title = "Dallas Police Shortest Path Distribution", 
#'                 xlab = "Distance", ylab = "Count")
#' }
computeGraphHistogram <- function(channel, graph, type='degree', weight=FALSE,
                                  binMethod='manual', numbins = NULL, binsize = NULL, 
                                  startvalue = NULL, endvalue = NULL,
                                  vertexWhere=graph$vertexWhere, edgeWhere=graph$edgeWhere,
                                  allTables=NULL, test=FALSE, ...) {
  
  # match argument values
  type = match.arg(type, c('degree', 'clustering', 'shortestpath', 'pagerank', 'betweenness',
                           'eigenvector', 'closeness', 'avg-closeness', 'k-degree', 'alt-closeness'))
  binMethod = match.arg(binMethod, c('manual', 'Sturges', 'Scott'))
  
  if (missing(graph) || !is.object(graph) || !inherits(graph, "toagraph"))
    stop("Graph object must be specified.")
  
  if (test && is.null(allTables))
    stop("Must provide allTables when test==TRUE.")
  
  isValidConnection(channel, test)
  
  isTableFlag = isTable(channel, list(vertices=graph$vertices, edges=graph$edges), allTables=allTables)
  
  if(!all(isTableFlag | is.na(isTableFlag)))
    stop("Both vertices and edges must exist as tables or views.")
  
  # weight attribute if present
  weight = parseWeightArgument(graph, weight)
  
  argsSql = makeGraphFunctionArgumentsSql(...)
  
  # Graph sql, names
  if (type=='degree') {
    histogramSelectTempTableSql = makeDegreeSelectSql(graph, isTableFlag, NULL, vertexWhere, edgeWhere)
    histogramValue = 'degree'
    histogramGroup = 'degree_type'
  }else if (type=='clustering') {
    histogramSelectTempTableSql = makeClusteringSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'cc'
    histogramGroup = 'cc_type'
  }else if (type=='closeness') {
    histogramSelectTempTableSql = makeClosenessSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'closeness'
    histogramGroup = NULL
  }else if (type=='avg-closeness') {
    histogramSelectTempTableSql = makeAvgClosenessSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'closeness'
    histogramGroup = NULL
  }else if (type=='alt-closeness') {
    histogramSelectTempTableSql = makeAltClosenessSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'closeness'
    histogramGroup = NULL
  }else if (type=='k-degree') {
    histogramSelectTempTableSql = makeKdegreeSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'kdegree'
    histogramGroup = NULL
  }else if (type=='shortestpath') {
    histogramSelectTempTableSql = makeShortestPathSelectSql(graph, isTableFlag, weight, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'distance'
    histogramGroup = NULL
  }else if (type=='pagerank') {
    histogramSelectTempTableSql = makePageRankSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'pagerank'
    histogramGroup = NULL
  }else if (type == 'betweenness') {
    histogramSelectTempTableSql = makeBetweennessSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'betweenness'
    histogramGroup = NULL
  }else if (type == 'eigenvector') {
    histogramSelectTempTableSql = makeEigenVectorSelectSql(graph, isTableFlag, weight, NULL, vertexWhere, edgeWhere, argsSql)
    histogramValue = 'centrality'
    histogramGroup = NULL
  }

  if (binMethod=='manual') {
    if (is.null(startvalue) && is.null(endvalue) && is.null(numbins))
      stop("Number of bins or/and at least startvalue and endvalue must be defined when method is 'manual'.")
    
    # set number of bins if manual
    if( is.null(startvalue))
      startvalue = 0
    
    if( is.null(endvalue)) 
      endvalue = startvalue + numbins * binsize
    
    if(is.null(startvalue) || is.null(endvalue) || length(startvalue)==0 || length(endvalue)==0) {
      histPrep = paste0("ON hist_prep(
                           ON graphdataforhisttemp VALUE_COLUMN('",histogramValue,"')) as data_stat DIMENSION
                           BIN_SELECT('",numbins,"')")
    }else {
      if (startvalue >= endvalue)
        stop("End value should be greater than start value.")
      
      if(is.null(binsize))
        binsize = (endvalue - startvalue) / numbins
      
      histPrep = paste0("binsize('", binsize, "')
                         startvalue('", startvalue, "')
                         endvalue('", endvalue, "')")
    }
  }else {
    # compute histogram parameters if missing
    histPrep = paste0("ON hist_prep(
                         ON graphdataforhisttemp VALUE_COLUMN('",histogramValue,"')) as data_stat DIMENSION
                         BIN_SELECT('",binMethod,"')")
  }
  
  if(test) {
    emptyLine = "--"
    sqlText = ""
  }
  
  # Create temp table with the node metric
  sqlComment = paste("-- Compute", type, "into temp table with all vertices")
  sqlBeginTran = "BEGIN"
  graphDegreeSql = paste0(
    "CREATE TEMP FACT TABLE graphdataforhisttemp 
     DISTRIBUTE BY HASH(key) 
     AS
     ", histogramSelectTempTableSql
  )
  
  if(test) {
    sqlText = paste(sqlBeginTran, paste(emptyLine, sqlComment, graphDegreeSql, sep='\n'), sep=';\n')
  }else {
    odbcSetAutoCommit(channel, autoCommit = FALSE)
    toaSqlQuery(channel, sqlBeginTran)
    toaSqlQuery(channel, graphDegreeSql)
  }
  
  # Compute metric histogram
  sqlComment = "-- Compute metric histogram"
  histDegreeSql = paste0(
    "SELECT * FROM Hist_Reduce(
           ON Hist_Map(
             ON graphdataforhisttemp as data_input PARTITION BY ANY 
          ", histPrep, "
             VALUE_COLUMN('",histogramValue,"')
            ",ifelse(is.null(histogramGroup),"",paste0("GROUP_COLUMNS('",histogramGroup,"')")),"
         ) PARTITION BY ",ifelse(is.null(histogramGroup),"1",histogramGroup),"
    )"
  )
  
  if(test)
    sqlText = paste(sqlText, paste(emptyLine, sqlComment, histDegreeSql, sep='\n'), sep=';\n')
  else
    histograms = toaSqlQuery(channel, histDegreeSql, stringsAsFactors=FALSE)
  
  sqlEndTran = "END"
  if(test) {
    sqlText = paste(sqlText, paste(emptyLine, sqlEndTran, sep='\n'), sep=';\n')
    
    return(sqlText)
  }else {
    toaSqlQuery(channel, sqlEndTran)
    odbcSetAutoCommit(channel, autoCommit = TRUE)
  
    return(histograms)
  }
  
}


#' Compute top vertices by the metric values on a graph.
#' 
#' @param channel connection object as returned by \code{\link{odbcConnect}}
#' @param graph an object of class \code{'toagraph'} referencing graph 
#'   tables in Aster database.
#' @param type choose between graph metrics to compute: \code{'degree', 'in-degree', 
#'   'out-degree', 'clustering', 'shortestpath', 'pagerank', 'betweenness', 
#'   'eigenvector', 'closeness', 'avg-closeness', 'k-degree', 'alt-closeness'}.
#' @param top the number of vertices to return. If \code{top >= 0} then \code{top} vertices 
#'   sorted by the metric value are returned, otherwise all vertices are returned. 
#'   Returned vertices are ordered by the computed graph metric only when \code{top >= 0}. 
#' @param rankFunction one of \code{rownumber, rank, denserank, percentrank}. Rank computed and
#'   returned for each vertex and each metric type. \code{rankFunction} determines which SQL window 
#'   function computes vertex rank value (default \code{rank} corresponds to SQL \code{RANK()} window function). 
#'   When threshold \code{top} is greater than 0 ranking function used to limit number of 
#'   vertices returned (see details).
#' @param weight logical or character: if logical then \code{TRUE} indicates using \code{'weight'} edge
#'   attribute, otherwise no weight used. If character then use as a name for the edge weight attribute. 
#'   The edge weight may apply with types \code{'clustering', 'shortestpath'} and centrality measures.
#' @param vertexWhere SQL WHERE clause limiting data from the vertex table. This value when not null
#'   overrides corresponding value \code{vertexWhere} from \code{graph} (use SQL as if in WHERE clause but 
#'   omit keyword WHERE).
#' @param edgeWhere SQL WHERE clause limiting data from the edge table. This value when not null
#'   overrides corresponding value \code{edgeWhere} from \code{graph} (use SQL as if in WHERE clause but 
#'   omit keyword WHERE).
#' @param keyAsFactor logical: should key column be converted to factor? If \code{TRUE} then conversion
#'   will always take place for any of integer, numeric, or character data types.
#' @param allTables pre-built information about existing tables.
#' @param test logical: if TRUE show what would be done, only (similar to parameter \code{test} in \pkg{RODBC} 
#'   functions: \link{sqlQuery} and \link{sqlSave}).
#' @param ... other arguments passed on to Aster graph functions except for \code{EDGEWEIGHT} argument -
#'   use argument \code{weight} instead. Aster function areguments are not case-sensetive.
#'   
#' @return dataframe containing one vertice per row with key value, computed metric value, and its rank 
#'   using \code{rankFunction}.
#' 
#' @export
#' @examples 
#' if(interactive()) {
#' library(ggplot2)
#' 
#' policeGraphUn = toaGraph("dallaspolice_officer_vertices", "dallaspolice_officer_edges_un", 
#'                          directed = FALSE, key = "officer", 
#'                          source = "officer1", target = "officer2", 
#'                          vertexAttrnames = c("offense_count"), edgeAttrnames = c("weight"))
#'                
#' # initialize connection to Lahman baseball database in Aster 
#' conn = odbcDriverConnect(connection="driver={Aster ODBC Driver};
#'                          server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>")
#'                          
#' createTopMetricPlot <- function(data, metric, xlab='Officer', ylab='Degree', title) {
#'    p = ggplot(data) +
#'        geom_bar(aes_string("key", metric, fill="key"), stat='identity') +
#'        labs(x=xlab,y=ylab,title=title) +
#'        ggthemes::theme_tufte() + 
#'        theme(legend.position='none', 
#'              axis.text.x = element_text(size=16, angle = 315, vjust = 1), 
#'              plot.title = element_text(size=20),
#'              axis.ticks = element_blank())
#'    
#'    return(p)
#' }
#' 
#' # top degree officers
#' topDegree = computeGraphMetric(conn, policeGraphUn, type="degree", top=30)
#' createTopMetricPlot(topDegree, 'degree', ylab='Degree', title='Top 30 Officers by Degree') 
#' 
#' # top betweenness officers
#' topbetweenness = computeGraphMetric(conn, policeGraphUn, type='betweenness', top=25)
#' createTopMetricPlot(topbetweenness, 'betweenness', ylab='Betweenness', 
#'                     title='Top 25 Officers (Betweenness)')                     
#'                          
#' }
computeGraphMetric <- function(channel, graph, type='degree', top=10, rankFunction='rank',
                               weight=FALSE, vertexWhere=graph$vertexWhere, edgeWhere=graph$edgeWhere,
                               keyAsFactor=TRUE, allTables=NULL, test=FALSE, ...) {
  
  # match argument values
  type = match.arg(type, c('degree', 'in-degree', 'out-degree', 'clustering', 'shortestpath', 'pagerank',
                           'betweenness', 'eigenvector', 'closeness', 'avg-closeness', 'k-degree', 'alt-closeness'))
  rankFunction = match.arg(rankFunction, c('rank', 'rownumber', 'row', 'denserank', 'percentrank'))
  
  if (missing(graph) || !is.object(graph) || !inherits(graph, "toagraph"))
    stop("Graph object must be specified.")
  
  if (test && is.null(allTables))
    stop("Must provide allTables when test==TRUE.")
  
  isValidConnection(channel, test)
  
  isTableFlag = isTable(channel, list(vertices=graph$vertices, edges=graph$edges), allTables=allTables)
  
  if(!all(isTableFlag | is.na(isTableFlag)))
    stop("Both vertices and edges must exist as tables or views.")
  
  # weight attribute if present
  weight = parseWeightArgument(graph, weight)
  
  argsSql = makeGraphFunctionArgumentsSql(...)
  windowFunction = getWindowFunction(rankFunction)
  
  if (type=='degree') {
    selectSql = makeDegreeSelectSql(graph, isTableFlag, windowFunction, vertexWhere, edgeWhere)
    value = 'degree'
    if (graph$directed) {
      group = 'degree_type'
      groupValue = 'degree'
    }else
      group = NULL
  }else if(type=='in-degree') {
    selectSql = makeDegreeSelectSql(graph, isTableFlag, windowFunction, vertexWhere, edgeWhere)
    value = 'degree'
    group = 'degree_type'
    groupValue = 'indegree'
  }else if(type=='out-degree') {
    selectSql = makeDegreeSelectSql(graph, isTableFlag, windowFunction, vertexWhere, edgeWhere)
    value = 'degree'
    group = 'degree_type'
    groupValue = 'outdegree'
  }else if (type=='clustering') {
    selectSql = makeClusteringSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'cc'
    group = 'cc_type'
    groupValue = ifelse(graph$directed, 'avg_cc', 'cc')
  }else if (type=='pagerank') {
    selectSql = makePageRankSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'pagerank'
    group = NULL
  }else if (type == 'betweenness') {
    selectSql = makeBetweennessSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'betweenness'
    group = NULL
  }else if (type == 'eigenvector') {
    selectSql = makeEigenVectorSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'centrality'
    group = NULL
  }else if (type == 'closeness') {
    selectSql = makeClosenessSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'closeness'
    group = NULL
  }else if (type == 'avg-closeness') {
    selectSql = makeAvgClosenessSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'closeness'
    group = NULL
  }else if (type == 'alt-closeness') {
    selectSql = makeAltClosenessSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'closeness'
    group = NULL
  }else if (type == 'k-degree') {
    selectSql = makeKdegreeSelectSql(graph, isTableFlag, weight, windowFunction, vertexWhere, edgeWhere, argsSql)
    value = 'kdegree'
    group = NULL
  }
  
  selectSql = paste0(
      selectSql, "
     ",ifelse(is.null(group), "", paste0(" WHERE ", group, " = '", groupValue, "'")), "
     ",ifelse(!is.null(top) && is.numeric(top) && top >= 0, 
              paste0("ORDER BY ", value, " DESC LIMIT ", as.character(top)), "")
  )

  if (test)
    return(selectSql)
  else
    result = toaSqlQuery(channel, selectSql, stringsAsFactors=FALSE)
  
  # make a factor ordered by rank
  if(keyAsFactor) {
    result$key = factor(result$key)
    result$key = factor(result$key, levels=result$key[order(result$rank)], ordered = TRUE)
  }
  
  return(result)
}


parseWeightArgument <- function(graph, weight) {
  if (is.null(weight) || weight==FALSE) weight = NULL
  if (is.logical(weight) && weight) weight = 'weight'
  if (!is.null(weight) && !weight %in% graph$edgeAttrnames)
    stop(paste0("No edge attribute '", weight, "' found in graph."))
  
  return(weight)
}


makeGraphFunctionArgumentsSql <- function(...) {
  
  args = list(...)
  
  if (is.null(args) || length(args)==0) return("")
  
  argNames = names(args)
  result = ""
  for(i in 1:length(args)) {
      result = paste0(result, "
           ", argNames[[i]],"('",args[[i]],"')")
  }
  
  return(result)
}


makeDegreeSelectSql <- function(graph, isTableFlag, rankFunction, vertexWhere, edgeWhere) {
  
  if(graph$directed) {
    sql = paste0(
    "SELECT key, degree_type, degree_long degree", 
            ifelse(!is.null(rankFunction), paste0(", ", rankFunction, " OVER (PARTITION BY degree_type ORDER BY degree_long DESC) rank"), ""), "
       FROM unpivot(
         ON (SELECT COALESCE(s.key, t.key) key, 
                    COALESCE(s.cnt_source,0) outdegree, 
                    COALESCE(t.cnt_target,0) indegree,  
                    COALESCE(s.cnt_source,0) + COALESCE(t.cnt_target,0) degree,
                    (COALESCE(t.cnt_target,0) + 1)/(COALESCE(s.cnt_source,0) + 1) inbyoutdegree
               FROM (SELECT ",graph$source," key, COUNT(*) cnt_source 
                       FROM (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") e 
                      GROUP BY 1) s FULL JOIN
                    (SELECT ",graph$target," key, COUNT(*) cnt_target 
                       FROM (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") e
                      GROUP BY 1) t ON (s.key = t.key)
         )
         colsToUnpivot('outdegree','indegree','degree','inbyoutdegree')
         colsToAccumulate('key')
         keepInputColumnTypes('true')
         ATTRIBUTECOLUMNNAME('degree_type')
         VALUECOLUMNNAME('degree')
     )"
    )
  }else {
    sql = paste0(
    "SELECT COALESCE(s.key, t.key) key,
            'degree'::varchar degree_type,
            COALESCE(s.cnt_source,0) + COALESCE(t.cnt_target,0) degree",
            ifelse(!is.null(rankFunction), 
                   paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY COALESCE(s.cnt_source,0) + COALESCE(t.cnt_target,0) DESC) rank"), 
                   ""), "
      FROM (SELECT ",graph$source," key, COUNT(*) cnt_source 
              FROM (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") e 
             GROUP BY 1) s FULL JOIN
           (SELECT ",graph$target," key, COUNT(*) cnt_target 
              FROM (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") e
             GROUP BY 1) t ON (s.key = t.key)"
    )
  }
  
  return(sql)
}


makeClusteringSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, cc_type, coalesce(cc_double, cc_str::double) cc",
               ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY cc_type ORDER BY coalesce(cc_double, cc_str::double) DESC) rank"), ""), "
       FROM unpivot(
         ON (SELECT * FROM LocalClusteringCoefficient(
               ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
               ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
               targetKey('",graph$target,"')
             ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),"
               directed('",ifelse(graph$directed,"true","false"),"')
               accumulate('",graph$key,"')",
               argsSql,"
         ))
         colsToUnpivot(",ifelse(graph$directed,"'cyc_cc','mid_cc','in_cc','out_cc','avg_cc'","'cc'"),")
         colsToAccumulate('",graph$key,"')
         keepInputColumnTypes('true')
         ATTRIBUTECOLUMNNAME('cc_type')
         VALUECOLUMNNAME('cc')
     )"
  )
}


makeClosenessSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, inv_sum_dist closeness",
            ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY inv_sum_dist DESC) rank"), ""), "
               FROM Closeness(
               ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
               ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
               targetKey('",graph$target,"')
             ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),"
               directed('",ifelse(graph$directed,"true","false"),"')
               accumulate('",graph$key,"')",
               argsSql,"
         )"
  )
}


makeAvgClosenessSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, inv_avg_dist closeness",
            ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY inv_avg_dist DESC) rank"), ""), "
               FROM Closeness(
               ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
               ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
               targetKey('",graph$target,"')
             ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),"
               directed('",ifelse(graph$directed,"true","false"),"')
               accumulate('",graph$key,"')",
               argsSql,"
         )"
  )
}


makeAltClosenessSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, sum_inv_dist closeness",
            ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY sum_inv_dist DESC) rank"), ""), "
               FROM Closeness(
               ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
               ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
               targetKey('",graph$target,"')
             ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),"
               directed('",ifelse(graph$directed,"true","false"),"')
               accumulate('",graph$key,"')",
               argsSql,"
         )"
  )
}


makeKdegreeSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, kdegree",
            ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY kdegree DESC) rank"), ""), "
               FROM Closeness(
               ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
               ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
               targetKey('",graph$target,"')
             ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),"
               directed('",ifelse(graph$directed,"true","false"),"')
               accumulate('",graph$key,"')",
               argsSql,"
         )"
  )
}


makeShortestPathSelectSql <- function(graph, isTableFlag, weight, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT source key, target, distance FROM AllPairsShortestPath(
       ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
       ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
       targetKey('",graph$target,"')
       directed('",ifelse(graph$directed,"true","false"),"')
     ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),
       argsSql,"
)"
  )
}


makePageRankSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, pagerank",
       ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY pagerank DESC) rank"), ""), "
       FROM PageRank(
       ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
       ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
       targetKey('",graph$target,"')
       accumulate('",graph$key,"')
     ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),
       argsSql,"
)"
  )
}


makeBetweennessSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, betweenness", 
       ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY betweenness DESC) rank"), ""), "
       FROM Betweenness(
       ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
       ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
       targetKey('",graph$target,"')
       accumulate('",graph$key,"')
     ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),
       argsSql,"
)"
  )
}


makeEigenVectorSelectSql <- function(graph, isTableFlag, weight, rankFunction, vertexWhere, edgeWhere, argsSql) {
  
  sql = paste0(
    "SELECT ", graph$key, " key, centrality",
       ifelse(!is.null(rankFunction),
                      paste0(", ", rankFunction, " OVER (PARTITION BY 1 ORDER BY centrality DESC) rank"), ""), "
       FROM EigenVectorCentrality(
       ON (", makeVerticesSql(graph, isTableFlag, vertexWhere, FALSE), ") AS vertices PARTITION BY ", graph$key, "
       ON (", makeEdgesSql(graph, isTableFlag, vertexWhere, edgeWhere), ") AS edges PARTITION BY ", graph$source, "
       targetKey('",graph$target,"')
       accumulate('",graph$key,"')
       directed('",ifelse(graph$directed,"true","false"),"')
     ",ifelse(is.null(weight), "", paste0("edgeweight('",weight,"')")),
       argsSql,"
     )"
  )
}

addVerticesInVertexWhere <- function(graph, v, vertexWhere) {
  
  if(!is.null(v) && length(v) > 0) {
    if(is.list(v))
      vertexValueWhere = paste0(graph$key, " IN (", makeSqlValueList(unlist(v)) , ")")
    else
      vertexValueWhere = paste0(graph$key, " IN (", v[[1]], ")")
    
    if(is.null(vertexWhere))
      vertexWhere = vertexValueWhere
    else
      vertexWhere = paste0("(", vertexWhere, ") AND ", vertexValueWhere)
  }
  
  return(vertexWhere)
}


makeVerticesSql <- function(graph, isTableFlag, vertexWhere, keyOnlyFlag, ordered=FALSE) {
  
  if(keyOnlyFlag) 
    selectList = graph$key
  else
    selectList = makeSqlColumnList(c(graph$key, graph$vertexAttrnames))
  
  paste0(
    "SELECT ", selectList, " 
       FROM ", makeFromClause(graph$vertices, isTableFlag[['vertices']], "t"),
    makeWhereClause(vertexWhere),
    ifelse(ordered, paste0(" ORDER BY ", graph$key), "")
  )
}
 

makeEdgesSql <- function(graph, isTableFlag, vertexWhere, edgeWhere) {
  
  if (!is.null(vertexWhere)) {
    verticesSql = makeVerticesSql(graph, isTableFlag, vertexWhere, TRUE)
    
    if(!is.null(edgeWhere)) {
      edgeWhere = paste0(
        "(", edgeWhere, ") AND 
         ", graph$source, " IN (", verticesSql, ") AND 
         ", graph$target, " IN (", verticesSql, ")"
      )
    }else {
      edgeWhere = paste0(
        graph$source, " IN (", verticesSql, ") AND 
     ", graph$target, " IN (", verticesSql, ")"
      )
    }
  }
  
  paste0(
      "SELECT ", makeSqlColumnList(c(graph$source, graph$target, graph$edgeAttrnames)), " 
         FROM ", makeFromClause(graph$edges, isTableFlag[['edges']], "t"),
      makeWhereClause(edgeWhere)
  )
}

makeDerivedVerticesTable <- function(graph, isTableFlag, edgeWhere) {
  
  vertices = paste0(
    "SELECT ",graph$source," ",graph$key," 
       FROM ", makeFromClause(graph$edges, isTableFlag[['edges']], "t"), 
       makeWhereClause(edgeWhere), "
     UNION
     SELECT ",graph$target," ",graph$key," 
       FROM ", makeFromClause(graph$edges, isTableFlag[['edges']], "t"), 
       makeWhereClause(edgeWhere)
  )
  
}

makeEgoSelfEdgeWhereSql <- function(graph, key, mode) {
  
  if (mode %in% c('all','both'))
    whereSql = paste0(graph$source," = '",key,"'
                 OR ",graph$target," = '",key,"'")
  else if (mode == 'in')
    whereSql = paste0(graph$target, " = '",key,"'")
  else
    whereSql = paste0(graph$source, " = '",key,"'")
  
  return(whereSql)
}


makeNetworkResult <- function(graph, v, e){
  
  # validate resulting graph data
  if (!is.null(v) && nrow(v) == 0)
    stop("Graph object may not have 0 vertices.")
  #if (nrow(e) == 0) 
  #  stop("Graph object may not have 0 edges.")

  # this step is necessary to eliminate integer values which are processed 
  # not like character values by network constructor
  if (is.numeric(e[,graph$source]))
    e[,graph$source] = as.character(e[,graph$source])
  if (is.numeric(e[,graph$target]))
    e[,graph$target] = as.character(e[,graph$target])
  if (!is.null(v) && is.numeric(v[,graph$key]))
    v[,graph$key] = as.character(v[,graph$key])
  
  # create network object using the edge list
  if (nrow(e) > 0) {
    net = network(e, directed=graph$directed, matrix.type="edgelist", ignore.eval=FALSE)
    
    # handle those nodes that has no edges attached
    if (!is.null(v) && nrow(v) > 0) {
      vnames = v[, graph$key]
      evnames = get.vertex.attribute(net, "vertex.names")
      if(!all(vnames %in% evnames)) {
        add.vertices(net, length(v[!vnames %in% evnames, graph$key]), 
                     lapply(v[!vnames %in% evnames, graph$key], 
                            FUN = function(x) {list(na=FALSE, vertex.names=x)}))
      }
    }
  # create network with no edges
  }else {
    net = network.initialize(nrow(v), directed=graph$directed)
    
    set.vertex.attribute(net, "vertex.names", v[, graph$key], v=1:nrow(v))
  }
  
  # vertex data is optional; if it's not null then it contains vertex attributes
  if(!is.null(v) && nrow(v) > 0) {
    net.v = data.frame(id=1:length(net$val), vertex.names=matrix(unlist(net$val), ncol=2, byrow=TRUE)[,2],
                       stringsAsFactors=FALSE)
    net.v = merge(net.v, v, by.x="vertex.names", by.y=graph$key, all=FALSE, sort=FALSE)
      
    for(attrname in graph$vertexAttrnames) {
      set.vertex.attribute(net, attrname, net.v[, attrname], net.v[, "id"])
    }
  }
  
  return(net)
}
teradata-aster-field/toaster documentation built on May 31, 2019, 8:36 a.m.